author     Zac Medico <zmedico@gentoo.org>    2010-08-29 20:28:45 -0700
committer  Zac Medico <zmedico@gentoo.org>    2010-08-29 22:25:48 -0700
commit     e189d132728fcc6efa7df5d6c0c6598209ca446d (patch)
tree       25c282715bd4ed9a01d1494fbd4e51f34cb66dce /pym/portage/util/_dyn_libs
parent     eebc987be16523e0912d4bbbe96667ccb47b1ba4 (diff)
download   portage-e189d132728fcc6efa7df5d6c0c6598209ca446d.tar.gz
           portage-e189d132728fcc6efa7df5d6c0c6598209ca446d.tar.bz2
           portage-e189d132728fcc6efa7df5d6c0c6598209ca446d.zip
Move LinkageMap to a new portage.util._dyn_libs.LinkageMapELF module.
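
For reference, a minimal usage sketch of the new module (assuming the usual
vartree dbapi is obtained via portage.db; the object paths below are
illustrative only):

    import portage
    from portage.util._dyn_libs.LinkageMapELF import LinkageMapELF

    # Build the map from the installed-package database (vardbapi).
    vardb = portage.db[portage.root]["vartree"].dbapi
    linkmap = LinkageMapELF(vardb)
    linkmap.rebuild()

    # Objects whose NEEDED sonames have no provider in their runpath.
    broken = linkmap.listBrokenBinaries()

    # Providers of the sonames needed by an object, and consumers of a library.
    providers = linkmap.findProviders("/usr/bin/bar")
    consumers = linkmap.findConsumers("/lib/libbar.so.1.5")
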
Diffstat (limited to 'pym/portage/util/_dyn_libs')
-rw-r--r--  pym/portage/util/_dyn_libs/LinkageMapELF.py | 633
1 file changed, 633 insertions(+), 0 deletions(-)
diff --git a/pym/portage/util/_dyn_libs/LinkageMapELF.py b/pym/portage/util/_dyn_libs/LinkageMapELF.py
new file mode 100644
index 000000000..a159f7a59
--- /dev/null
+++ b/pym/portage/util/_dyn_libs/LinkageMapELF.py
@@ -0,0 +1,633 @@
+# Copyright 1998-2010 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import logging
+import subprocess
+
+import portage
+from portage import _encodings
+from portage import _os_merge
+from portage import _unicode_decode
+from portage import _unicode_encode
+from portage.cache.mappings import slot_dict_class
+from portage.exception import CommandNotFound
+from portage.localization import _
+from portage.util import getlibpaths
+from portage.util import grabfile
+from portage.util import normalize_path
+from portage.util import writemsg_level
+
+class LinkageMapELF(object):
+
+ """Models dynamic linker dependencies."""
+
+ _needed_aux_key = "NEEDED.ELF.2"
+ _soname_map_class = slot_dict_class(
+ ("consumers", "providers"), prefix="")
+
+ def __init__(self, vardbapi):
+ self._dbapi = vardbapi
+ self._eroot = self._dbapi._eroot
+ self._libs = {}
+ self._obj_properties = {}
+ self._obj_key_cache = {}
+ self._defpath = set()
+ self._path_key_cache = {}
+
+ def _clear_cache(self):
+ self._libs.clear()
+ self._obj_properties.clear()
+ self._obj_key_cache.clear()
+ self._defpath.clear()
+ self._path_key_cache.clear()
+
+ def _path_key(self, path):
+ key = self._path_key_cache.get(path)
+ if key is None:
+ key = self._ObjectKey(path, self._eroot)
+ self._path_key_cache[path] = key
+ return key
+
+ def _obj_key(self, path):
+ key = self._obj_key_cache.get(path)
+ if key is None:
+ key = self._ObjectKey(path, self._eroot)
+ self._obj_key_cache[path] = key
+ return key
+
+ class _ObjectKey(object):
+
+ """Helper class used as _obj_properties keys for objects."""
+
+ __slots__ = ("__weakref__", "_key")
+
+ def __init__(self, obj, root):
+ """
+ This takes a path to an object.
+
+ @param obj: path to a file
+ @type obj: string (example: '/usr/bin/bar')
+
+ """
+ self._key = self._generate_object_key(obj, root)
+
+ def __hash__(self):
+ return hash(self._key)
+
+ def __eq__(self, other):
+ return self._key == other._key
+
+ def _generate_object_key(self, obj, root):
+ """
+ Generate object key for a given object.
+
+ @param obj: path to a file
+ @type obj: string (example: '/usr/bin/bar')
+ @rtype: 2-tuple of types (long, int) if the object exists, string if it
+ does not exist.
+ @return:
+ 1. 2-tuple of the object's device and inode from a stat call, if the
+ object exists.
+ 2. realpath of the object if it does not exist.
+
+ """
+
+ os = _os_merge
+
+ try:
+ _unicode_encode(obj,
+ encoding=_encodings['merge'], errors='strict')
+ except UnicodeEncodeError:
+ # The package appears to have been merged with a
+ # different value of sys.getfilesystemencoding(),
+ # so fall back to utf_8 if appropriate.
+ try:
+ _unicode_encode(obj,
+ encoding=_encodings['fs'], errors='strict')
+ except UnicodeEncodeError:
+ pass
+ else:
+ os = portage.os
+
+ abs_path = os.path.join(root, obj.lstrip(os.sep))
+ try:
+ object_stat = os.stat(abs_path)
+ except OSError:
+ # Use the realpath as the key if the file does not exist on the
+ # filesystem.
+ return os.path.realpath(abs_path)
+ # Return a tuple of the device and inode.
+ return (object_stat.st_dev, object_stat.st_ino)
+
+ def file_exists(self):
+ """
+ Determine if the file for this key exists on the filesystem.
+
+ @rtype: Boolean
+ @return:
+ 1. True if the file exists.
+ 2. False if the file does not exist or is a broken symlink.
+
+ """
+ return isinstance(self._key, tuple)
+
+ class _LibGraphNode(_ObjectKey):
+ __slots__ = ("alt_paths",)
+
+ def __init__(self, obj, root):
+ LinkageMapELF._ObjectKey.__init__(self, obj, root)
+ self.alt_paths = set()
+
+ def __str__(self):
+ return str(sorted(self.alt_paths))
+
+ def rebuild(self, exclude_pkgs=None, include_file=None):
+ """
+ Raises CommandNotFound if there are preserved libs
+ and the scanelf binary is not available.
+ """
+
+ os = _os_merge
+ root = self._eroot
+ root_len = len(root) - 1
+ self._clear_cache()
+ self._defpath.update(getlibpaths(self._eroot))
+ libs = self._libs
+ obj_properties = self._obj_properties
+
+ lines = []
+
+ # Data from include_file is processed first so that it
+ # overrides any data from previously installed files.
+ if include_file is not None:
+ lines += grabfile(include_file)
+
+ aux_keys = [self._needed_aux_key]
+ for cpv in self._dbapi.cpv_all():
+ if exclude_pkgs is not None and cpv in exclude_pkgs:
+ continue
+ lines += self._dbapi.aux_get(cpv, aux_keys)[0].split('\n')
+ # Cache NEEDED.* files to avoid doing excessive IO for every rebuild.
+ self._dbapi.flush_cache()
+
+ # have to call scanelf for preserved libs here as they aren't
+ # registered in NEEDED.ELF.2 files
+ plibs = set()
+ if self._dbapi.plib_registry and self._dbapi.plib_registry.getPreservedLibs():
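+ # scanelf -F "%a;%F;%S;%r;%n" prints one line per ELF object:
+ # arch;filename;soname;RPATH;comma-separated NEEDED sonames.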
+ args = ["/usr/bin/scanelf", "-qF", "%a;%F;%S;%r;%n"]
+ for items in self._dbapi.plib_registry.getPreservedLibs().values():
+ plibs.update(items)
+ args.extend(os.path.join(root, x.lstrip("." + os.sep)) \
+ for x in items)
+ try:
+ proc = subprocess.Popen(args, stdout=subprocess.PIPE)
+ except EnvironmentError as e:
+ if e.errno != errno.ENOENT:
+ raise
+ raise CommandNotFound(args[0])
+ else:
+ for l in proc.stdout:
+ try:
+ l = _unicode_decode(l,
+ encoding=_encodings['content'], errors='strict')
+ except UnicodeDecodeError:
+ l = _unicode_decode(l,
+ encoding=_encodings['content'], errors='replace')
+ writemsg_level(_("\nError decoding characters " \
+ "returned from scanelf: %s\n\n") % (l,),
+ level=logging.ERROR, noiselevel=-1)
+ l = l[3:].rstrip("\n")
+ if not l:
+ continue
+ fields = l.split(";")
+ if len(fields) < 5:
+ writemsg_level(_("\nWrong number of fields " \
+ "returned from scanelf: %s\n\n") % (l,),
+ level=logging.ERROR, noiselevel=-1)
+ continue
+ fields[1] = fields[1][root_len:]
+ plibs.discard(fields[1])
+ lines.append(";".join(fields))
+ proc.wait()
+
+ if plibs:
+ # Preserved libraries that did not appear in the scanelf output.
+ # This is known to happen with statically linked libraries.
+ # Generate dummy lines for these, so we can assume that every
+ # preserved library has an entry in self._obj_properties. This
+ # is important in order to prevent findConsumers from raising
+ # an unwanted KeyError.
+ for x in plibs:
+ lines.append(";".join(['', x, '', '', '']))
+
+ for l in lines:
+ l = l.rstrip("\n")
+ if not l:
+ continue
+ fields = l.split(";")
+ if len(fields) < 5:
+ writemsg_level(_("\nWrong number of fields " \
+ "in %s: %s\n\n") % (self._needed_aux_key, l),
+ level=logging.ERROR, noiselevel=-1)
+ continue
+ arch = fields[0]
+ obj = fields[1]
+ soname = fields[2]
+ path = set([normalize_path(x) \
+ for x in filter(None, fields[3].replace(
+ "${ORIGIN}", os.path.dirname(obj)).replace(
+ "$ORIGIN", os.path.dirname(obj)).split(":"))])
+ needed = [x for x in fields[4].split(",") if x]
+
+ obj_key = self._obj_key(obj)
+ indexed = True
+ myprops = obj_properties.get(obj_key)
+ if myprops is None:
+ indexed = False
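+ # Each _obj_properties value is a 5-tuple:
+ # (arch, needed sonames, runpath set, soname, set of alternate object paths).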
+ myprops = (arch, needed, path, soname, set())
+ obj_properties[obj_key] = myprops
+ # All object paths are added into the obj_properties tuple.
+ myprops[4].add(obj)
+
+ # Don't index the same file more than once since only one
+ # set of data can be correct and therefore mixing data
+ # may corrupt the index (include_file overrides previously
+ # installed).
+ if indexed:
+ continue
+
+ arch_map = libs.get(arch)
+ if arch_map is None:
+ arch_map = {}
+ libs[arch] = arch_map
+ if soname:
+ soname_map = arch_map.get(soname)
+ if soname_map is None:
+ soname_map = self._soname_map_class(
+ providers=set(), consumers=set())
+ arch_map[soname] = soname_map
+ soname_map.providers.add(obj_key)
+ for needed_soname in needed:
+ soname_map = arch_map.get(needed_soname)
+ if soname_map is None:
+ soname_map = self._soname_map_class(
+ providers=set(), consumers=set())
+ arch_map[needed_soname] = soname_map
+ soname_map.consumers.add(obj_key)
+
+ def listBrokenBinaries(self, debug=False):
+ """
+ Find binaries and their needed sonames, which have no providers.
+
+ @param debug: Boolean to enable debug output
+ @type debug: Boolean
+ @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])})
+ @return: The return value is an object -> set-of-sonames mapping, where
+ object is a broken binary and the set consists of sonames needed by
+ object that have no corresponding libraries to fulfill the dependency.
+
+ """
+
+ os = _os_merge
+
+ class _LibraryCache(object):
+
+ """
+ Caches properties associated with paths.
+
+ The purpose of this class is to prevent multiple instances of
+ _ObjectKey for the same paths.
+
+ """
+
+ def __init__(cache_self):
+ cache_self.cache = {}
+
+ def get(cache_self, obj):
+ """
+ Caches and returns properties associated with an object.
+
+ @param obj: absolute path (can be symlink)
+ @type obj: string (example: '/usr/lib/libfoo.so')
+ @rtype: 4-tuple with types
+ (string or None, string or None, 2-tuple, Boolean)
+ @return: 4-tuple with the following components:
+ 1. arch as a string or None if it does not exist,
+ 2. soname as a string or None if it does not exist,
+ 3. obj_key as 2-tuple,
+ 4. Boolean representing whether the object exists.
+ (example: ('EM_X86_64', 'libfoo.so.1', (123L, 456L), True))
+
+ """
+ if obj in cache_self.cache:
+ return cache_self.cache[obj]
+ else:
+ obj_key = self._obj_key(obj)
+ # Check that the library exists on the filesystem.
+ if obj_key.file_exists():
+ # Get the arch and soname from LinkageMap._obj_properties if
+ # it exists. Otherwise, None.
+ arch, _needed, _path, soname, _objs = \
+ self._obj_properties.get(obj_key, (None,)*5)
+ return cache_self.cache.setdefault(obj, \
+ (arch, soname, obj_key, True))
+ else:
+ return cache_self.cache.setdefault(obj, \
+ (None, None, obj_key, False))
+
+ rValue = {}
+ cache = _LibraryCache()
+ providers = self.listProviders()
+
+ # Iterate over all obj_keys and their providers.
+ for obj_key, sonames in providers.items():
+ arch, _needed, path, _soname, objs = self._obj_properties[obj_key]
+ path = path.union(self._defpath)
+ # Iterate over each needed soname and the set of library paths that
+ # fulfill the soname to determine if the dependency is broken.
+ for soname, libraries in sonames.items():
+ # validLibraries is used to store libraries, which satisfy soname,
+ # so if no valid libraries are found, the soname is not satisfied
+ # for obj_key. If unsatisfied, objects associated with obj_key
+ # must be emerged.
+ validLibraries = set()
+ # It could be the case that the library to satisfy the soname is
+ # not in the obj's runpath, but a symlink to the library is (eg
+ # libnvidia-tls.so.1 in nvidia-drivers). Also, since LinkageMap
+ # does not catalog symlinks, broken or missing symlinks may go
+ # unnoticed. As a result of these cases, check that a file with
+ # the same name as the soname exists in obj's runpath.
+ # XXX If we catalog symlinks in LinkageMap, this could be improved.
+ for directory in path:
+ cachedArch, cachedSoname, cachedKey, cachedExists = \
+ cache.get(os.path.join(directory, soname))
+ # Check that this library provides the needed soname. Doing
+ # this, however, will cause consumers of libraries missing
+ # sonames to be unnecessarily emerged. (eg libmix.so)
+ if cachedSoname == soname and cachedArch == arch:
+ validLibraries.add(cachedKey)
+ if debug and cachedKey not in \
+ set(map(self._obj_key_cache.get, libraries)):
+ # XXX This is most often due to soname symlinks not in
+ # a library's directory. We could catalog symlinks in
+ # LinkageMap to avoid checking for this edge case here.
+ writemsg_level(
+ _("Found provider outside of findProviders:") + \
+ (" %s -> %s %s\n" % (os.path.join(directory, soname),
+ self._obj_properties[cachedKey][4], libraries)),
+ level=logging.DEBUG,
+ noiselevel=-1)
+ # A valid library has been found, so there is no need to
+ # continue.
+ break
+ if debug and cachedArch == arch and \
+ cachedKey in self._obj_properties:
+ writemsg_level((_("Broken symlink or missing/bad soname: " + \
+ "%(dir_soname)s -> %(cachedKey)s " + \
+ "with soname %(cachedSoname)s but expecting %(soname)s") % \
+ {"dir_soname":os.path.join(directory, soname),
+ "cachedKey": self._obj_properties[cachedKey],
+ "cachedSoname": cachedSoname, "soname":soname}) + "\n",
+ level=logging.DEBUG,
+ noiselevel=-1)
+ # This conditional checks if there are no libraries to satisfy the
+ # soname (empty set).
+ if not validLibraries:
+ for obj in objs:
+ rValue.setdefault(obj, set()).add(soname)
+ # If no valid libraries have been found by this point, then
+ # there are no files named with the soname within obj's runpath,
+ # but if there are libraries (from the providers mapping), it is
+ # likely that soname symlinks or the actual libraries are
+ # missing or broken. Thus those libraries are added to rValue
+ # in order to emerge corrupt library packages.
+ for lib in libraries:
+ rValue.setdefault(lib, set()).add(soname)
+ if debug:
+ if not os.path.isfile(lib):
+ writemsg_level(_("Missing library:") + " %s\n" % (lib,),
+ level=logging.DEBUG,
+ noiselevel=-1)
+ else:
+ writemsg_level(_("Possibly missing symlink:") + \
+ "%s\n" % (os.path.join(os.path.dirname(lib), soname)),
+ level=logging.DEBUG,
+ noiselevel=-1)
+ return rValue
+
+ def listProviders(self):
+ """
+ Find the providers for all object keys in LinkageMap.
+
+ @rtype: dict (example:
+ {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}})
+ @return: The return value is an object key -> providers mapping, where
+ providers is a mapping of soname -> set-of-library-paths returned
+ from the findProviders method.
+
+ """
+ rValue = {}
+ if not self._libs:
+ self.rebuild()
+ # Iterate over all object keys within LinkageMap.
+ for obj_key in self._obj_properties:
+ rValue.setdefault(obj_key, self.findProviders(obj_key))
+ return rValue
+
+ def isMasterLink(self, obj):
+ """
+ Determine whether an object is a master link.
+
+ @param obj: absolute path to an object
+ @type obj: string (example: '/usr/bin/foo')
+ @rtype: Boolean
+ @return:
+ 1. True if obj is a master link
+ 2. False if obj is not a master link
+
+ """
+ os = _os_merge
+ basename = os.path.basename(obj)
+ obj_key = self._obj_key(obj)
+ if obj_key not in self._obj_properties:
+ raise KeyError("%s (%s) not in object list" % (obj_key, obj))
+ soname = self._obj_properties[obj_key][3]
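+ # A master link (e.g. libfoo.so) has a basename that is shorter than
+ # the soname it points to (e.g. libfoo.so.1), so compare lengths.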
+ return (len(basename) < len(soname))
+
+ def listLibraryObjects(self):
+ """
+ Return a list of library objects.
+
+ Known limitation: library objects lacking an soname are not included.
+
+ @rtype: list of strings
+ @return: list of paths to all providers
+
+ """
+ rValue = []
+ if not self._libs:
+ self.rebuild()
+ for arch_map in self._libs.values():
+ for soname_map in arch_map.values():
+ for obj_key in soname_map.providers:
+ rValue.extend(self._obj_properties[obj_key][4])
+ return rValue
+
+ def getSoname(self, obj):
+ """
+ Return the soname associated with an object.
+
+ @param obj: absolute path to an object
+ @type obj: string (example: '/usr/bin/bar')
+ @rtype: string
+ @return: soname as a string
+
+ """
+ if not self._libs:
+ self.rebuild()
+ if isinstance(obj, self._ObjectKey):
+ obj_key = obj
+ if obj_key not in self._obj_properties:
+ raise KeyError("%s not in object list" % obj_key)
+ return self._obj_properties[obj_key][3]
+ if obj not in self._obj_key_cache:
+ raise KeyError("%s not in object list" % obj)
+ return self._obj_properties[self._obj_key_cache[obj]][3]
+
+ def findProviders(self, obj):
+ """
+ Find providers for an object or object key.
+
+ This method may be called with a key from _obj_properties.
+
+ In some cases, not all valid libraries are returned. This may occur when
+ an soname symlink referencing a library is in an object's runpath while
+ the actual library is not. We should consider cataloging symlinks within
+ LinkageMap as this would avoid those cases and would be a better model of
+ library dependencies (since the dynamic linker actually searches for
+ files named with the soname in the runpaths).
+
+ @param obj: absolute path to an object or a key from _obj_properties
+ @type obj: string (example: '/usr/bin/bar') or _ObjectKey
+ @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])})
+ @return: The return value is a soname -> set-of-library-paths mapping,
+ where each set of library paths satisfies the corresponding soname.
+
+ """
+
+ os = _os_merge
+
+ rValue = {}
+
+ if not self._libs:
+ self.rebuild()
+
+ # Determine the obj_key from the arguments.
+ if isinstance(obj, self._ObjectKey):
+ obj_key = obj
+ if obj_key not in self._obj_properties:
+ raise KeyError("%s not in object list" % obj_key)
+ else:
+ obj_key = self._obj_key(obj)
+ if obj_key not in self._obj_properties:
+ raise KeyError("%s (%s) not in object list" % (obj_key, obj))
+
+ arch, needed, path, _soname, _objs = self._obj_properties[obj_key]
+ path_keys = set(self._path_key(x) for x in path.union(self._defpath))
+ for soname in needed:
+ rValue[soname] = set()
+ if arch not in self._libs or soname not in self._libs[arch]:
+ continue
+ # For each potential provider of the soname, add it to rValue if it
+ # resides in the obj's runpath.
+ for provider_key in self._libs[arch][soname].providers:
+ providers = self._obj_properties[provider_key][4]
+ for provider in providers:
+ if self._path_key(os.path.dirname(provider)) in path_keys:
+ rValue[soname].add(provider)
+ return rValue
+
+ def findConsumers(self, obj):
+ """
+ Find consumers of an object or object key.
+
+ This method may be called with a key from _obj_properties. Note that
+ the shadowed-library check below is only performed for string arguments,
+ so pass the object's path as a string rather than a new _ObjectKey
+ instance if shadowed libraries should be detected.
+
+ In some cases, not all consumers are returned. This may occur when
+ an soname symlink referencing a library is in an object's runpath while
+ the actual library is not. For example, this problem is noticeable for
+ binutils since its libraries are added to the path via symlinks that
+ are generated in the /usr/$CHOST/lib/ directory by binutils-config.
+ Failure to recognize consumers of these symlinks makes preserve-libs
+ fail to preserve binutils libs that are needed by these unrecognized
+ consumers.
+
+ Note that library consumption via dlopen (common for kde plugins) is
+ currently undetected. However, it is possible to use the
+ corresponding libtool archive (*.la) files to detect such consumers
+ (revdep-rebuild is able to detect them).
+
+ @param obj: absolute path to an object or a key from _obj_properties
+ @type obj: string (example: '/usr/bin/bar') or _ObjectKey
+ @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
+ @return: The return value is the set of paths to all consumers of the
+ given object, i.e. objects that list its soname in their NEEDED entries
+ and have one of its directories in their runpath.
+
+ """
+
+ os = _os_merge
+
+ rValue = set()
+
+ if not self._libs:
+ self.rebuild()
+
+ # Determine the obj_key and the set of objects matching the arguments.
+ if isinstance(obj, self._ObjectKey):
+ obj_key = obj
+ if obj_key not in self._obj_properties:
+ raise KeyError("%s not in object list" % obj_key)
+ objs = self._obj_properties[obj_key][4]
+ else:
+ objs = set([obj])
+ obj_key = self._obj_key(obj)
+ if obj_key not in self._obj_properties:
+ raise KeyError("%s (%s) not in object list" % (obj_key, obj))
+
+ # If there is another version of this lib with the
+ # same soname and the master link points to that
+ # other version, this lib will be shadowed and won't
+ # have any consumers.
+ if not isinstance(obj, self._ObjectKey):
+ soname = self._obj_properties[obj_key][3]
+ master_link = os.path.join(self._eroot,
+ os.path.dirname(obj).lstrip(os.path.sep), soname)
+ try:
+ master_st = os.stat(master_link)
+ obj_st = os.stat(obj)
+ except OSError:
+ pass
+ else:
+ if (obj_st.st_dev, obj_st.st_ino) != \
+ (master_st.st_dev, master_st.st_ino):
+ return set()
+
+ # Determine the directory(ies) from the set of objects.
+ objs_dir_keys = set(self._path_key(os.path.dirname(x)) for x in objs)
+ defpath_keys = set(self._path_key(x) for x in self._defpath)
+
+ arch, _needed, _path, soname, _objs = self._obj_properties[obj_key]
+ if arch in self._libs and soname in self._libs[arch]:
+ # For each potential consumer, add it to rValue if an object from the
+ # arguments resides in the consumer's runpath.
+ for consumer_key in self._libs[arch][soname].consumers:
+ _arch, _needed, path, _soname, consumer_objs = \
+ self._obj_properties[consumer_key]
+ path_keys = defpath_keys.union(self._path_key(x) for x in path)
+ if objs_dir_keys.intersection(path_keys):
+ rValue.update(consumer_objs)
+ return rValue