| | | |
|---|---|---|
| author | Zac Medico <zmedico@gentoo.org> | 2010-08-29 20:28:45 -0700 |
| committer | Zac Medico <zmedico@gentoo.org> | 2010-08-29 22:25:48 -0700 |
| commit | e189d132728fcc6efa7df5d6c0c6598209ca446d (patch) | |
| tree | 25c282715bd4ed9a01d1494fbd4e51f34cb66dce | |
| parent | eebc987be16523e0912d4bbbe96667ccb47b1ba4 (diff) | |
| download | portage-e189d132728fcc6efa7df5d6c0c6598209ca446d.tar.gz, portage-e189d132728fcc6efa7df5d6c0c6598209ca446d.tar.bz2, portage-e189d132728fcc6efa7df5d6c0c6598209ca446d.zip | |
Move LinkageMap to a new portage.util._dyn_libs.LinkageMapELF module.
| -rw-r--r-- | pym/portage/dbapi/vartree.py | 617 |
| -rw-r--r-- | pym/portage/util/_dyn_libs/LinkageMapELF.py | 633 |
2 files changed, 636 insertions, 614 deletions
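Existing callers keep working because the vartree.py hunk below swaps the in-file class for a lazy-import alias, `'portage.util._dyn_libs.LinkageMapELF:LinkageMapELF@LinkageMap'`, which binds the relocated class to the old local name `LinkageMap`. The snippet below is only an orientation sketch of the two spellings after this commit; it assumes a Portage checkout of roughly this vintage and is not part of the patch:

```python
# New canonical location introduced by this commit:
from portage.util._dyn_libs.LinkageMapELF import LinkageMapELF

# Inside pym/portage/dbapi/vartree.py the same class remains reachable as
# "LinkageMap": the lazyimport entry
#     'portage.util._dyn_libs.LinkageMapELF:LinkageMapELF@LinkageMap'
# resolves LinkageMapELF under the old name on first use, so code in that
# module did not need to be rewritten beyond the import table.
```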
diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py index fe8e0eac4..48b0bc09f 100644 --- a/pym/portage/dbapi/vartree.py +++ b/pym/portage/dbapi/vartree.py @@ -1,7 +1,7 @@ # Copyright 1998-2010 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -__all__ = ["LinkageMap", +__all__ = [ "vardbapi", "vartree", "dblink"] + \ ["write_contents", "tar_contents"] @@ -21,11 +21,12 @@ portage.proxy.lazyimport.lazyimport(globals(), 'portage.update:fixdbentries', 'portage.util:apply_secpass_permissions,ConfigProtect,ensure_dirs,' + \ 'writemsg,writemsg_level,write_atomic,atomic_ofstream,writedict,' + \ - 'grabfile,grabdict,normalize_path,new_protect_filename,getlibpaths', + 'grabdict,normalize_path,new_protect_filename', 'portage.util.digraph:digraph', 'portage.util.env_update:env_update', 'portage.util.listdir:dircache,listdir', 'portage.util._dyn_libs.PreservedLibsRegistry:PreservedLibsRegistry', + 'portage.util._dyn_libs.LinkageMapELF:LinkageMapELF@LinkageMap', 'portage.versions:best,catpkgsplit,catsplit,cpv_getkey,pkgcmp,' + \ '_pkgsplit@pkgsplit', ) @@ -49,7 +50,6 @@ from portage import _selinux_merge from portage import _unicode_decode from portage import _unicode_encode -from portage.cache.mappings import slot_dict_class from _emerge.TaskScheduler import TaskScheduler from _emerge.MiscFunctionsProcess import MiscFunctionsProcess @@ -73,617 +73,6 @@ if sys.hexversion >= 0x3000000: basestring = str long = int -class LinkageMap(object): - - """Models dynamic linker dependencies.""" - - _needed_aux_key = "NEEDED.ELF.2" - _soname_map_class = slot_dict_class( - ("consumers", "providers"), prefix="") - - def __init__(self, vardbapi): - self._dbapi = vardbapi - self._root = self._dbapi._eroot - self._libs = {} - self._obj_properties = {} - self._obj_key_cache = {} - self._defpath = set() - self._path_key_cache = {} - - def _clear_cache(self): - self._libs.clear() - self._obj_properties.clear() - self._obj_key_cache.clear() - self._defpath.clear() - self._path_key_cache.clear() - - def _path_key(self, path): - key = self._path_key_cache.get(path) - if key is None: - key = self._ObjectKey(path, self._root) - self._path_key_cache[path] = key - return key - - def _obj_key(self, path): - key = self._obj_key_cache.get(path) - if key is None: - key = self._ObjectKey(path, self._root) - self._obj_key_cache[path] = key - return key - - class _ObjectKey(object): - - """Helper class used as _obj_properties keys for objects.""" - - __slots__ = ("__weakref__", "_key") - - def __init__(self, obj, root): - """ - This takes a path to an object. - - @param object: path to a file - @type object: string (example: '/usr/bin/bar') - - """ - self._key = self._generate_object_key(obj, root) - - def __hash__(self): - return hash(self._key) - - def __eq__(self, other): - return self._key == other._key - - def _generate_object_key(self, obj, root): - """ - Generate object key for a given object. - - @param object: path to a file - @type object: string (example: '/usr/bin/bar') - @rtype: 2-tuple of types (long, int) if object exists. string if - object does not exist. - @return: - 1. 2-tuple of object's inode and device from a stat call, if object - exists. - 2. realpath of object if object does not exist. 
- - """ - - os = _os_merge - - try: - _unicode_encode(obj, - encoding=_encodings['merge'], errors='strict') - except UnicodeEncodeError: - # The package appears to have been merged with a - # different value of sys.getfilesystemencoding(), - # so fall back to utf_8 if appropriate. - try: - _unicode_encode(obj, - encoding=_encodings['fs'], errors='strict') - except UnicodeEncodeError: - pass - else: - os = portage.os - - abs_path = os.path.join(root, obj.lstrip(os.sep)) - try: - object_stat = os.stat(abs_path) - except OSError: - # Use the realpath as the key if the file does not exists on the - # filesystem. - return os.path.realpath(abs_path) - # Return a tuple of the device and inode. - return (object_stat.st_dev, object_stat.st_ino) - - def file_exists(self): - """ - Determine if the file for this key exists on the filesystem. - - @rtype: Boolean - @return: - 1. True if the file exists. - 2. False if the file does not exist or is a broken symlink. - - """ - return isinstance(self._key, tuple) - - class _LibGraphNode(_ObjectKey): - __slots__ = ("alt_paths",) - - def __init__(self, obj, root): - LinkageMap._ObjectKey.__init__(self, obj, root) - self.alt_paths = set() - - def __str__(self): - return str(sorted(self.alt_paths)) - - def rebuild(self, exclude_pkgs=None, include_file=None): - """ - Raises CommandNotFound if there are preserved libs - and the scanelf binary is not available. - """ - - os = _os_merge - root = self._root - root_len = len(root) - 1 - self._clear_cache() - self._defpath.update(getlibpaths(self._root)) - libs = self._libs - obj_key_cache = self._obj_key_cache - obj_properties = self._obj_properties - - lines = [] - - # Data from include_file is processed first so that it - # overrides any data from previously installed files. - if include_file is not None: - lines += grabfile(include_file) - - aux_keys = [self._needed_aux_key] - for cpv in self._dbapi.cpv_all(): - if exclude_pkgs is not None and cpv in exclude_pkgs: - continue - lines += self._dbapi.aux_get(cpv, aux_keys)[0].split('\n') - # Cache NEEDED.* files avoid doing excessive IO for every rebuild. - self._dbapi.flush_cache() - - # have to call scanelf for preserved libs here as they aren't - # registered in NEEDED.ELF.2 files - plibs = set() - if self._dbapi.plib_registry and self._dbapi.plib_registry.getPreservedLibs(): - args = ["/usr/bin/scanelf", "-qF", "%a;%F;%S;%r;%n"] - for items in self._dbapi.plib_registry.getPreservedLibs().values(): - plibs.update(items) - args.extend(os.path.join(root, x.lstrip("." + os.sep)) \ - for x in items) - try: - proc = subprocess.Popen(args, stdout=subprocess.PIPE) - except EnvironmentError as e: - if e.errno != errno.ENOENT: - raise - raise CommandNotFound(args[0]) - else: - for l in proc.stdout: - try: - l = _unicode_decode(l, - encoding=_encodings['content'], errors='strict') - except UnicodeDecodeError: - l = _unicode_decode(l, - encoding=_encodings['content'], errors='replace') - writemsg_level(_("\nError decoding characters " \ - "returned from scanelf: %s\n\n") % (l,), - level=logging.ERROR, noiselevel=-1) - l = l[3:].rstrip("\n") - if not l: - continue - fields = l.split(";") - if len(fields) < 5: - writemsg_level(_("\nWrong number of fields " \ - "returned from scanelf: %s\n\n") % (l,), - level=logging.ERROR, noiselevel=-1) - continue - fields[1] = fields[1][root_len:] - plibs.discard(fields[1]) - lines.append(";".join(fields)) - proc.wait() - - if plibs: - # Preserved libraries that did not appear in the scanelf output. 
- # This is known to happen with statically linked libraries. - # Generate dummy lines for these, so we can assume that every - # preserved library has an entry in self._obj_properties. This - # is important in order to prevent findConsumers from raising - # an unwanted KeyError. - for x in plibs: - lines.append(";".join(['', x, '', '', ''])) - - for l in lines: - l = l.rstrip("\n") - if not l: - continue - fields = l.split(";") - if len(fields) < 5: - writemsg_level(_("\nWrong number of fields " \ - "in %s: %s\n\n") % (self._needed_aux_key, l), - level=logging.ERROR, noiselevel=-1) - continue - arch = fields[0] - obj = fields[1] - soname = fields[2] - path = set([normalize_path(x) \ - for x in filter(None, fields[3].replace( - "${ORIGIN}", os.path.dirname(obj)).replace( - "$ORIGIN", os.path.dirname(obj)).split(":"))]) - needed = [x for x in fields[4].split(",") if x] - - obj_key = self._obj_key(obj) - indexed = True - myprops = obj_properties.get(obj_key) - if myprops is None: - indexed = False - myprops = (arch, needed, path, soname, set()) - obj_properties[obj_key] = myprops - # All object paths are added into the obj_properties tuple. - myprops[4].add(obj) - - # Don't index the same file more that once since only one - # set of data can be correct and therefore mixing data - # may corrupt the index (include_file overrides previously - # installed). - if indexed: - continue - - arch_map = libs.get(arch) - if arch_map is None: - arch_map = {} - libs[arch] = arch_map - if soname: - soname_map = arch_map.get(soname) - if soname_map is None: - soname_map = self._soname_map_class( - providers=set(), consumers=set()) - arch_map[soname] = soname_map - soname_map.providers.add(obj_key) - for needed_soname in needed: - soname_map = arch_map.get(needed_soname) - if soname_map is None: - soname_map = self._soname_map_class( - providers=set(), consumers=set()) - arch_map[needed_soname] = soname_map - soname_map.consumers.add(obj_key) - - def listBrokenBinaries(self, debug=False): - """ - Find binaries and their needed sonames, which have no providers. - - @param debug: Boolean to enable debug output - @type debug: Boolean - @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])}) - @return: The return value is an object -> set-of-sonames mapping, where - object is a broken binary and the set consists of sonames needed by - object that have no corresponding libraries to fulfill the dependency. - - """ - - os = _os_merge - - class _LibraryCache(object): - - """ - Caches properties associated with paths. - - The purpose of this class is to prevent multiple instances of - _ObjectKey for the same paths. - - """ - - def __init__(cache_self): - cache_self.cache = {} - - def get(cache_self, obj): - """ - Caches and returns properties associated with an object. - - @param obj: absolute path (can be symlink) - @type obj: string (example: '/usr/lib/libfoo.so') - @rtype: 4-tuple with types - (string or None, string or None, 2-tuple, Boolean) - @return: 4-tuple with the following components: - 1. arch as a string or None if it does not exist, - 2. soname as a string or None if it does not exist, - 3. obj_key as 2-tuple, - 4. Boolean representing whether the object exists. - (example: ('libfoo.so.1', (123L, 456L), True)) - - """ - if obj in cache_self.cache: - return cache_self.cache[obj] - else: - obj_key = self._obj_key(obj) - # Check that the library exists on the filesystem. - if obj_key.file_exists(): - # Get the arch and soname from LinkageMap._obj_properties if - # it exists. Otherwise, None. 
- arch, _needed, _path, soname, _objs = \ - self._obj_properties.get(obj_key, (None,)*5) - return cache_self.cache.setdefault(obj, \ - (arch, soname, obj_key, True)) - else: - return cache_self.cache.setdefault(obj, \ - (None, None, obj_key, False)) - - rValue = {} - cache = _LibraryCache() - providers = self.listProviders() - - # Iterate over all obj_keys and their providers. - for obj_key, sonames in providers.items(): - arch, _needed, path, _soname, objs = self._obj_properties[obj_key] - path = path.union(self._defpath) - # Iterate over each needed soname and the set of library paths that - # fulfill the soname to determine if the dependency is broken. - for soname, libraries in sonames.items(): - # validLibraries is used to store libraries, which satisfy soname, - # so if no valid libraries are found, the soname is not satisfied - # for obj_key. If unsatisfied, objects associated with obj_key - # must be emerged. - validLibraries = set() - # It could be the case that the library to satisfy the soname is - # not in the obj's runpath, but a symlink to the library is (eg - # libnvidia-tls.so.1 in nvidia-drivers). Also, since LinkageMap - # does not catalog symlinks, broken or missing symlinks may go - # unnoticed. As a result of these cases, check that a file with - # the same name as the soname exists in obj's runpath. - # XXX If we catalog symlinks in LinkageMap, this could be improved. - for directory in path: - cachedArch, cachedSoname, cachedKey, cachedExists = \ - cache.get(os.path.join(directory, soname)) - # Check that this library provides the needed soname. Doing - # this, however, will cause consumers of libraries missing - # sonames to be unnecessarily emerged. (eg libmix.so) - if cachedSoname == soname and cachedArch == arch: - validLibraries.add(cachedKey) - if debug and cachedKey not in \ - set(map(self._obj_key_cache.get, libraries)): - # XXX This is most often due to soname symlinks not in - # a library's directory. We could catalog symlinks in - # LinkageMap to avoid checking for this edge case here. - writemsg( - _("Found provider outside of findProviders:") + \ - (" %s -> %s %s\n" % (os.path.join(directory, soname), - self._obj_properties[cachedKey][4], libraries)), - noiselevel=-1) - # A valid library has been found, so there is no need to - # continue. - break - if debug and cachedArch == arch and \ - cachedKey in self._obj_properties: - writemsg((_("Broken symlink or missing/bad soname: " + \ - "%(dir_soname)s -> %(cachedKey)s " + \ - "with soname %(cachedSoname)s but expecting %(soname)s") % \ - {"dir_soname":os.path.join(directory, soname), - "cachedKey": self._obj_properties[cachedKey], - "cachedSoname": cachedSoname, "soname":soname}) + "\n", - noiselevel=-1) - # This conditional checks if there are no libraries to satisfy the - # soname (empty set). - if not validLibraries: - for obj in objs: - rValue.setdefault(obj, set()).add(soname) - # If no valid libraries have been found by this point, then - # there are no files named with the soname within obj's runpath, - # but if there are libraries (from the providers mapping), it is - # likely that soname symlinks or the actual libraries are - # missing or broken. Thus those libraries are added to rValue - # in order to emerge corrupt library packages. 
- for lib in libraries: - rValue.setdefault(lib, set()).add(soname) - if debug: - if not os.path.isfile(lib): - writemsg(_("Missing library:") + " %s\n" % (lib,), - noiselevel=-1) - else: - writemsg(_("Possibly missing symlink:") + \ - "%s\n" % (os.path.join(os.path.dirname(lib), soname)), - noiselevel=-1) - return rValue - - def listProviders(self): - """ - Find the providers for all object keys in LinkageMap. - - @rtype: dict (example: - {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}}) - @return: The return value is an object key -> providers mapping, where - providers is a mapping of soname -> set-of-library-paths returned - from the findProviders method. - - """ - rValue = {} - if not self._libs: - self.rebuild() - # Iterate over all object keys within LinkageMap. - for obj_key in self._obj_properties: - rValue.setdefault(obj_key, self.findProviders(obj_key)) - return rValue - - def isMasterLink(self, obj): - """ - Determine whether an object is a master link. - - @param obj: absolute path to an object - @type obj: string (example: '/usr/bin/foo') - @rtype: Boolean - @return: - 1. True if obj is a master link - 2. False if obj is not a master link - - """ - os = _os_merge - basename = os.path.basename(obj) - obj_key = self._obj_key(obj) - if obj_key not in self._obj_properties: - raise KeyError("%s (%s) not in object list" % (obj_key, obj)) - soname = self._obj_properties[obj_key][3] - return (len(basename) < len(soname)) - - def listLibraryObjects(self): - """ - Return a list of library objects. - - Known limitation: library objects lacking an soname are not included. - - @rtype: list of strings - @return: list of paths to all providers - - """ - rValue = [] - if not self._libs: - self.rebuild() - for arch_map in self._libs.values(): - for soname_map in arch_map.values(): - for obj_key in soname_map.providers: - rValue.extend(self._obj_properties[obj_key][4]) - return rValue - - def getSoname(self, obj): - """ - Return the soname associated with an object. - - @param obj: absolute path to an object - @type obj: string (example: '/usr/bin/bar') - @rtype: string - @return: soname as a string - - """ - if not self._libs: - self.rebuild() - if isinstance(obj, self._ObjectKey): - obj_key = obj - if obj_key not in self._obj_properties: - raise KeyError("%s not in object list" % obj_key) - return self._obj_properties[obj_key][3] - if obj not in self._obj_key_cache: - raise KeyError("%s not in object list" % obj) - return self._obj_properties[self._obj_key_cache[obj]][3] - - def findProviders(self, obj): - """ - Find providers for an object or object key. - - This method may be called with a key from _obj_properties. - - In some cases, not all valid libraries are returned. This may occur when - an soname symlink referencing a library is in an object's runpath while - the actual library is not. We should consider cataloging symlinks within - LinkageMap as this would avoid those cases and would be a better model of - library dependencies (since the dynamic linker actually searches for - files named with the soname in the runpaths). - - @param obj: absolute path to an object or a key from _obj_properties - @type obj: string (example: '/usr/bin/bar') or _ObjectKey - @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])}) - @return: The return value is a soname -> set-of-library-paths, where - set-of-library-paths satisfy soname. - - """ - - os = _os_merge - - rValue = {} - - if not self._libs: - self.rebuild() - - # Determine the obj_key from the arguments. 
- if isinstance(obj, self._ObjectKey): - obj_key = obj - if obj_key not in self._obj_properties: - raise KeyError("%s not in object list" % obj_key) - else: - obj_key = self._obj_key(obj) - if obj_key not in self._obj_properties: - raise KeyError("%s (%s) not in object list" % (obj_key, obj)) - - arch, needed, path, _soname, _objs = self._obj_properties[obj_key] - path_keys = set(self._path_key(x) for x in path.union(self._defpath)) - for soname in needed: - rValue[soname] = set() - if arch not in self._libs or soname not in self._libs[arch]: - continue - # For each potential provider of the soname, add it to rValue if it - # resides in the obj's runpath. - for provider_key in self._libs[arch][soname].providers: - providers = self._obj_properties[provider_key][4] - for provider in providers: - if self._path_key(os.path.dirname(provider)) in path_keys: - rValue[soname].add(provider) - return rValue - - def findConsumers(self, obj): - """ - Find consumers of an object or object key. - - This method may be called with a key from _obj_properties. If this - method is going to be called with an object key, to avoid not catching - shadowed libraries, do not pass new _ObjectKey instances to this method. - Instead pass the obj as a string. - - In some cases, not all consumers are returned. This may occur when - an soname symlink referencing a library is in an object's runpath while - the actual library is not. For example, this problem is noticeable for - binutils since it's libraries are added to the path via symlinks that - are gemerated in the /usr/$CHOST/lib/ directory by binutils-config. - Failure to recognize consumers of these symlinks makes preserve-libs - fail to preserve binutils libs that are needed by these unrecognized - consumers. - - Note that library consumption via dlopen (common for kde plugins) is - currently undetected. However, it is possible to use the - corresponding libtool archive (*.la) files to detect such consumers - (revdep-rebuild is able to detect them). - - @param obj: absolute path to an object or a key from _obj_properties - @type obj: string (example: '/usr/bin/bar') or _ObjectKey - @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar'])) - @return: The return value is a soname -> set-of-library-paths, where - set-of-library-paths satisfy soname. - - """ - - os = _os_merge - - rValue = set() - - if not self._libs: - self.rebuild() - - # Determine the obj_key and the set of objects matching the arguments. - if isinstance(obj, self._ObjectKey): - obj_key = obj - if obj_key not in self._obj_properties: - raise KeyError("%s not in object list" % obj_key) - objs = self._obj_properties[obj_key][4] - else: - objs = set([obj]) - obj_key = self._obj_key(obj) - if obj_key not in self._obj_properties: - raise KeyError("%s (%s) not in object list" % (obj_key, obj)) - - # If there is another version of this lib with the - # same soname and the master link points to that - # other version, this lib will be shadowed and won't - # have any consumers. - if not isinstance(obj, self._ObjectKey): - soname = self._obj_properties[obj_key][3] - master_link = os.path.join(self._root, - os.path.dirname(obj).lstrip(os.path.sep), soname) - try: - master_st = os.stat(master_link) - obj_st = os.stat(obj) - except OSError: - pass - else: - if (obj_st.st_dev, obj_st.st_ino) != \ - (master_st.st_dev, master_st.st_ino): - return set() - - # Determine the directory(ies) from the set of objects. 
- objs_dir_keys = set(self._path_key(os.path.dirname(x)) for x in objs) - defpath_keys = set(self._path_key(x) for x in self._defpath) - - arch, _needed, _path, soname, _objs = self._obj_properties[obj_key] - if arch in self._libs and soname in self._libs[arch]: - # For each potential consumer, add it to rValue if an object from the - # arguments resides in the consumer's runpath. - for consumer_key in self._libs[arch][soname].consumers: - _arch, _needed, path, _soname, consumer_objs = \ - self._obj_properties[consumer_key] - path_keys = defpath_keys.union(self._path_key(x) for x in path) - if objs_dir_keys.intersection(path_keys): - rValue.update(consumer_objs) - return rValue - class vardbapi(dbapi): _excluded_dirs = ["CVS", "lost+found"] diff --git a/pym/portage/util/_dyn_libs/LinkageMapELF.py b/pym/portage/util/_dyn_libs/LinkageMapELF.py new file mode 100644 index 000000000..a159f7a59 --- /dev/null +++ b/pym/portage/util/_dyn_libs/LinkageMapELF.py @@ -0,0 +1,633 @@ +# Copyright 1998-2010 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +import errno +import logging +import subprocess + +import portage +from portage import _encodings +from portage import _os_merge +from portage import _unicode_decode +from portage import _unicode_encode +from portage.cache.mappings import slot_dict_class +from portage.exception import CommandNotFound +from portage.localization import _ +from portage.util import getlibpaths +from portage.util import grabfile +from portage.util import normalize_path +from portage.util import writemsg_level + +class LinkageMapELF(object): + + """Models dynamic linker dependencies.""" + + _needed_aux_key = "NEEDED.ELF.2" + _soname_map_class = slot_dict_class( + ("consumers", "providers"), prefix="") + + def __init__(self, vardbapi): + self._dbapi = vardbapi + self._eroot = self._dbapi._eroot + self._libs = {} + self._obj_properties = {} + self._obj_key_cache = {} + self._defpath = set() + self._path_key_cache = {} + + def _clear_cache(self): + self._libs.clear() + self._obj_properties.clear() + self._obj_key_cache.clear() + self._defpath.clear() + self._path_key_cache.clear() + + def _path_key(self, path): + key = self._path_key_cache.get(path) + if key is None: + key = self._ObjectKey(path, self._eroot) + self._path_key_cache[path] = key + return key + + def _obj_key(self, path): + key = self._obj_key_cache.get(path) + if key is None: + key = self._ObjectKey(path, self._eroot) + self._obj_key_cache[path] = key + return key + + class _ObjectKey(object): + + """Helper class used as _obj_properties keys for objects.""" + + __slots__ = ("__weakref__", "_key") + + def __init__(self, obj, root): + """ + This takes a path to an object. + + @param object: path to a file + @type object: string (example: '/usr/bin/bar') + + """ + self._key = self._generate_object_key(obj, root) + + def __hash__(self): + return hash(self._key) + + def __eq__(self, other): + return self._key == other._key + + def _generate_object_key(self, obj, root): + """ + Generate object key for a given object. + + @param object: path to a file + @type object: string (example: '/usr/bin/bar') + @rtype: 2-tuple of types (long, int) if object exists. string if + object does not exist. + @return: + 1. 2-tuple of object's inode and device from a stat call, if object + exists. + 2. realpath of object if object does not exist. 
+ + """ + + os = _os_merge + + try: + _unicode_encode(obj, + encoding=_encodings['merge'], errors='strict') + except UnicodeEncodeError: + # The package appears to have been merged with a + # different value of sys.getfilesystemencoding(), + # so fall back to utf_8 if appropriate. + try: + _unicode_encode(obj, + encoding=_encodings['fs'], errors='strict') + except UnicodeEncodeError: + pass + else: + os = portage.os + + abs_path = os.path.join(root, obj.lstrip(os.sep)) + try: + object_stat = os.stat(abs_path) + except OSError: + # Use the realpath as the key if the file does not exists on the + # filesystem. + return os.path.realpath(abs_path) + # Return a tuple of the device and inode. + return (object_stat.st_dev, object_stat.st_ino) + + def file_exists(self): + """ + Determine if the file for this key exists on the filesystem. + + @rtype: Boolean + @return: + 1. True if the file exists. + 2. False if the file does not exist or is a broken symlink. + + """ + return isinstance(self._key, tuple) + + class _LibGraphNode(_ObjectKey): + __slots__ = ("alt_paths",) + + def __init__(self, obj, root): + LinkageMapELF._ObjectKey.__init__(self, obj, root) + self.alt_paths = set() + + def __str__(self): + return str(sorted(self.alt_paths)) + + def rebuild(self, exclude_pkgs=None, include_file=None): + """ + Raises CommandNotFound if there are preserved libs + and the scanelf binary is not available. + """ + + os = _os_merge + root = self._eroot + root_len = len(root) - 1 + self._clear_cache() + self._defpath.update(getlibpaths(self._eroot)) + libs = self._libs + obj_properties = self._obj_properties + + lines = [] + + # Data from include_file is processed first so that it + # overrides any data from previously installed files. + if include_file is not None: + lines += grabfile(include_file) + + aux_keys = [self._needed_aux_key] + for cpv in self._dbapi.cpv_all(): + if exclude_pkgs is not None and cpv in exclude_pkgs: + continue + lines += self._dbapi.aux_get(cpv, aux_keys)[0].split('\n') + # Cache NEEDED.* files avoid doing excessive IO for every rebuild. + self._dbapi.flush_cache() + + # have to call scanelf for preserved libs here as they aren't + # registered in NEEDED.ELF.2 files + plibs = set() + if self._dbapi.plib_registry and self._dbapi.plib_registry.getPreservedLibs(): + args = ["/usr/bin/scanelf", "-qF", "%a;%F;%S;%r;%n"] + for items in self._dbapi.plib_registry.getPreservedLibs().values(): + plibs.update(items) + args.extend(os.path.join(root, x.lstrip("." + os.sep)) \ + for x in items) + try: + proc = subprocess.Popen(args, stdout=subprocess.PIPE) + except EnvironmentError as e: + if e.errno != errno.ENOENT: + raise + raise CommandNotFound(args[0]) + else: + for l in proc.stdout: + try: + l = _unicode_decode(l, + encoding=_encodings['content'], errors='strict') + except UnicodeDecodeError: + l = _unicode_decode(l, + encoding=_encodings['content'], errors='replace') + writemsg_level(_("\nError decoding characters " \ + "returned from scanelf: %s\n\n") % (l,), + level=logging.ERROR, noiselevel=-1) + l = l[3:].rstrip("\n") + if not l: + continue + fields = l.split(";") + if len(fields) < 5: + writemsg_level(_("\nWrong number of fields " \ + "returned from scanelf: %s\n\n") % (l,), + level=logging.ERROR, noiselevel=-1) + continue + fields[1] = fields[1][root_len:] + plibs.discard(fields[1]) + lines.append(";".join(fields)) + proc.wait() + + if plibs: + # Preserved libraries that did not appear in the scanelf output. + # This is known to happen with statically linked libraries. 
+ # Generate dummy lines for these, so we can assume that every + # preserved library has an entry in self._obj_properties. This + # is important in order to prevent findConsumers from raising + # an unwanted KeyError. + for x in plibs: + lines.append(";".join(['', x, '', '', ''])) + + for l in lines: + l = l.rstrip("\n") + if not l: + continue + fields = l.split(";") + if len(fields) < 5: + writemsg_level(_("\nWrong number of fields " \ + "in %s: %s\n\n") % (self._needed_aux_key, l), + level=logging.ERROR, noiselevel=-1) + continue + arch = fields[0] + obj = fields[1] + soname = fields[2] + path = set([normalize_path(x) \ + for x in filter(None, fields[3].replace( + "${ORIGIN}", os.path.dirname(obj)).replace( + "$ORIGIN", os.path.dirname(obj)).split(":"))]) + needed = [x for x in fields[4].split(",") if x] + + obj_key = self._obj_key(obj) + indexed = True + myprops = obj_properties.get(obj_key) + if myprops is None: + indexed = False + myprops = (arch, needed, path, soname, set()) + obj_properties[obj_key] = myprops + # All object paths are added into the obj_properties tuple. + myprops[4].add(obj) + + # Don't index the same file more that once since only one + # set of data can be correct and therefore mixing data + # may corrupt the index (include_file overrides previously + # installed). + if indexed: + continue + + arch_map = libs.get(arch) + if arch_map is None: + arch_map = {} + libs[arch] = arch_map + if soname: + soname_map = arch_map.get(soname) + if soname_map is None: + soname_map = self._soname_map_class( + providers=set(), consumers=set()) + arch_map[soname] = soname_map + soname_map.providers.add(obj_key) + for needed_soname in needed: + soname_map = arch_map.get(needed_soname) + if soname_map is None: + soname_map = self._soname_map_class( + providers=set(), consumers=set()) + arch_map[needed_soname] = soname_map + soname_map.consumers.add(obj_key) + + def listBrokenBinaries(self, debug=False): + """ + Find binaries and their needed sonames, which have no providers. + + @param debug: Boolean to enable debug output + @type debug: Boolean + @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])}) + @return: The return value is an object -> set-of-sonames mapping, where + object is a broken binary and the set consists of sonames needed by + object that have no corresponding libraries to fulfill the dependency. + + """ + + os = _os_merge + + class _LibraryCache(object): + + """ + Caches properties associated with paths. + + The purpose of this class is to prevent multiple instances of + _ObjectKey for the same paths. + + """ + + def __init__(cache_self): + cache_self.cache = {} + + def get(cache_self, obj): + """ + Caches and returns properties associated with an object. + + @param obj: absolute path (can be symlink) + @type obj: string (example: '/usr/lib/libfoo.so') + @rtype: 4-tuple with types + (string or None, string or None, 2-tuple, Boolean) + @return: 4-tuple with the following components: + 1. arch as a string or None if it does not exist, + 2. soname as a string or None if it does not exist, + 3. obj_key as 2-tuple, + 4. Boolean representing whether the object exists. + (example: ('libfoo.so.1', (123L, 456L), True)) + + """ + if obj in cache_self.cache: + return cache_self.cache[obj] + else: + obj_key = self._obj_key(obj) + # Check that the library exists on the filesystem. + if obj_key.file_exists(): + # Get the arch and soname from LinkageMap._obj_properties if + # it exists. Otherwise, None. 
+ arch, _needed, _path, soname, _objs = \ + self._obj_properties.get(obj_key, (None,)*5) + return cache_self.cache.setdefault(obj, \ + (arch, soname, obj_key, True)) + else: + return cache_self.cache.setdefault(obj, \ + (None, None, obj_key, False)) + + rValue = {} + cache = _LibraryCache() + providers = self.listProviders() + + # Iterate over all obj_keys and their providers. + for obj_key, sonames in providers.items(): + arch, _needed, path, _soname, objs = self._obj_properties[obj_key] + path = path.union(self._defpath) + # Iterate over each needed soname and the set of library paths that + # fulfill the soname to determine if the dependency is broken. + for soname, libraries in sonames.items(): + # validLibraries is used to store libraries, which satisfy soname, + # so if no valid libraries are found, the soname is not satisfied + # for obj_key. If unsatisfied, objects associated with obj_key + # must be emerged. + validLibraries = set() + # It could be the case that the library to satisfy the soname is + # not in the obj's runpath, but a symlink to the library is (eg + # libnvidia-tls.so.1 in nvidia-drivers). Also, since LinkageMap + # does not catalog symlinks, broken or missing symlinks may go + # unnoticed. As a result of these cases, check that a file with + # the same name as the soname exists in obj's runpath. + # XXX If we catalog symlinks in LinkageMap, this could be improved. + for directory in path: + cachedArch, cachedSoname, cachedKey, cachedExists = \ + cache.get(os.path.join(directory, soname)) + # Check that this library provides the needed soname. Doing + # this, however, will cause consumers of libraries missing + # sonames to be unnecessarily emerged. (eg libmix.so) + if cachedSoname == soname and cachedArch == arch: + validLibraries.add(cachedKey) + if debug and cachedKey not in \ + set(map(self._obj_key_cache.get, libraries)): + # XXX This is most often due to soname symlinks not in + # a library's directory. We could catalog symlinks in + # LinkageMap to avoid checking for this edge case here. + writemsg_level( + _("Found provider outside of findProviders:") + \ + (" %s -> %s %s\n" % (os.path.join(directory, soname), + self._obj_properties[cachedKey][4], libraries)), + level=logging.DEBUG, + noiselevel=-1) + # A valid library has been found, so there is no need to + # continue. + break + if debug and cachedArch == arch and \ + cachedKey in self._obj_properties: + writemsg_level((_("Broken symlink or missing/bad soname: " + \ + "%(dir_soname)s -> %(cachedKey)s " + \ + "with soname %(cachedSoname)s but expecting %(soname)s") % \ + {"dir_soname":os.path.join(directory, soname), + "cachedKey": self._obj_properties[cachedKey], + "cachedSoname": cachedSoname, "soname":soname}) + "\n", + level=logging.DEBUG, + noiselevel=-1) + # This conditional checks if there are no libraries to satisfy the + # soname (empty set). + if not validLibraries: + for obj in objs: + rValue.setdefault(obj, set()).add(soname) + # If no valid libraries have been found by this point, then + # there are no files named with the soname within obj's runpath, + # but if there are libraries (from the providers mapping), it is + # likely that soname symlinks or the actual libraries are + # missing or broken. Thus those libraries are added to rValue + # in order to emerge corrupt library packages. 
+ for lib in libraries: + rValue.setdefault(lib, set()).add(soname) + if debug: + if not os.path.isfile(lib): + writemsg_level(_("Missing library:") + " %s\n" % (lib,), + level=logging.DEBUG, + noiselevel=-1) + else: + writemsg_level(_("Possibly missing symlink:") + \ + "%s\n" % (os.path.join(os.path.dirname(lib), soname)), + level=logging.DEBUG, + noiselevel=-1) + return rValue + + def listProviders(self): + """ + Find the providers for all object keys in LinkageMap. + + @rtype: dict (example: + {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}}) + @return: The return value is an object key -> providers mapping, where + providers is a mapping of soname -> set-of-library-paths returned + from the findProviders method. + + """ + rValue = {} + if not self._libs: + self.rebuild() + # Iterate over all object keys within LinkageMap. + for obj_key in self._obj_properties: + rValue.setdefault(obj_key, self.findProviders(obj_key)) + return rValue + + def isMasterLink(self, obj): + """ + Determine whether an object is a master link. + + @param obj: absolute path to an object + @type obj: string (example: '/usr/bin/foo') + @rtype: Boolean + @return: + 1. True if obj is a master link + 2. False if obj is not a master link + + """ + os = _os_merge + basename = os.path.basename(obj) + obj_key = self._obj_key(obj) + if obj_key not in self._obj_properties: + raise KeyError("%s (%s) not in object list" % (obj_key, obj)) + soname = self._obj_properties[obj_key][3] + return (len(basename) < len(soname)) + + def listLibraryObjects(self): + """ + Return a list of library objects. + + Known limitation: library objects lacking an soname are not included. + + @rtype: list of strings + @return: list of paths to all providers + + """ + rValue = [] + if not self._libs: + self.rebuild() + for arch_map in self._libs.values(): + for soname_map in arch_map.values(): + for obj_key in soname_map.providers: + rValue.extend(self._obj_properties[obj_key][4]) + return rValue + + def getSoname(self, obj): + """ + Return the soname associated with an object. + + @param obj: absolute path to an object + @type obj: string (example: '/usr/bin/bar') + @rtype: string + @return: soname as a string + + """ + if not self._libs: + self.rebuild() + if isinstance(obj, self._ObjectKey): + obj_key = obj + if obj_key not in self._obj_properties: + raise KeyError("%s not in object list" % obj_key) + return self._obj_properties[obj_key][3] + if obj not in self._obj_key_cache: + raise KeyError("%s not in object list" % obj) + return self._obj_properties[self._obj_key_cache[obj]][3] + + def findProviders(self, obj): + """ + Find providers for an object or object key. + + This method may be called with a key from _obj_properties. + + In some cases, not all valid libraries are returned. This may occur when + an soname symlink referencing a library is in an object's runpath while + the actual library is not. We should consider cataloging symlinks within + LinkageMap as this would avoid those cases and would be a better model of + library dependencies (since the dynamic linker actually searches for + files named with the soname in the runpaths). + + @param obj: absolute path to an object or a key from _obj_properties + @type obj: string (example: '/usr/bin/bar') or _ObjectKey + @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])}) + @return: The return value is a soname -> set-of-library-paths, where + set-of-library-paths satisfy soname. 
+ + """ + + os = _os_merge + + rValue = {} + + if not self._libs: + self.rebuild() + + # Determine the obj_key from the arguments. + if isinstance(obj, self._ObjectKey): + obj_key = obj + if obj_key not in self._obj_properties: + raise KeyError("%s not in object list" % obj_key) + else: + obj_key = self._obj_key(obj) + if obj_key not in self._obj_properties: + raise KeyError("%s (%s) not in object list" % (obj_key, obj)) + + arch, needed, path, _soname, _objs = self._obj_properties[obj_key] + path_keys = set(self._path_key(x) for x in path.union(self._defpath)) + for soname in needed: + rValue[soname] = set() + if arch not in self._libs or soname not in self._libs[arch]: + continue + # For each potential provider of the soname, add it to rValue if it + # resides in the obj's runpath. + for provider_key in self._libs[arch][soname].providers: + providers = self._obj_properties[provider_key][4] + for provider in providers: + if self._path_key(os.path.dirname(provider)) in path_keys: + rValue[soname].add(provider) + return rValue + + def findConsumers(self, obj): + """ + Find consumers of an object or object key. + + This method may be called with a key from _obj_properties. If this + method is going to be called with an object key, to avoid not catching + shadowed libraries, do not pass new _ObjectKey instances to this method. + Instead pass the obj as a string. + + In some cases, not all consumers are returned. This may occur when + an soname symlink referencing a library is in an object's runpath while + the actual library is not. For example, this problem is noticeable for + binutils since it's libraries are added to the path via symlinks that + are gemerated in the /usr/$CHOST/lib/ directory by binutils-config. + Failure to recognize consumers of these symlinks makes preserve-libs + fail to preserve binutils libs that are needed by these unrecognized + consumers. + + Note that library consumption via dlopen (common for kde plugins) is + currently undetected. However, it is possible to use the + corresponding libtool archive (*.la) files to detect such consumers + (revdep-rebuild is able to detect them). + + @param obj: absolute path to an object or a key from _obj_properties + @type obj: string (example: '/usr/bin/bar') or _ObjectKey + @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar'])) + @return: The return value is a soname -> set-of-library-paths, where + set-of-library-paths satisfy soname. + + """ + + os = _os_merge + + rValue = set() + + if not self._libs: + self.rebuild() + + # Determine the obj_key and the set of objects matching the arguments. + if isinstance(obj, self._ObjectKey): + obj_key = obj + if obj_key not in self._obj_properties: + raise KeyError("%s not in object list" % obj_key) + objs = self._obj_properties[obj_key][4] + else: + objs = set([obj]) + obj_key = self._obj_key(obj) + if obj_key not in self._obj_properties: + raise KeyError("%s (%s) not in object list" % (obj_key, obj)) + + # If there is another version of this lib with the + # same soname and the master link points to that + # other version, this lib will be shadowed and won't + # have any consumers. 
+ if not isinstance(obj, self._ObjectKey): + soname = self._obj_properties[obj_key][3] + master_link = os.path.join(self._eroot, + os.path.dirname(obj).lstrip(os.path.sep), soname) + try: + master_st = os.stat(master_link) + obj_st = os.stat(obj) + except OSError: + pass + else: + if (obj_st.st_dev, obj_st.st_ino) != \ + (master_st.st_dev, master_st.st_ino): + return set() + + # Determine the directory(ies) from the set of objects. + objs_dir_keys = set(self._path_key(os.path.dirname(x)) for x in objs) + defpath_keys = set(self._path_key(x) for x in self._defpath) + + arch, _needed, _path, soname, _objs = self._obj_properties[obj_key] + if arch in self._libs and soname in self._libs[arch]: + # For each potential consumer, add it to rValue if an object from the + # arguments resides in the consumer's runpath. + for consumer_key in self._libs[arch][soname].consumers: + _arch, _needed, path, _soname, consumer_objs = \ + self._obj_properties[consumer_key] + path_keys = defpath_keys.union(self._path_key(x) for x in path) + if objs_dir_keys.intersection(path_keys): + rValue.update(consumer_objs) + return rValue |
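For orientation, here is a minimal, hedged sketch of driving the relocated class. The constructor, `rebuild()`, `listBrokenBinaries()`, and `findProviders()` all come from the new module above; the `portage.db` lookup and the example path are assumptions about a live Portage install of this era, not something this commit defines:

```python
# Minimal sketch, assuming a standard Portage install with a populated vardb.
import portage
from portage.util._dyn_libs.LinkageMapELF import LinkageMapELF

# The constructor takes the installed-packages dbapi (vardbapi).
vardb = portage.db[portage.root]["vartree"].dbapi
linkmap = LinkageMapELF(vardb)

# rebuild() reads NEEDED.ELF.2 entries for every installed package and, when
# preserved libraries are registered, shells out to scanelf; it raises
# CommandNotFound if scanelf is unavailable in that case.
linkmap.rebuild()

# Object -> set-of-unsatisfied-sonames, e.g. {'/usr/bin/foo': set(['libbar.so'])}.
broken = linkmap.listBrokenBinaries()

# soname -> set of provider paths found in the object's runpath.
# Hypothetical path; raises KeyError if the object is unknown to the map.
providers = linkmap.findProviders("/usr/bin/python")
for soname, paths in providers.items():
    print("%s -> %s" % (soname, sorted(paths)))
```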