summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Zac Medico <zmedico@gentoo.org> 2007-10-17 00:02:29 +0000
committer: Zac Medico <zmedico@gentoo.org> 2007-10-17 00:02:29 +0000
commit: 20df5faa51753aae84791277cd0d07c6b44114bf (patch)
tree: 55f8943d5fe6f9fd2d5d2f91ed7fe405d6b9962e
parent: a3bd5ae0fde41bbdc401a7c6fa238a37ad58ac8e (diff)
download: portage-20df5faa51753aae84791277cd0d07c6b44114bf.tar.gz
portage-20df5faa51753aae84791277cd0d07c6b44114bf.tar.bz2
portage-20df5faa51753aae84791277cd0d07c6b44114bf.zip
Optimize dblink.isowner() to use fewer stat calls by only collecting stat results for parent directories.

This provides equivalent accuracy to the previous approach but will perform much better when used to scan all installed packages for owners in the event of a file collision.

svn path=/main/trunk/; revision=8153
-rw-r--r-- pym/portage/dbapi/vartree.py 53
1 file changed, 38 insertions, 15 deletions
diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py
index 99db29661..2d2e916bd 100644
--- a/pym/portage/dbapi/vartree.py
+++ b/pym/portage/dbapi/vartree.py
@@ -1380,9 +1380,14 @@ class dblink(object):
def isowner(self,filename, destroot):
"""
- Check if filename is a new file or belongs to this package
- (for this or a previous version)
-
+ Check if a file belongs to this package. This may
+ result in a stat call for the parent directory of
+ every installed file, since the inode numbers are
+ used to work around the problem of ambiguous paths
+ caused by symlinked directories. The results of
+ stat calls are cached to optimize multiple calls
+ to this method.
+
@param filename:
@type filename:
@param destroot:
@@ -1399,23 +1404,46 @@ class dblink(object):
if pkgfiles and destfile in pkgfiles:
return True
if pkgfiles:
+ # Use stat rather than lstat since we want to follow
+ # any symlinks to the real parent directory.
+ parent_path = os.path.dirname(destfile)
try:
- mylstat = os.lstat(destfile)
+ parent_stat = os.stat(parent_path)
except EnvironmentError, e:
if e.errno != errno.ENOENT:
raise
del e
- return True
+ return False
if self._contents_inodes is None:
- self._contents_inodes = set()
+ self._contents_inodes = {}
+ parent_paths = set()
for x in pkgfiles:
+ p_path = os.path.dirname(x)
+ if p_path in parent_paths:
+ continue
+ parent_paths.add(p_path)
try:
- lstat = os.lstat(x)
- self._contents_inodes.add((lstat.st_dev, lstat.st_ino))
+ s = os.stat(p_path)
except OSError:
pass
- if (mylstat.st_dev, mylstat.st_ino) in self._contents_inodes:
- return True
+ else:
+ inode_key = (s.st_dev, s.st_ino)
+ # Use lists of paths in case multiple
+ # paths reference the same inode.
+ p_path_list = self._contents_inodes.get(inode_key)
+ if p_path_list is None:
+ p_path_list = []
+ self._contents_inodes[inode_key] = p_path_list
+ if p_path not in p_path_list:
+ p_path_list.append(p_path)
+ p_path_list = self._contents_inodes.get(
+ (parent_stat.st_dev, parent_stat.st_ino))
+ if p_path_list:
+ basename = os.path.basename(destfile)
+ for p_path in p_path_list:
+ x = os.path.join(p_path, basename)
+ if x in pkgfiles:
+ return True
return False
@@ -1570,11 +1598,6 @@ class dblink(object):
print "Searching all installed packages for file collisions..."
print "Press Ctrl-C to Stop"
print
- """ Note: The isowner calls result in a stat call for *every*
- single installed file, since the inode numbers are used to work
- around the problem of ambiguous paths caused by symlinked files
- and/or directories. Though it is slow, it is as accurate as
- possible."""
found_owner = False
for cpv in self.vartree.dbapi.cpv_all():
cat, pkg = catsplit(cpv)