summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZac Medico <zmedico@gentoo.org>2008-11-26 22:40:41 +0000
committerZac Medico <zmedico@gentoo.org>2008-11-26 22:40:41 +0000
commit6415910cb04e773c91b0f8b41704a5e9611f4198 (patch)
tree353484d1ae763f1e3ce4c7dbda5ed6795f8df263
parent57649cc562d63e027b570bf1ec518f6a686442ae (diff)
downloadportage-6415910cb04e773c91b0f8b41704a5e9611f4198.tar.gz
portage-6415910cb04e773c91b0f8b41704a5e9611f4198.tar.bz2
portage-6415910cb04e773c91b0f8b41704a5e9611f4198.zip
Bug #235642 - Create hardlinks when merging identical files. This works by using a
tuple of (md5, st_size) as a key to a list of hardlink candidates. Multiple candidates are used in case some happen to be merged to separate devices. (trunk r12109) svn path=/main/branches/2.1.6/; revision=12110
-rw-r--r--pym/portage/__init__.py53
-rw-r--r--pym/portage/dbapi/vartree.py12
2 files changed, 58 insertions, 7 deletions
diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py
index 186363235..2fcb4b143 100644
--- a/pym/portage/__init__.py
+++ b/pym/portage/__init__.py
@@ -5958,7 +5958,8 @@ def _movefile(src, dest, **kwargs):
raise portage.exception.PortageException(
"mv '%s' '%s'" % (src, dest))
-def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
+def movefile(src, dest, newmtime=None, sstat=None, mysettings=None,
+ hardlink_candidates=None):
"""moves a file from src to dest, preserving all permissions and attributes; mtime will
be preserved even when moving across filesystems. Returns true on success and false on
failure. Move is atomic."""
@@ -6030,8 +6031,45 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
print "!!!",e
return None
+ hardlinked = False
+ # Identical files might be merged to multiple filesystems, and
+ # os.link() fails across filesystems, so try every candidate.
+ # For atomic replacement, first create the link as a temp file
+ # and then use os.rename() to replace the destination.
+ if hardlink_candidates is not None:
+ head, tail = os.path.split(dest)
+ hardlink_tmp = os.path.join(head, ".%s._portage_merge_.%s" % \
+ (tail, os.getpid()))
+ try:
+ os.unlink(hardlink_tmp)
+ except OSError, e:
+ if e.errno != errno.ENOENT:
+ writemsg("!!! Failed to remove hardlink temp file: %s\n" % \
+ (hardlink_tmp,), noiselevel=-1)
+ writemsg("!!! %s\n" % (e,), noiselevel=-1)
+ return None
+ del e
+ for hardlink_src in hardlink_candidates:
+ try:
+ os.link(hardlink_src, hardlink_tmp)
+ except OSError:
+ continue
+ else:
+ try:
+ os.rename(hardlink_tmp, dest)
+ except OSError, e:
+ writemsg("!!! Failed to rename %s to %s\n" % \
+ (hardlink_tmp, dest), noiselevel=-1)
+ writemsg("!!! %s\n" % (e,), noiselevel=-1)
+ return None
+ hardlinked = True
+ break
+
renamefailed=1
- if sstat[stat.ST_DEV]==dstat[stat.ST_DEV] or selinux_enabled:
+ if hardlinked:
+ renamefailed = False
+ if not hardlinked and \
+ (selinux_enabled or sstat[stat.ST_DEV] == dstat[stat.ST_DEV]):
try:
if selinux_enabled:
ret=selinux.secure_rename(src,dest)
@@ -6092,11 +6130,14 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
return None
try:
- if newmtime is not None:
- os.utime(dest, (newmtime, newmtime))
+ if hardlinked:
+ newmtime = long(os.stat(dest).st_mtime)
else:
- os.utime(dest, (sstat.st_atime, sstat.st_mtime))
- newmtime = long(sstat.st_mtime)
+ if newmtime is not None:
+ os.utime(dest, (newmtime, newmtime))
+ else:
+ os.utime(dest, (sstat.st_atime, sstat.st_mtime))
+ newmtime = long(sstat.st_mtime)
except OSError:
# The utime can fail here with EPERM even though the move succeeded.
# Instead of failing, use stat to return the mtime if possible.
diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py
index 2555657ba..b27f30fe3 100644
--- a/pym/portage/dbapi/vartree.py
+++ b/pym/portage/dbapi/vartree.py
@@ -1100,6 +1100,7 @@ class dblink(object):
self.contentscache = None
self._contents_inodes = None
self._contents_basenames = None
+ self._md5_merge_map = {}
def lockdb(self):
if self._lock_vdb:
@@ -2377,6 +2378,7 @@ class dblink(object):
if self.mergeme(srcroot, destroot, outfile, None,
secondhand, cfgfiledict, mymtime):
return 1
+ self._md5_merge_map.clear()
#restore umask
os.umask(prevmask)
@@ -2734,9 +2736,17 @@ class dblink(object):
# whether config protection or not, we merge the new file the
# same way. Unless moveme=0 (blocking directory)
if moveme:
- mymtime = movefile(mysrc, mydest, newmtime=thismtime, sstat=mystat, mysettings=self.settings)
+ hardlink_key = (mymd5, mystat.st_size)
+ hardlink_candidates = self._md5_merge_map.get(hardlink_key)
+ if hardlink_candidates is None:
+ hardlink_candidates = []
+ self._md5_merge_map[hardlink_key] = hardlink_candidates
+ mymtime = movefile(mysrc, mydest, newmtime=thismtime,
+ sstat=mystat, mysettings=self.settings,
+ hardlink_candidates=hardlink_candidates)
if mymtime is None:
return 1
+ hardlink_candidates.append(mydest)
zing = ">>>"
if mymtime != None: