summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Zac Medico <zmedico@gentoo.org> 2008-11-26 22:31:23 +0000
committer Zac Medico <zmedico@gentoo.org> 2008-11-26 22:31:23 +0000
commit 2c72bb29c7ba405e2deef81248a9f14ea049344d (patch)
tree 7eb95e96973f3d0283d2be65138d2c25b5f265f9
parent f23f670939c0acb330ccd3ec7d6695d0f2c00bd0 (diff)
download portage-2c72bb29c7ba405e2deef81248a9f14ea049344d.tar.gz
portage-2c72bb29c7ba405e2deef81248a9f14ea049344d.tar.bz2
portage-2c72bb29c7ba405e2deef81248a9f14ea049344d.zip
Bug #235642 - Create hardlinks when merging identical files. This works by using a
tuple of (md5, st_size) as a key to a list of hardlink candidates. Multiple candidates are kept because some of them may have been merged to separate devices, in which case os.link() fails for those paths and the next candidate is tried.

svn path=/main/trunk/; revision=12109
-rw-r--r-- pym/portage/__init__.py 53
-rw-r--r-- pym/portage/dbapi/vartree.py 12
2 files changed, 58 insertions, 7 deletions
diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py
index db5f0a061..bedbbf3f9 100644
--- a/pym/portage/__init__.py
+++ b/pym/portage/__init__.py
@@ -5968,7 +5968,8 @@ def _movefile(src, dest, **kwargs):
raise portage.exception.PortageException(
"mv '%s' '%s'" % (src, dest))
-def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
+def movefile(src, dest, newmtime=None, sstat=None, mysettings=None,
+ hardlink_candidates=None):
"""moves a file from src to dest, preserving all permissions and attributes; mtime will
be preserved even when moving across filesystems. Returns true on success and false on
failure. Move is atomic."""
@@ -6040,8 +6041,45 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
print "!!!",e
return None
+ hardlinked = False
+ # Identical files might be merged to multiple filesystems, in which
+ # case os.link() calls will fail for some paths, so try them all.
+ # For atomic replacement, first create the link as a temp file
+ # and then use os.rename() to replace the destination.
+ if hardlink_candidates is not None:
+ head, tail = os.path.split(dest)
+ hardlink_tmp = os.path.join(head, ".%s._portage_merge_.%s" % \
+ (tail, os.getpid()))
+ try:
+ os.unlink(hardlink_tmp)
+ except OSError, e:
+ if e.errno != errno.ENOENT:
+ writemsg("!!! Failed to remove hardlink temp file: %s\n" % \
+ (hardlink_tmp,), noiselevel=-1)
+ writemsg("!!! %s\n" % (e,), noiselevel=-1)
+ return None
+ del e
+ for hardlink_src in hardlink_candidates:
+ try:
+ os.link(hardlink_src, hardlink_tmp)
+ except OSError:
+ continue
+ else:
+ try:
+ os.rename(hardlink_tmp, dest)
+ except OSError, e:
+ writemsg("!!! Failed to rename %s to %s\n" % \
+ (hardlink_tmp, dest), noiselevel=-1)
+ writemsg("!!! %s\n" % (e,), noiselevel=-1)
+ return None
+ hardlinked = True
+ break
+
renamefailed=1
- if sstat[stat.ST_DEV]==dstat[stat.ST_DEV] or selinux_enabled:
+ if hardlinked:
+ renamefailed = False
+ if not hardlinked and \
+ (selinux_enabled or sstat[stat.ST_DEV] == dstat[stat.ST_DEV]):
try:
if selinux_enabled:
ret=selinux.secure_rename(src,dest)
@@ -6102,11 +6140,14 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
return None
try:
- if newmtime is not None:
- os.utime(dest, (newmtime, newmtime))
+ if hardlinked:
+ newmtime = long(os.stat(dest).st_mtime)
else:
- os.utime(dest, (sstat.st_atime, sstat.st_mtime))
- newmtime = long(sstat.st_mtime)
+ if newmtime is not None:
+ os.utime(dest, (newmtime, newmtime))
+ else:
+ os.utime(dest, (sstat.st_atime, sstat.st_mtime))
+ newmtime = long(sstat.st_mtime)
except OSError:
# The utime can fail here with EPERM even though the move succeeded.
# Instead of failing, use stat to return the mtime if possible.
diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py
index e78d32a6f..90e7a8693 100644
--- a/pym/portage/dbapi/vartree.py
+++ b/pym/portage/dbapi/vartree.py
@@ -1756,6 +1756,7 @@ class dblink(object):
self._contents_inodes = None
self._contents_basenames = None
self._linkmap_broken = False
+ self._md5_merge_map = {}
def lockdb(self):
if self._lock_vdb:
@@ -3366,6 +3367,7 @@ class dblink(object):
if self.mergeme(srcroot, destroot, outfile, None,
secondhand, cfgfiledict, mymtime):
return 1
+ self._md5_merge_map.clear()
#restore umask
os.umask(prevmask)
@@ -3767,9 +3769,17 @@ class dblink(object):
# whether config protection or not, we merge the new file the
# same way. Unless moveme=0 (blocking directory)
if moveme:
- mymtime = movefile(mysrc, mydest, newmtime=thismtime, sstat=mystat, mysettings=self.settings)
+ hardlink_key = (mymd5, mystat.st_size)
+ hardlink_candidates = self._md5_merge_map.get(hardlink_key)
+ if hardlink_candidates is None:
+ hardlink_candidates = []
+ self._md5_merge_map[hardlink_key] = hardlink_candidates
+ mymtime = movefile(mysrc, mydest, newmtime=thismtime,
+ sstat=mystat, mysettings=self.settings,
+ hardlink_candidates=hardlink_candidates)
if mymtime is None:
return 1
+ hardlink_candidates.append(mydest)
zing = ">>>"
if mymtime != None: