From be8e36ce968a62ddda8619b2c17a614bf5868612 Mon Sep 17 00:00:00 2001
From: Narayan Desai
Date: Thu, 18 Jun 2009 18:16:13 +0000
Subject: Packages: Improve YumSource performance dramatically

Track needed file paths so that the contents of all packages do not need to be
tracked. This reduces memory consumption ~60% on my test repository (360MB ->
60MB for bcfg2-info). woo!

git-svn-id: https://svn.mcs.anl.gov/repos/bcfg/trunk/bcfg2@5283 ce84e21b-d406-0410-9b95-82705330c041
---
 src/lib/Server/Plugins/Packages.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/lib/Server/Plugins/Packages.py b/src/lib/Server/Plugins/Packages.py
index d8e6d2004..b4bc01d2c 100644
--- a/src/lib/Server/Plugins/Packages.py
+++ b/src/lib/Server/Plugins/Packages.py
@@ -163,6 +163,7 @@ class YUMSource(Source):
         self.deps = dict([('global', dict())])
         self.provides = dict([('global', dict())])
         self.filemap = dict([(x, dict()) for x in ['global'] + self.arches])
+        self.needed_paths = set()
 
     def save_state(self):
         cache = file(self.cachefile, 'wb')
@@ -195,13 +196,16 @@ class YUMSource(Source):
     urls = property(get_urls)
 
     def read_files(self):
-        for fname in self.files:
+        for fname in [f for f in self.files if f.endswith('primary.xml.gz')]:
+            print fname
+            farch = fname.split('@')[-3]
+            fdata = lxml.etree.parse(fname).getroot()
+            self.parse_primary(fdata, farch)
+        for fname in [f for f in self.files if f.endswith('filelists.xml.gz')]:
+            print fname
             farch = fname.split('@')[-3]
             fdata = lxml.etree.parse(fname).getroot()
-            if fname.endswith('primary.xml.gz'):
-                self.parse_primary(fdata, farch)
-            elif fname.endswith('filelists.xml.gz'):
-                self.parse_filelist(fdata, farch)
+            self.parse_filelist(fdata, farch)
         # merge data
         sdata = self.packages.values()
         self.packages['global'] = copy.deepcopy(sdata.pop())
@@ -216,7 +220,8 @@ class YUMSource(Source):
     def parse_filelist(self, data, arch):
         for pkg in data.findall(self.fl + 'package'):
             for fentry in pkg.findall(self.fl + 'file'):
-                self.filemap[arch][fentry.text] = pkg.get('name')
+                if fentry in self.needed_paths:
+                    self.filemap[arch][fentry.text] = pkg.get('name')
 
     def parse_primary(self, data, arch):
         if arch not in self.packages:
@@ -236,6 +241,8 @@ class YUMSource(Source):
             self.deps[arch][pkgname] = set()
             for entry in pre.getchildren():
                 self.deps[arch][pkgname].add(entry.get('name'))
+                if entry.get('name').startswith('/'):
+                    self.needed_paths.add(entry.get('name'))
             pro = pdata.find(self.rp + 'provides')
             if pro != None:
                 for entry in pro.getchildren():
-- 
cgit v1.2.3-1-g7c22