#!/usr/bin/python
"""Program to generate a bcfg2 Pkgmgr configuration file from a list
of directories that contain RPMS.

All versions or only the latest may be included in the output.
rpm.labelCompare is used to compare the package versions, so that
a proper rpm version comparison is done (epoch:version-release).

The output file may be formatted for use with the RPM or Yum
bcfg2 client drivers. The output can also contain the PackageList
and nested group headers.

The functions in this module read their configuration from module-level
names (options, output, indent, subarchs, subarch_mapping, ...) that are
set up by the ``__main__`` section at the bottom of the file.
"""

import collections
import datetime
import functools
import glob
import gzip
import optparse
import os
import rpm
import sys
from lxml.etree import parse
import xml.sax
from xml.sax.handler import ContentHandler

# Compatibility imports
from Bcfg2.Compat import urljoin

try:
    from urllib.request import FancyURLopener
except ImportError:
    # Python 2 keeps FancyURLopener directly in urllib.
    from urllib import FancyURLopener


def info(object, spacing=10, collapse=1):
    """Print methods and doc strings.

    Takes module, class, list, dictionary, or string.

    object   -- the thing whose callable attributes are listed.
    spacing  -- column width the attribute names are left-justified to.
    collapse -- when true, runs of whitespace in each doc string are
                collapsed into single spaces.
    """
    if collapse:
        def process(text):
            return " ".join(text.split())
    else:
        def process(text):
            return text

    method_names = [name for name in dir(object)
                    if callable(getattr(object, name))]
    print("\n".join(["%s %s" % (name.ljust(spacing),
                                process(str(getattr(object, name).__doc__)))
                     for name in method_names]))


def readRpmHeader(ts, filename):
    """Read an rpm header from an RPM file.

    ts       -- an rpm transaction set used to parse the header.
    filename -- path of the RPM file.

    Returns the header, or None (after printing a message) when the file
    cannot be opened.
    """
    try:
        fd = os.open(filename, os.O_RDONLY)
    except OSError:
        print("Failed to open RPM file %s" % filename)
        return None
    try:
        return ts.hdrFromFdno(fd)
    finally:
        # Always release the descriptor, even if header parsing raises.
        os.close(fd)


def sortedDictValues(adict):
    """Return the values of adict as a list ordered by their sorted keys."""
    return [adict[key] for key in sorted(adict)]


def _epochLabel(epoch):
    """Return epoch as a string; a missing epoch is equivalent to '0'."""
    if epoch is None:
        return '0'
    return str(epoch)


def cmpRpmHeader(a, b):
    """cmp() implementation suitable for use with sort.

    a and b are dictionaries as created by loadRpms(). Comparison is made
    by package name and then by the full rpm version (epoch, version,
    release). rpm.labelCompare is used for the version part of the
    comparison.

    Returns a negative, zero or positive integer.
    """
    n1 = str(a['name'])
    n2 = str(b['name'])
    if n1 != n2:
        return -1 if n1 < n2 else 1

    # RPM treats an absent epoch as 0; stringifying None to 'None' would
    # make a package without an epoch sort below one with epoch 0.
    label1 = (_epochLabel(a['epoch']), str(a['version']), str(a['release']))
    label2 = (_epochLabel(b['epoch']), str(b['version']), str(b['release']))
    return rpm.labelCompare(label1, label2)


def loadRpms(dirs):
    """Load package information from the RPM files found in dirs.

    dirs is a list of directories to search for rpms.

    Builds a dictionary keyed by the package name. Dictionary item is a
    list, one entry per package instance found. The list entries are
    dictionaries. Keys are 'filename', 'mtime', 'name', 'arch', 'epoch',
    'version' and 'release'.

    e.g.

    packages = {
        'bcfg2': [
            {'filename': 'bcfg2-0.9.2-0.0rc1.noarch.rpm', 'mtime': '',
             'name': 'bcfg2', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc1'},
            {'filename': 'bcfg2-0.9.2-0.0rc5.noarch.rpm', 'mtime': '',
             'name': 'bcfg2', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc5'}],
        'bcfg2-server': [
            {'filename': 'bcfg2-server-0.9.2-0.0rc1.noarch.rpm', 'mtime': '',
             'name': 'bcfg2-server', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc1'},
            {'filename': 'bcfg2-server-0.9.2-0.0rc5.noarch.rpm', 'mtime': '',
             'name': 'bcfg2-server', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc5'}],
    }

    Files that cannot be opened are reported and skipped.
    """
    packages = {}
    ts = rpm.TransactionSet()
    # Only header tags are read below, so digest and signature checks
    # are switched off for the transaction set.
    ts.setVSFlags(rpm._RPMVSF_NODIGESTS | rpm._RPMVSF_NOSIGNATURES)

    for directory in dirs:
        if options.verbose:
            print("Scanning directory: %s" % directory)

        for rpm_file in os.listdir(directory):
            if not rpm_file.endswith('.rpm'):
                continue
            filename = os.path.join(directory, rpm_file)

            # Get the mtime of the RPM file.
            file_mtime = datetime.date.fromtimestamp(
                os.stat(filename).st_mtime)

            # Get the RPM header; unreadable files are skipped.
            header = readRpmHeader(ts, filename)
            if header is None:
                continue

            # Get what we are interested in out of the header.
            name = header[rpm.RPMTAG_NAME]
            epoch = header[rpm.RPMTAG_EPOCH]
            version = header[rpm.RPMTAG_VERSION]
            release = header[rpm.RPMTAG_RELEASE]
            subarch = header[rpm.RPMTAG_ARCH]

            # Only load RPMs with subarchitectures as calculated from the
            # --archs option.
            if subarch in subarchs or 'all' in subarchs:
                # Store what we want in our structure.
                packages.setdefault(name, []).append(
                    {'filename': rpm_file,
                     'mtime': file_mtime,
                     'name': name,
                     'arch': subarch,
                     'epoch': epoch,
                     'version': version,
                     'release': release})

                # Print '.' for each package. stdio is line buffered, so
                # have to flush it.
                if options.verbose:
                    sys.stdout.write('.')
                    sys.stdout.flush()

        if options.verbose:
            sys.stdout.write('\n')

    return packages


class pkgmgr_URLopener(FancyURLopener):
    """URL opener that reports HTTP errors instead of hiding them."""

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Override default error handling so that we can see what the
        errors are.
        """
        print("ERROR %s: Unable to retrieve %s" % (errcode, url))


class PrimaryParser(ContentHandler):
    """SAX handler that collects package entries from repository metadata.

    For every <package> element the text of its <name> and <arch> children
    and the attributes of its <version> and <location> children are
    gathered, and the result is appended to the packages dictionary that
    was passed to the constructor (same layout as produced by loadRpms(),
    without the 'mtime' key).
    """

    def __init__(self, packages):
        # Flags recording which element the parser is currently inside.
        self.inPackage = 0
        self.inName = 0
        self.inArch = 0
        # Dictionary that receives the parsed package instances.
        self.packages = packages

    def startElement(self, name, attrs):
        """Start a new package record or note the child element entered."""
        if name == "package":
            self.package = {'file': None, 'name': '', 'subarch': '',
                            'epoch': None, 'version': None, 'release': None}
            self.inPackage = 1
        elif self.inPackage:
            if name == "name":
                self.inName = 1
            elif name == "arch":
                self.inArch = 1
            elif name == "version":
                self.package['epoch'] = attrs.getValue('epoch')
                self.package['version'] = attrs.getValue('ver')
                self.package['release'] = attrs.getValue('rel')
            elif name == "location":
                self.package['file'] = attrs.getValue('href')

    def endElement(self, name):
        """Store the finished package record or leave a child element."""
        if name == "package":
            self.inPackage = 0
            # Only load RPMs with subarchitectures as calculated from the
            # --archs option.
            if self.package['subarch'] in subarchs or 'all' in subarchs:
                self.packages.setdefault(self.package['name'], []).append(
                    {'filename': self.package['file'],
                     'name': self.package['name'],
                     'arch': self.package['subarch'],
                     'epoch': self.package['epoch'],
                     'version': self.package['version'],
                     'release': self.package['release']})
                # Print '.' for each package. stdio is line buffered, so
                # have to flush it.
                if options.verbose:
                    sys.stdout.write('.')
                    sys.stdout.flush()
        elif self.inPackage:
            if name == "name":
                self.inName = 0
            elif name == "arch":
                self.inArch = 0

    def characters(self, content):
        """Accumulate text for the <name> and <arch> elements.

        Text may be delivered in several chunks, hence the appends.
        """
        if self.inPackage:
            if self.inName:
                self.package['name'] += content
            if self.inArch:
                self.package['subarch'] += content


def _retrieveUrl(url):
    """Download url with pkgmgr_URLopener and return the local file name.

    Exits the program with status 1 when the download fails.
    """
    try:
        local_file, _ = pkgmgr_URLopener().retrieve(url)
    except Exception:
        # Top-level boundary: HTTP errors are reported by pkgmgr_URLopener
        # and then surface here as a secondary failure, so catch broadly.
        print("ERROR: Unable to retrieve %s" % url)
        sys.exit(1)
    return local_file


def loadRepos(repolist):
    """Load package information from yum repositories.

    repolist is a list of urls to yum repositories.

    Builds a dictionary keyed by the package name. Dictionary item is a
    list, one entry per package instance found. The list entries are
    dictionaries. Keys are 'filename', 'name', 'arch', 'epoch', 'version'
    and 'release'.

    e.g.

    packages = {
        'bcfg2': [
            {'filename': 'bcfg2-0.9.2-0.0rc1.noarch.rpm',
             'name': 'bcfg2', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc1'},
            {'filename': 'bcfg2-0.9.2-0.0rc5.noarch.rpm',
             'name': 'bcfg2', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc5'}],
        'bcfg2-server': [
            {'filename': 'bcfg2-server-0.9.2-0.0rc1.noarch.rpm',
             'name': 'bcfg2-server', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc1'},
            {'filename': 'bcfg2-server-0.9.2-0.0rc5.noarch.rpm',
             'name': 'bcfg2-server', 'arch': 'noarch', 'epoch': None,
             'version': '0.9.2', 'release': '0.0rc5'}],
    }

    The program exits with status 1 when metadata cannot be retrieved or
    parsed.
    """
    packages = {}
    for repo in repolist:
        url = urljoin(repo, './repodata/repomd.xml')

        if options.verbose:
            print("Loading repo metadata : %s" % url)

        local_file = _retrieveUrl(url)

        try:
            tree = parse(local_file)
        except (IOError, SyntaxError):
            # lxml's XMLSyntaxError derives from SyntaxError.
            print("ERROR: Unable to parse retrieved repomd.xml.")
            sys.exit(1)

        # Find the location of the 'primary' metadata file.
        primaryhref = None
        for element in tree.getroot():
            if (element.tag.endswith('data') and
                    element.get('type') == 'primary'):
                for child in element:
                    if child.tag.endswith('location'):
                        primaryhref = child.get('href')

        if primaryhref is None:
            print("ERROR: Unable to parse retrieved repomd.xml.")
            sys.exit(1)

        url = urljoin(repo, './' + primaryhref)

        if options.verbose:
            print("Loading : %s" % url)

        local_file = _retrieveUrl(url)

        try:
            repo_file = gzip.open(local_file)
        except IOError:
            print("ERROR: Unable to parse retrieved file.")
            sys.exit(1)

        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(PrimaryParser(packages))
            # gzip only reports a bad archive once it is read, so the
            # parse itself has to be guarded as well.
            parser.parse(repo_file)
        except (IOError, xml.sax.SAXException):
            print("ERROR: Unable to parse retrieved file.")
            sys.exit(1)
        finally:
            repo_file.close()

        if options.verbose:
            sys.stdout.write('\n')

    return packages


# NOTE(review): the element and attribute names written by printInstance(),
# printPackage() and main() follow the bcfg2 Pkgmgr PackageList layout
# (PackageList / Group / Package / Instance) -- confirm against the schema
# of the bcfg2 server in use.

def printInstance(instance, group_count):
    """Print the details for a package instance with the appropriate
    indentation and in the specified format (rpm or yum).

    instance    -- a package instance dictionary as created by loadRpms()
                   or loadRepos().
    group_count -- nesting depth of the enclosing Package element; the
                   instance is indented one level deeper.
    """
    prefix = indent * (group_count + 1)

    if options.format == 'rpm':
        output_line = "%s<Instance simplefile='%s' " % (prefix,
                                                        instance['filename'])
    else:
        output_line = "%s<Instance " % prefix

    # The epoch attribute is only written when an epoch is set.
    if instance['epoch']:
        output_line += "epoch='%s' " % instance['epoch']

    output_line += "version='%s' release='%s' arch='%s'/>\n" % (
        instance['version'], instance['release'], instance['arch'])
    output.write(output_line)


def printPackage(entry, group_count):
    """Print the details of a package with the appropriate indentation.

    Only the specified (all or latest) release(s) is printed.

    entry is a single package entry as created in loadRpms(): a non-empty
    list of instance dictionaries that share one package name.
    group_count is the nesting depth used for indentation.
    """
    output.write("%s<Package name='%s' type='%s'>\n"
                 % (group_count * indent, entry[0]['name'], options.format))

    subarch_dict = {}   # subarchitecture -> instances built for it
    arch_dict = {}      # architecture -> subarchitectures seen for it

    # Split instances of this package into subarchitectures.
    for instance in entry:
        subarch = instance['arch']
        if subarch == 'src':
            continue

        subarch_dict.setdefault(subarch, []).append(instance)

        # Keep track of the subarchitectures we have found in each
        # architecture. A subarchitecture missing from the mapping is
        # treated as an architecture of its own rather than failing.
        arch = subarch_mapping.get(subarch, subarch)
        found = arch_dict.setdefault(arch, [])
        if subarch not in found:
            found.append(subarch)

    # Only keep the 'highest' subarchitecture in each architecture; the
    # highest is the last one in sorted (alphabetical) order.
    for found in arch_dict.values():
        for lower in sorted(found)[:-1]:
            del subarch_dict[lower]

    # Sort packages within each architecture into version order.
    for subarch in sorted(subarch_dict):
        instances = subarch_dict[subarch]
        instances.sort(key=functools.cmp_to_key(cmpRpmHeader))

        if options.release == 'all':
            # Output all instances
            for header in instances:
                printInstance(header, group_count)
        else:
            # Output the latest
            printInstance(instances[-1], group_count)

    output.write("%s</Package>\n" % (group_count * indent))


def main():
    """Load the package data and write the Pkgmgr configuration.

    Reads the module-level settings prepared by the ``__main__`` section
    and writes the result to the module-level ``output`` stream.
    """
    if options.verbose:
        print("Loading package headers")

    if options.rpmdirs:
        package_dict = loadRpms(search_dirs)
    elif options.yumrepos:
        package_dict = loadRepos(repos)

    if options.verbose:
        print("Processing package headers")

    if options.pkgmgrhdr:
        if options.format == "rpm":
            output.write("<PackageList uri='%s' priority='%s' type='rpm'>\n"
                         % (options.uri, options.priority))
        else:
            output.write("<PackageList priority='%s' type='yum'>\n"
                         % (options.priority))

    # Open one nested Group element per requested group.
    group_count = 1
    if groups_list:
        for group in groups_list:
            output.write("%s<Group name='%s'>\n"
                         % (indent * group_count, group))
            group_count = group_count + 1

    # Process packages in name order
    for package_entry in sortedDictValues(package_dict):
        printPackage(package_entry, group_count)

    # Close the Group elements again, innermost first.
    if groups_list:
        group_count = group_count - 1
        while group_count:
            output.write("%s</Group>\n" % (indent * group_count))
            group_count = group_count - 1

    if options.pkgmgrhdr:
        output.write("</PackageList>\n")

    if options.verbose:
        print("%i package instances were processed" % len(package_dict))


if __name__ == "__main__":

    p = optparse.OptionParser()

    p.add_option('--archs', '-a', action='store',
                 default='all',
                 type='string',
                 help='''Comma separated list of subarchitectures to include.
                         The highest subarichitecture required in an
                         architecture group should specified. Lower
                         subarchitecture packages will be loaded if that
                         is all that is available. e.g. The higher of i386,
                         i486 and i586 packages will be loaded if -a i586
                         is specified. (Default: all).
                      ''')

    p.add_option('--rpmdirs', '-d', action='store',
                 type='string',
                 help='''Comma separated list of directories to scan for RPMS.
                         Wilcards are permitted.
                      ''')

    p.add_option('--enddate', '-e', action='store',
                 type='string',
                 help='End date for RPM file selection.')

    p.add_option('--format', '-f', action='store',
                 default='yum',
                 type='choice',
                 choices=('yum', 'rpm'),
                 help='''Format of the Output. Choices are yum or rpm.
                         (Default: yum)
                      ''')

    p.add_option('--groups', '-g', action='store',
                 type='string',
                 help='''List of comma separated groups to nest Package
                         entities in.
                      ''')

    p.add_option('--indent', '-i', action='store',
                 default=4,
                 type='int',
                 help='''Number of leading spaces to indent nested entries in the
                         output. (Default:4)
                      ''')

    p.add_option('--outfile', '-o', action='store',
                 type='string',
                 help='Output file name.')

    p.add_option('--pkgmgrhdr', '-P', action='store_true',
                 help='Include PackageList header in output.')

    p.add_option('--priority', '-p', action='store',
                 default=0,
                 type='int',
                 help='''Value to set priority attribute in the PackageList Tag.
                         (Default: 0)
                      ''')

    p.add_option('--release', '-r', action='store',
                 default='latest',
                 type='choice',
                 choices=('all', 'latest'),
                 help='''Which releases to include in the output. Choices are
                         all or latest. (Default: latest).''')

    p.add_option('--startdate', '-s', action='store',
                 type='string',
                 help='Start date for RPM file selection.')

    p.add_option('--uri', '-u', action='store',
                 type='string',
                 help='URI for PackageList header required for RPM format ouput.')

    p.add_option('--verbose', '-v', action='store_true',
                 help='Enable verbose output.')

    p.add_option('--yumrepos', '-y', action='store',
                 type='string',
                 help='''Comma separated list of YUM repository URLs to load.
                         NOTE: Each URL must end in a '/' character.''')

    options, arguments = p.parse_args()

    if options.pkgmgrhdr and options.format == 'rpm' and not options.uri:
        print("Option --uri must be specified to produce a PackageList Tag "
              "for rpm formatted files.")
        sys.exit(1)

    if not options.rpmdirs and not options.yumrepos:
        print("One of --rpmdirs and --yumrepos must be specified")
        sys.exit(1)

    # Set up list of directories to search
    if options.rpmdirs:
        search_dirs = []
        for d in options.rpmdirs.split(','):
            search_dirs += glob.glob(d)
        if options.verbose:
            print("The following directories will be scanned:")
            for d in search_dirs:
                print("    %s" % d)

    # Setup list of repos
    if options.yumrepos:
        repos = options.yumrepos.split(',')
        if options.verbose:
            print("The following repositories will be scanned:")
            for d in repos:
                print("    %s" % d)

    # Set up list of architectures to include and some mappings
    # to use later. Each arch_mapping list runs from the highest
    # subarchitecture to the lowest.
    arch_mapping = {'x86': ['i686', 'i586', 'i486', 'i386', 'athlon'],
                    'x86_64': ['x86_64'],
                    'ia64': ['ia64'],
                    'ppc': ['ppc'],
                    'ppc64': ['ppc64'],
                    'sparc': ['sparc'],
                    'noarch': ['noarch']}
    subarch_mapping = {'i686': 'x86',
                       'i586': 'x86',
                       'i486': 'x86',
                       'i386': 'x86',
                       'athlon': 'x86',
                       'x86_64': 'x86_64',
                       'ia64': 'ia64',
                       'ppc': 'ppc',
                       'ppc64': 'ppc64',
                       'sparc': 'sparc',
                       'noarch': 'noarch'}
    commandline_subarchs = options.archs.split(',')
    arch_list = []
    subarchs = []
    if 'all' in commandline_subarchs:
        subarchs.append('all')
    else:
        for s in commandline_subarchs:
            if s not in subarch_mapping:
                print("Error: Invalid subarchitecture specified: %s" % s)
                sys.exit(1)

            # Only allow one subarchitecture per architecture to be
            # specified, so track the architecture, not the subarchitecture.
            arch = subarch_mapping[s]
            if arch in arch_list:
                print("Error: Multiple subarchitecutes of the same "
                      "architecture specified.")
                sys.exit(1)
            arch_list.append(arch)

            # Add subarchitectures lower than the one specified to the list.
            # e.g. If i486 is specified this will add i386 to the list of
            # subarchitectures to load.
            i = arch_mapping[arch].index(s)
            subarchs += arch_mapping[arch][i:]

    indent = ' ' * options.indent

    if options.groups:
        groups_list = options.groups.split(',')
    else:
        groups_list = None

    if options.outfile:
        output = open(options.outfile, "w")
    else:
        output = sys.stdout

    try:
        main()
    finally:
        # Make sure everything written reaches the output file.
        if output is not sys.stdout:
            output.close()