#!/usr/bin/env python
"""
bcfg2-repo-validate checks all xml files in Bcfg2
repos against their respective XML schemas.
"""
__revision__ = '$Revision$'

import glob
import lxml.etree
import os
import sys
import fnmatch
import logging
import Bcfg2.Options
from subprocess import Popen, PIPE, STDOUT


def validate(filename, schemafile, schema=None, xinclude=True):
    """Validate a file against the given lxml.etree.XMLSchema.

    Arguments:
    filename   -- path of the XML file to check
    schemafile -- path of the .xsd schema (used for error reporting and
                  as a fallback to build ``schema``)
    schema     -- pre-built lxml.etree.XMLSchema object; built from
                  ``schemafile`` when None
    xinclude   -- pass --xinclude to xmllint when re-checking failures

    Return True on success, False on failure.
    """
    if schema is None:
        # if no schema object was provided, instantiate one
        try:
            schema = lxml.etree.XMLSchema(lxml.etree.parse(schemafile))
        except Exception:
            # was a bare except; narrowed so SystemExit/KeyboardInterrupt
            # still propagate
            logging.warn("Failed to process schema %s", schemafile)
            return False

    try:
        datafile = lxml.etree.parse(filename)
    except SyntaxError:
        # lxml.etree.XMLSyntaxError subclasses SyntaxError; re-run the
        # file through xmllint to give the user a detailed error report
        logging.warn("%s ***FAILS*** to parse \t\t<----", filename)
        lint = Popen(["xmllint", filename], stdout=PIPE, stderr=STDOUT)
        # communicate() waits for process exit, so no extra wait() needed
        logging.warn(lint.communicate()[0])
        return False
    except IOError:
        logging.warn("Failed to open file %s \t\t<---", filename)
        return False

    if schema.validate(datafile):
        logging.info("%s checks out", filename)
    else:
        # lxml reported a validation failure; re-check with xmllint so we
        # can both produce a readable report and resolve XIncludes first
        cmd = ["xmllint"]
        if xinclude:
            cmd.append("--xinclude")
        cmd.extend(["--noout", "--schema", schemafile, filename])
        lint = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        output = lint.communicate()[0]
        if lint.wait():
            logging.warn("%s ***FAILS*** to verify \t\t<----", filename)
            logging.warn(output)
            return False
        else:
            # xmllint disagreed with lxml; trust xmllint's exit status
            logging.info("%s checks out", filename)
    return True


if __name__ == '__main__':
    opts = {'repo': Bcfg2.Options.SERVER_REPOSITORY,
            'verbose': Bcfg2.Options.VERBOSE,
            'configfile': Bcfg2.Options.CFILE,
            'schema': Bcfg2.Options.SCHEMA_PATH,
            'stdin': Bcfg2.Options.FILES_ON_STDIN,
            'require-schema': Bcfg2.Options.REQUIRE_SCHEMA}
    setup = Bcfg2.Options.OptionParser(opts)
    setup.parse(sys.argv[1:])
    verbose = setup['verbose']
    cpath = setup['configfile']
    schemadir = setup['schema']
    os.chdir(schemadir)
    repo = setup['repo']

    # set up logging
    level = logging.WARNING
    if verbose:
        level = logging.INFO
    logging.basicConfig(level=level, format="%(message)s")

    if setup['stdin']:
        # file list supplied on stdin, one path per line
        file_list = [line.strip() for line in sys.stdin.readlines()]
        info_list = [f for f in file_list
                     if os.path.basename(f) == 'info.xml']
        metadata_list = fnmatch.filter(file_list, "*/Metadata/*.xml")
        clients_list = fnmatch.filter(file_list, "*/Metadata/clients.xml")
        bundle_list = fnmatch.filter(file_list, "*/Bundler/*.xml")
        genshibundle_list = fnmatch.filter(file_list, "*/Bundler/*.genshi")
        pkg_list = fnmatch.filter(file_list, "*/Pkgmgr/*.xml")
        base_list = fnmatch.filter(file_list, "*/Base/*.xml")
        rules_list = fnmatch.filter(file_list, "*/Rules/*.xml")
        imageinfo_list = fnmatch.filter(file_list,
                                        "*/etc/report-configuration.xml")
        services_list = fnmatch.filter(file_list, "*/Svcmgr/*.xml")
        deps_list = fnmatch.filter(file_list, "*/Deps/*.xml")
        dec_list = fnmatch.filter(file_list, "*/Decisions/*")
        pkgcfg_list = fnmatch.filter(file_list, "*/Packages/config.xml")
        gp_list = fnmatch.filter(file_list, "*/GroupPatterns/config.xml")
        # only check Properties files whose matching .xsd was also listed
        props_list = [f
                      for f in fnmatch.filter(file_list,
                                              "*/Properties/*.xml")
                      if "%s.xsd" % os.path.splitext(f)[0] in file_list]
    else:
        # not reading files from stdin

        # Get a list of all info.xml files in the bcfg2 repository
        info_list = []
        for infodir in ['Cfg', 'TGenshi', 'TCheetah']:
            for root, dirs, files in os.walk('%s/%s' % (repo, infodir)):
                info_list.extend([os.path.join(root, f) for f in files
                                  if f == 'info.xml'])

        # get metadata list
        metadata_list = glob.glob("%s/Metadata/groups.xml" % repo)

        # get other file lists
        clients_list = glob.glob("%s/Metadata/clients.xml" % repo)
        bundle_list = glob.glob("%s/Bundler/*.xml" % repo)
        genshibundle_list = glob.glob("%s/Bundler/*.genshi" % repo)
        pkg_list = glob.glob("%s/Pkgmgr/*.xml" % repo)
        base_list = glob.glob("%s/Base/*.xml" % repo)
        rules_list = glob.glob("%s/Rules/*.xml" % repo)
        imageinfo_list = glob.glob("%s/etc/report-configuration.xml" % repo)
        services_list = glob.glob("%s/Svcmgr/*.xml" % repo)
        deps_list = glob.glob("%s/Deps/*.xml" % repo)
        dec_list = glob.glob("%s/Decisions/*" % repo)
        pkgcfg_list = glob.glob("%s/Packages/config.xml" % repo)
        gp_list = glob.glob('%s/GroupPatterns/config.xml' % repo)
        props_list = glob.glob("%s/Properties/*.xml" % repo)

    # include files in metadata_list; walk XIncludes from groups.xml
    # breadth-first so included group files also get schema-checked
    ref_bundles = set()
    xdata = lxml.etree.parse("%s/Metadata/groups.xml" % repo)
    included = set([ent.get('href') for ent in
                    xdata.findall('./{http://www.w3.org/2001/XInclude}include')])
    while included:
        try:
            filename = included.pop()
        except KeyError:
            continue
        metadata_list.append("%s/Metadata/%s" % (repo, filename))
        groupdata = lxml.etree.parse("%s/Metadata/%s" % (repo, filename))
        group_ents = [ent.get('href') for ent in
                      groupdata.findall('./{http://www.w3.org/2001/XInclude}include')]
        for ent in group_ents:
            included.add(ent)
        # guard against re-processing a file that includes itself
        included.discard(filename)

    # get all XIncluded bundles
    xdata.xinclude()
    for bundle in xdata.findall("//Bundle"):
        ref_bundles.add("%s/Bundler/%s" % (repo, bundle.get('name')))

    # check for multiple default group definitions
    default_groups = []
    for grp in lxml.etree.parse("%s/Metadata/groups.xml"
                                % repo).findall('.//Group'):
        if grp.get('default') == 'true':
            default_groups.append(grp)
    if len(default_groups) > 1:
        logging.warn("*** Warning: Multiple default groups defined")
        for grp in default_groups:
            logging.warn(" %s", grp.get('name'))

    # verify attributes for configuration entries
    # (as defined in doc/server/configurationentries)
    # TODO: See if it is possible to do this in the schema instead
    required_configuration_attrs = {
        'device': ['name', 'owner', 'group', 'dev_type'],
        'directory': ['name', 'owner', 'group', 'perms'],
        'file': ['name', 'owner', 'group', 'perms'],
        'hardlink': ['name', 'to'],
        'symlink': ['name', 'to'],
        'ignore': ['name'],
        'nonexistent': ['name'],
        'permissions': ['name', 'owner', 'group', 'perms']}
    for rfile in rules_list:
        try:
            xdata = lxml.etree.parse(rfile)
        except lxml.etree.XMLSyntaxError as e:
            logging.warn("Failed to parse %s: %s", rfile, e)
            # BUGFIX: without this continue, the loop body re-examined
            # xdata from the previous iteration (or hit a NameError on
            # the first one)
            continue
        for posixpath in xdata.findall("//Path"):
            pathname = posixpath.get('name')
            pathtype = posixpath.get('type')
            pathset = set(posixpath.attrib.keys())
            try:
                required_attrs = set(required_configuration_attrs[pathtype] +
                                     ['type'])
            except KeyError:
                # unknown/absent type attribute; the schema check will
                # flag it, nothing to verify here
                continue
            if 'dev_type' in required_attrs:
                dev_type = posixpath.get('dev_type')
                if dev_type in ['block', 'char']:
                    # check if major/minor are specified
                    required_attrs |= set(['major', 'minor'])
            if pathset.issuperset(required_attrs):
                continue
            else:
                logging.warn("The following required attributes are missing "
                             "for Path %s in %s: %s", pathname, rfile,
                             [attr for attr in
                              required_attrs.difference(pathset)])

    # warn on duplicate Pkgmgr entries with the same priority
    pset = set()
    for plist in pkg_list:
        try:
            xdata = lxml.etree.parse(plist)
        except lxml.etree.XMLSyntaxError as e:
            logging.warn("Failed to parse %s: %s", plist, e)
            # BUGFIX: same stale-xdata problem as the Rules loop above
            continue
        # get priority, type, group
        priority = xdata.getroot().get('priority')
        ptype = xdata.getroot().get('type')
        for pkg in xdata.findall("//Package"):
            parent = pkg.getparent()
            if parent.tag == 'Group':
                grp = parent.get('name')
                # BUGFIX: the original called getparent() on the group
                # *name* string instead of the Group element, so a
                # nested parent group was never detected
                gparent = parent.getparent()
                if gparent is not None and gparent.tag == 'Group':
                    pgrp = gparent.get('name')
                else:
                    pgrp = 'none'
            else:
                grp = 'none'
                pgrp = 'none'
            ptuple = (pkg.get('name'), priority, ptype, grp, pgrp)
            # check if package is already listed with same priority,
            # type, grp
            if ptuple in pset:
                logging.warn("Duplicate Package %s, priority:%s, type:%s",
                             pkg.get('name'), priority, ptype)
            else:
                pset.add(ptuple)

    # map each schema (relative to schemadir) to the files it validates
    filesets = {"%s/metadata.xsd": metadata_list,
                "%s/clients.xsd": clients_list,
                "%s/info.xsd": info_list,
                "%s/bundle.xsd": bundle_list + genshibundle_list,
                "%s/pkglist.xsd": pkg_list,
                "%s/base.xsd": base_list,
                "%s/rules.xsd": rules_list,
                "%s/report-configuration.xsd": imageinfo_list,
                "%s/services.xsd": services_list,
                "%s/deps.xsd": deps_list,
                "%s/decisions.xsd": dec_list,
                "%s/packages.xsd": pkgcfg_list,
                "%s/grouppatterns.xsd": gp_list}

    failures = 0
    for schemaname, filelist in list(filesets.items()):
        if filelist:
            # avoid loading schemas for empty file lists
            try:
                schema = lxml.etree.XMLSchema(
                    lxml.etree.parse(schemaname % schemadir))
            except Exception:
                # was a bare except; narrowed (see validate())
                logging.warn("Failed to process schema %s",
                             schemaname % schemadir)
                failures = 1
                continue
            for filename in filelist:
                if not validate(filename, schemaname % schemadir,
                                schema=schema, xinclude=not setup['stdin']):
                    failures = 1

    # check Properties files against their schemas
    for filename in props_list:
        logging.info("checking %s" % filename)
        schemafile = "%s.xsd" % os.path.splitext(filename)[0]
        if os.path.exists(schemafile):
            if not validate(filename, schemafile,
                            xinclude=not setup['stdin']):
                failures = 1
        elif setup['require-schema']:
            logging.warn("No schema found for %s", filename)
            failures = 1

    # print out missing bundle information
    logging.info("")
    if not setup['stdin']:
        # if we've taken a list of files on stdin, there's an
        # excellent chance that referenced bundles do not exist, so
        # skip this check
        for bundle in ref_bundles:
            # check for both regular and genshi bundles
            xmlbundle = "%s.xml" % bundle
            genshibundle = "%s.genshi" % bundle
            allbundles = bundle_list + genshibundle_list
            if xmlbundle not in allbundles and genshibundle not in allbundles:
                logging.info("*** Warning: Bundle %s referenced, but does "
                             "not exist.", bundle)

    # verify bundle name attribute matches filename
    for bundle in (bundle_list + genshibundle_list):
        fname = bundle.split('Bundler/')[1].split('.')[0]
        xdata = lxml.etree.parse(bundle)
        bname = xdata.getroot().get('name')
        if fname != bname:
            logging.warn("The following names are inconsistent:")
            logging.warn(" Filename is %s", fname)
            logging.warn(" Bundle name found in %s is %s", fname, bname)

    # exit status 1 if anything failed to validate, else 0
    raise SystemExit(failures)