From 9d6e6241954d001a5b49e4ea9a48c10e2a792958 Mon Sep 17 00:00:00 2001 From: "Chris St. Pierre" Date: Mon, 10 Dec 2012 17:19:54 -0600 Subject: generate XML schema docs from XML schemas themselves --- doc/exts/xmlschema.py | 810 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 810 insertions(+) create mode 100644 doc/exts/xmlschema.py (limited to 'doc/exts/xmlschema.py') diff --git a/doc/exts/xmlschema.py b/doc/exts/xmlschema.py new file mode 100644 index 000000000..fc5788107 --- /dev/null +++ b/doc/exts/xmlschema.py @@ -0,0 +1,810 @@ +""" Sphinx extension to generate documention from XML schemas. Known +to be woefully imcomplete, probably buggy, terrible error handling, +but it *works* for the subset of XML schema we use in Bcfg2. + +Provides the following directives: + +* ``.. xml:schema:: ``: Document an XML schema +* ``.. xml:type:: ``: Document a complexType or simpleType +* ``.. xml:group:: ``: Document an element group +* ``.. xml:attributegroup:: ``: Document an attributeGroup +* ``.. xml:element:: ``: Document an XML element + +Provides the following roles to link to the objects documented above: + +* ``:xml:schema:````: Link to an XML schema +* ``:xml:type:````: Link to a complexType or simpleType +* ``:xml:group:````: Link to an element group +* ``:xml:attributegroup:````: Link to an attributeGroup +* ``:xml:element:````: Link to an element +* ``:xml:attribute:`:```: Link to the attribute in the + given context. The context is the name of the containing object, + e.g., the parent attributeGroup, element, or complexType. +* ``:xml:datatype:````: Link to a built-in XML data type. + +Note that the entity being linked to does not need to have been +explicitly documented with a directive; e.g., if you document a schema +that contains a complexType, you can link to that type without having +used the ``xml:type::`` directive. + +Note also that it's far more reliable to link to a complexType than an +element, since element name collisions are fairly common. You should +avoid type name collisions whenever possible to maximize usability of +this extension. + +There are two configuration items that may be added to conf.py: + +* ``xmlschema_path`` gives the base path to all XML schemas. +* ``xmlschema_datatype_url`` gives a string pattern that will be used + to generate links to built-in XML types. It must contain a single + ``%s``, which will be replaced by the name of the type. +""" + +import os +import operator +import lxml.etree +from docutils import nodes +from sphinx import addnodes, roles +from docutils.statemachine import ViewList +from docutils.parsers.rst import directives +from sphinx.util.nodes import make_refnode, split_explicit_title, \ + nested_parse_with_titles +from sphinx.util.compat import Directive +from sphinx.domains import ObjType, Domain + +XS = "http://www.w3.org/2001/XMLSchema" +XS_NS = "{%s}" % XS +NSMAP = dict(xs=XS) + + +def comma_split(opt): + return opt.split(",") + + +def flag_or_split(opt): + try: + return opt.split(",") + except AttributeError: + return True + + +class _XMLDirective(Directive): + """ Superclass for the other XML schema directives. """ + required_arguments = 1 + option_spec = dict(namespace=directives.unchanged, + nochildren=directives.flag, + noattributegroups=directives.flag, + nodoc=directives.flag, + notext=directives.flag, + onlyattrs=comma_split, + requiredattrs=comma_split, + linktotype=flag_or_split, + noautodep=flag_or_split, + inlinetypes=comma_split) + types = [] + + def run(self): + name = self.arguments[0] + env = self.state.document.settings.env + ns_name = self.options.get('namespace') + try: + ns_uri = env.xmlschema_namespaces[ns_name] + except KeyError: + # URI given as namespace + ns_uri = ns_name + etype = None + for etype in self.types: + try: + entity = env.xmlschema_entities[ns_uri][etype][name] + break + except KeyError: + pass + else: + env.app.error("No XML %s %s found" % + (" or ".join(self.types), name)) + documentor = XMLDocumentor(entity, env, self.state, name=name, + ns_uri=ns_uri, + include=self.process_include(), + options=self.process_options()) + return documentor.document() + + def process_include(self): + return dict(children='nochildren' not in self.options, + attributegroups='noattributegroups' not in self.options, + doc='nodoc' not in self.options, + text='notext' not in self.options) + + def process_options(self): + return dict(onlyattrs=self.options.get('onlyattrs'), + requiredattrs=self.options.get('requiredattrs', []), + linktotype=self.options.get('linktotype', []), + noautodep=self.options.get('noautodep', False), + inlinetypes=self.options.get('inlinetypes', [])) + + +def XMLDirective(types): + class cls(_XMLDirective): + pass + + cls.__name__ = 'XML%sDirective' % types[0] + cls.types = types + return cls + + +class XMLDocumentor(object): + def __init__(self, entity, environment, state, name=None, ns_uri=None, + parent=None, include=None, options=None): + self.entity = entity + self.env = environment + self.entities = self.env.xmlschema_entities + self.namespaces = self.env.xmlschema_namespaces + self.namespaces_by_uri = self.env.xmlschema_namespaces_by_uri + self.state = state + self.include = include + self.options = options + self.app = self.env.app + + if name is None: + self.ns_uri = ns_uri + self.fqname = self.entity.get("name") + self.ns_name, self.name = self.split_ns(self.fqname) + if self.ns_uri is None and self.ns_name is not None: + self.ns_uri = self.namespaces[self.ns_name] + else: + self.ns_uri = ns_uri + self.ns_name = self.namespaces_by_uri[self.ns_uri] + self.name = name + if self.ns_name: + self.fqname = "%s:%s" % (self.ns_name, self.name) + else: + self.fqname = name + self.tname = nodes.strong(self.fqname, self.fqname) + self.tag = self.entity.tag[len(XS_NS):] + self.type = tag2type(self.tag) + self.parent = parent + if self.parent is None: + self.dependencies = [] + self.documented = [] + else: + self.dependencies = self.parent.dependencies + self.documented = self.parent.documented + + def document(self): + eid = (self.tag, self.fqname) + if eid in self.documented: + return [build_paragraph(get_xref(self.tag, eid[1]))] + else: + self.documented.append(eid) + + rv = [self.target_node(self.tag, self.ns_name, self.name)] + + data = addnodes.desc(objtype=self.tag) + targetid = get_target_id(self.tag, self.ns_name, self.name) + header = addnodes.desc_signature('', '', + first=True, + ids=[targetid]) + + if self.include['doc']: + header.extend([nodes.emphasis(self.tag, self.tag), + text(" "), self.tname]) + data.append(header) + contents = nodes.definition() + if self.include['doc']: + contents.append(self.get_doc(self.entity)) + contents.extend(getattr(self, "document_%s" % self.tag)()) + data.append(contents) + rv.append(data) + + if self.parent is None: + # avoid adding duplicate dependencies + added = [(self.type, self.name)] + for typ, name, entity in self.dependencies: + if not name: + name = entity.get('name') + if (typ, name) in added: + continue + ns_name, name = self.split_ns(name) + ns_uri = self.namespaces[ns_name] + if not entity: + try: + entity = self.entities[ns_uri][typ][name] + except KeyError: + self.app.warn("Dependency %s not found in schemas" % + get_target_id(typ, ns_name, name)) + continue + doc = self.get_documentor(entity, name=name, ns_uri=ns_uri) + rv.extend(doc.document()) + added.append((typ, name)) + return rv + + def document_schema(self): + element = self.entity.find("xs:element", namespaces=NSMAP) + if element is not None: + ns, name = self.split_ns(element.get("name")) + doc = self.get_documentor(element, name=name, + ns_uri=self.namespaces[ns]) + return doc.document() + else: + # no top-level element or group -- just a list of + # (abstract) complexTypes? + rv = [] + for ctype in self.entity.findall("xs:complexType", + namespaces=NSMAP): + ns, name = self.split_ns(ctype.get("name")) + doc = self.get_documentor(ctype, name=name, + ns_uri=self.namespaces[ns]) + rv.extend(doc.document()) + return rv + + def document_group(self): + rv = nodes.definition_list() + try: + (children, groups) = \ + self.get_child_elements(self.entity, nodeclass=nodes.paragraph) + except TypeError: + return [build_paragraph(nodes.strong("Any", "Any"), + " arbitrary element allowed")] + + append_node(rv, nodes.term, text("Elements:")) + append_node(rv, nodes.definition, *children) + if len(groups): + append_node(rv, nodes.term, text("Element groups:")) + append_node(rv, nodes.definition, *groups) + return rv + + def document_element(self): + fqtype = self.entity.get("type") + if fqtype: + (etype_ns, etype) = self.split_ns(fqtype) + ns_uri = self.get_namespace_uri(etype_ns) + values = self.get_values_from_type() + if values != "Any": + return [build_paragraph( + self.tname, + " takes only text content, which may be the ", + "following values: ", + values)] + elif etype in self.entities[ns_uri]["complexType"]: + if ((self.options['linktotype'] is True or + self.name in self.options['linktotype'] or + etype in self.options['linktotype'] or + fqtype in self.options['linktotype']) and + self.name not in self.options['inlinetypes'] and + etype not in self.options['inlinetypes']): + self.add_dep('complexType', fqtype, None) + return [build_paragraph("Type: ", + get_xref("type", fqtype))] + + typespec = self.entities[ns_uri]["complexType"][etype] + doc = self.get_documentor(typespec, + name=self.entity.get("name")) + rv = [self.target_node("complexType", etype_ns, etype)] + if self.include['doc'] and not self.get_doc(self.entity): + rv.append(self.get_doc(typespec)) + rv.extend(doc.document_complexType()) + return rv + else: + self.app.error("Unknown element type %s" % fqtype) + else: + rv = [] + typespec = self.entity.find("xs:complexType", namespaces=NSMAP) + if self.include['doc'] and not self.get_doc(self.entity): + rv.append(self.get_doc(typespec)) + if typespec is not None: + rv = [self.target_node("complexType", self.ns_name, self.name)] + doc = self.get_documentor(typespec) + rv.extend(doc.document_complexType()) + return rv + + def document_complexType(self): + rv = nodes.definition_list() + + ##### ATTRIBUTES ##### + table, tbody = self.get_attr_table() + attrs = self.get_attrs(self.entity) + if attrs: + tbody.extend(attrs) + + foreign_attr_groups = nodes.bullet_list() + for agroup in self.entity.xpath("xs:attributeGroup", + namespaces=NSMAP): + # if the attribute group is in another namespace, just + # link to it + ns, name = self.split_ns(agroup.get('ref')) + if ns != self.ns_name: + append_node( + foreign_attr_groups, + nodes.list_item, + build_paragraph(get_xref(tag2type("attributeGroup"), + ":".join([ns, name])))) + else: + tbody.extend(self.get_attrs( + self.entities['attributeGroup'][name])) + + if len(tbody): + append_node(rv, nodes.term, text("Attributes:")) + append_node(rv, nodes.definition, table) + if self.include['attributegroups'] and len(foreign_attr_groups): + append_node(rv, nodes.term, text("Attribute groups:")) + append_node(rv, nodes.definition, foreign_attr_groups) + + ##### ELEMENTS ##### + if self.include['children']: + # todo: distinguish between elements that may occur and + # elements that must occur + try: + (children, groups) = self.get_child_elements(self.entity) + except TypeError: + children = None + groups = None + rv.append(build_paragraph(nodes.strong("Any", "Any"), + " arbitrary child elements allowed")) + if children: + append_node(rv, nodes.term, text("Child elements:")) + append_node(rv, nodes.definition, + build_node(nodes.bullet_list, *children)) + + if groups: + append_node(rv, nodes.term, text("Element groups:")) + append_node(rv, nodes.definition, *groups) + + ##### TEXT CONTENT ##### + if (self.include['text'] and + self.entity.get("mixed", "false").lower() == "true"): + append_node(rv, nodes.term, text("Text content:")) + append_node(rv, nodes.definition, + build_paragraph(self.get_values_from_simpletype())) + return [rv] + + def document_attributeGroup(self): + attrs = self.get_attrs(self.entity) + if attrs: + table, tbody = self.get_attr_table() + tbody.extend(attrs) + return [table] + else: + return [] + + def get_attr_table(self): + atable = nodes.table() + atgroup = build_node(nodes.tgroup('', cols=5), + nodes.colspec(colwidth=10), + nodes.colspec(colwidth=50), + nodes.colspec(colwidth=20), + nodes.colspec(colwidth=10), + nodes.colspec(colwidth=10), + nodes.thead('', + build_table_row("Name", "Description", + "Values", "Required", + "Default"))) + atable.append(atgroup) + atable_body = nodes.tbody() + atgroup.append(atable_body) + return (atable, atable_body) + + def get_child_elements(self, el, nodeclass=None): + """ returns a tuple of (child element nodes, element group + nodes). HOWEVER, if _any_ child is allowed, returns True. """ + children = [] + groups = [] + if nodeclass is None: + nodeclass = nodes.list_item + + if el.find("xs:any", namespaces=NSMAP) is not None: + return True + + for child in el.findall("xs:element", namespaces=NSMAP): + node = nodeclass() + if child.get('ref'): + node.append(build_paragraph(get_xref('element', + child.get('ref')))) + else: + # child element given inline + doc = self.get_documentor(child, name=child.get('name')) + node.extend(doc.document()) + children.append(node) + + for group in el.findall("xs:group", namespaces=NSMAP): + if group.get('ref'): + name = group.get('ref') + node = nodeclass() + node.append(build_paragraph(get_xref('group', name))) + self.add_dep('group', name, None) + groups.append(node) + else: + rv = self.get_child_elements(group, nodeclass=nodeclass) + try: + children.extend(rv[0]) + groups.extend(rv[1]) + except TypeError: + return rv + + for container in el.xpath("xs:all|xs:choice|xs:sequence", + namespaces=NSMAP): + rv = self.get_child_elements(container, nodeclass=nodeclass) + try: + children.extend(rv[0]) + groups.extend(rv[1]) + except TypeError: + return rv + return (children, groups) + + def get_documentor(self, entity, name=None, ns_uri=None): + if name is None: + name = self.name + if ns_uri is None: + ns_uri = self.ns_uri + return XMLDocumentor(entity, self.env, self.state, name=name, + ns_uri=ns_uri, parent=self, options=self.options, + include=self.include) + + def get_attrs(self, el, context=None): + cnode = el + while context is None and cnode is not None: + context = cnode.get('name') + cnode = cnode.getparent() + + rows = [] + for attr in el.xpath("xs:attribute[@name]", namespaces=NSMAP): + name = attr.get("name") + if self.ns_name: + fqname = "%s:%s" % (self.ns_name, name) + else: + fqname = name + if (self.options['onlyattrs'] and + name not in self.options['onlyattrs'] and + fqname not in self.options['onlyattrs']): + continue + tag = attr.tag[len(XS_NS):] + row = [build_paragraph(self.target_node(tag, self.ns_name, context, + name), + nodes.literal(fqname, fqname))] + row.append(self.get_doc(attr)) + if attr.get("type") is not None: + row.append(build_paragraph( + self.get_values_from_type(entity=attr))) + else: + atype = attr.find("xs:simpleType", namespaces=NSMAP) + if atype: + row.append(self.get_values_from_simpletype(atype)) + else: + # todo: warn about no type found + pass + reqd = 0 + if (name in self.options['requiredattrs'] or + attr.get("use", "optional") == "required"): + row.append("Yes") + reqd = 1 + else: + row.append("No") + default = attr.get("default") + if default is None: + row.append("None") + else: + row.append(nodes.literal(default, default)) + # we record name and required separately to make sorting + # easier + rows.append((name, reqd, build_table_row(*row))) + rows.sort(key=operator.itemgetter(0)) + rows.sort(key=operator.itemgetter(1), reverse=True) + if not self.options['onlyattrs'] or '*' in self.options['onlyattrs']: + anyattr = el.find("xs:anyAttribute", namespaces=NSMAP) + if anyattr is not None: + rows.append((None, None, + build_table_row('*', self.get_doc(anyattr), + "Any", "No", "None"))) + return [r[2] for r in rows] + + def get_values_from_type(self, entity=None, typeattr='type'): + if entity is None: + entity = self.entity + ns_name, name = self.split_ns(entity.get(typeattr)) + ns_uri = self.get_namespace_uri(ns_name, entity=entity) + if ns_uri == XS: + return self.get_builtin_type(name) + elif name in self.entities[ns_uri]['simpleType']: + return self.get_values_from_simpletype( + self.entities[ns_uri]['simpleType'][name]) + else: + return "Any" + + def get_builtin_type(self, vtype): + if vtype == "boolean": + return get_value_list(["true", "false"]) + else: + return get_datatype_ref(vtype, vtype, + self.app.config.xmlschema_datatype_url) + + def get_doc(self, el): + try: + return self.parse(el.xpath("xs:annotation/xs:documentation", + namespaces=NSMAP)[0].text) + except IndexError: + return build_paragraph('') + + def parse(self, text): + node = nodes.paragraph() + vl = ViewList() + for line in text.splitlines(): + vl.append(line, '') + nested_parse_with_titles(self.state, vl, node) + try: + return node[0] + except IndexError: + return build_paragraph(text) + + def split_ns(self, name): + try: + (ns, name) = name.split(":") + except ValueError: + ns = self.ns_name + return (ns, name) + + def get_values_from_simpletype(self, entity=None): + if entity is None: + entity = self.entity + # todo: xs:union, xs:list + restriction = entity.find("xs:restriction", namespaces=NSMAP) + if restriction is None: + return "Any" + doc = self.get_doc(restriction) + if len(doc) == 1 and len(doc[0]) == 0: + # if get_doc returns a paragraph node with an empty Text + # node + enum = [e.get("value") + for e in restriction.findall("xs:enumeration", + namespaces=NSMAP)] + if len(enum): + return get_value_list(enum) + else: + return self.get_values_from_type(entity=restriction, + typeattr='base') + else: + return doc + + def add_dep(self, typ, name, entity): + try: + if name in self.options['noautodep']: + return + except TypeError: + if self.options['noautodep']: + return + self.dependencies.append((typ, name, entity)) + + def target_node(self, tag, ns, *extra): + targetid = get_target_id(tag, ns, *extra) + fqname = targetid[len(tag) + 1:] + rv = nodes.target('', '', ids=[targetid]) + self.add_domain_data(tag2type(tag), fqname, + (self.env.docname, targetid)) + return rv + + def add_domain_data(self, typ, key, data): + if key not in self.env.domaindata['xml'][typ]: + self.env.domaindata['xml'][typ][key] = data + + def get_namespace_uri(self, ns_name, entity=None): + if entity is None: + entity = self.entity + xs_ns = get_xs_ns(entity) + if ns_name == xs_ns: + return XS + else: + return self.namespaces[ns_name] + + +def tag2type(tag): + if tag in ['complexType', 'simpleType']: + return 'type' + elif tag == 'attributeGroup': + return 'attributegroup' + return tag + + +def text(txt): + return nodes.Text(txt, txt) + + +def append_node(parent, cls_or_node, *contents): + parent.append(build_node(cls_or_node, *contents)) + + +def build_node(cls_or_node, *contents): + if isinstance(cls_or_node, type): + rv = cls_or_node() + else: + rv = cls_or_node + rv.extend(contents) + return rv + + +def get_xref(typ, target, title=None): + if title is None: + title = target + ref = addnodes.pending_xref(title, + reftype=typ, + refdomain="xml", + reftarget=target) + ref.append(nodes.literal(title, title)) + return ref + + +def build_table_row(*vals): + rv = nodes.row('') + for val in vals: + if isinstance(val, nodes.Node): + node = val + else: + node = nodes.paragraph(val, val) + rv.append(nodes.entry(node, node)) + return rv + + +def build_paragraph(*args): + """ convenience method to build a paragraph node """ + rv = nodes.paragraph() + for content in args: + if isinstance(content, nodes.Node): + rv.append(content) + else: + rv.append(text(content)) + return rv + + +def get_target_id(etype, ns_name, *extra): + if ns_name: + return ":".join([etype, ns_name] + list(extra)) + else: + return ":".join([etype] + list(extra)) + + +def get_value_list(vals): + rv = nodes.paragraph() + if vals: + rv.append(nodes.literal(vals[0], vals[0])) + for i in range(1, len(vals)): + rv.append(text(" | ")) + rv.append(nodes.literal(vals[i], vals[i])) + return rv + + +def get_xs_ns(el): + return get_namespace_name(el, XS) + + +def get_namespace_name(el, ns_uri): + for name, ns in el.nsmap.items(): + if ns == ns_uri: + return name + return None + + +def get_datatype_ref(title, target, baseurl): + return build_node(nodes.reference('', '', refuri=baseurl % target), + nodes.literal(title, title)) + + +class XMLDatatypeRole(object): + def __init__(self, baseurl): + self.baseurl = baseurl + + def __call__(self, name, rawtext, text, lineno, inliner, options={}, + content=[]): + has_explicit_title, title, target = split_explicit_title(text) + return [get_datatype_ref(title, target, self.baseurl)], [] + + +class XMLXRefRole(roles.XRefRole): + def __init__(self, typ, **kwargs): + roles.XRefRole.__init__(self, **kwargs) + self.type = typ + + def process_link(self, env, refnode, has_explicit_title, title, target): + if (self.type == 'attribute' and + not has_explicit_title and + ':' in title): + title = title.split(':')[-1] + return roles.XRefRole.process_link(self, env, refnode, + has_explicit_title, title, target) + + +class XMLDomain(Domain): + name = "xml" + label = "XML" + + types = dict(schema=['schema'], + type=['complexType', 'simpleType'], + group=['group'], + attributegroup=['attributeGroup'], + element=['element'], + attribute=None) + + object_types = dict([(t, ObjType("XML %s" % t.title(), t)) + for t in types.keys()]) + directives = dict([(t, XMLDirective(h)) + for t, h in types.items() if h is not None]) + roles = dict([(t, XMLXRefRole(t)) for t in types.keys()]) + dangling_warnings = dict([(t, "unknown XML %s: %%(target)s" % t) + for t in types.keys()]) + initial_data = dict([(t, dict()) for t in types.keys()]) + data_version = 3 + + def clear_doc(self, docname): + to_del = [] + for dtype in self.types.keys(): + for key, (doc, _) in self.data[dtype].iteritems(): + if doc == docname: + to_del.append((dtype, key)) + for dtype, key in to_del: + del self.data[dtype][key] + + def resolve_xref(self, env, fromdocname, builder, typ, target, node, + contnode): + if target in self.data[typ]: + docname, labelid = self.data[typ][target] + else: + return None + return make_refnode(builder, fromdocname, docname, + labelid, contnode) + + def get_objects(self): + for dtype in self.types.keys(): + for name, (docname, tgtid) in self.data[dtype].iteritems(): + yield (name, name, dtype, docname, tgtid, + self.object_types[dtype].attrs['searchprio']) + + +def setup(app): + app.add_config_value('xmlschema_path', '.', False) + app.add_config_value('xmlschema_datatype_url', + 'http://www.w3.org/TR/xmlschema-2/#%s', False) + app.add_domain(XMLDomain) + app.connect('builder-inited', load_xml_schemas) + app.connect('builder-inited', add_xml_datatype_role) + + +def add_xml_datatype_role(app): + app.add_role_to_domain('xml', 'datatype', + XMLDatatypeRole(app.config.xmlschema_datatype_url)) + + +def load_xml_schemas(app): + entities = dict() + namespaces = dict() + namespaces_by_uri = dict() + schemapath = os.path.abspath(os.path.join(app.builder.env.srcdir, + app.config.xmlschema_path)) + for root, _, files in os.walk(schemapath): + for fname in files: + if not fname.endswith(".xsd"): + continue + path = os.path.join(root, fname) + relpath = path[len(schemapath):].strip("/") + schema = lxml.etree.parse(path).getroot() + + ns = schema.get("targetNamespace") + ns_name = get_namespace_name(schema, ns) + if ns_name not in namespaces: + namespaces[ns_name] = ns + if ns not in namespaces_by_uri: + namespaces_by_uri[ns] = ns_name + + if ns not in entities: + entities[ns] = dict(schema=dict(), + group=dict(), + attributeGroup=dict(), + element=dict(), + simpleType=dict(), + complexType=dict()) + # schemas don't require namespaces to be identified + # uniquely, but we let the user identify them either with + # or without the namespace + entities[None]['schema'][relpath] = schema + entities[ns]['schema'][relpath] = schema + for entity in schema.xpath("//xs:*[@name]", namespaces=NSMAP): + tag = entity.tag[len(XS_NS):] + if tag in entities[ns]: + entities[ns][tag][entity.get("name")] = entity + app.builder.env.xmlschema_namespaces = namespaces + app.builder.env.xmlschema_namespaces_by_uri = namespaces_by_uri + app.builder.env.xmlschema_entities = entities -- cgit v1.2.3-1-g7c22