# Copyright 2010-2013 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 """Provides an easy-to-use python interface to Gentoo's metadata.xml file. Example usage: >>> from portage.xml.metadata import MetaDataXML >>> pkg_md = MetaDataXML('/usr/portage/app-misc/gourmet/metadata.xml') >>> pkg_md >>> pkg_md.herds() ['no-herd'] >>> for maint in pkg_md.maintainers(): ... print "{0} ({1})".format(maint.email, maint.name) ... nixphoeni@gentoo.org (Joe Sapp) >>> for flag in pkg_md.use(): ... print flag.name, "->", flag.description ... rtf -> Enable export to RTF gnome-print -> Enable printing support using gnome-print >>> upstream = pkg_md.upstream() >>> upstream [<_Upstream {'docs': [], 'remoteid': [], 'maintainer': [<_Maintainer 'Thomas_Hinkle@alumni.brown.edu'>], 'bugtracker': [], 'changelog': []}>] >>> upstream[0].maintainer[0].name 'Thomas Mills Hinkle' """ from __future__ import unicode_literals __all__ = ('MetaDataXML',) import sys if sys.hexversion < 0x2070000 or \ (sys.hexversion < 0x3020000 and sys.hexversion >= 0x3000000): # Our _MetadataTreeBuilder usage is incompatible with # cElementTree in Python 2.6, 3.0, and 3.1: # File "/usr/lib/python2.6/xml/etree/ElementTree.py", line 644, in findall # assert self._root is not None import xml.etree.ElementTree as etree else: try: import xml.etree.cElementTree as etree except (SystemExit, KeyboardInterrupt): raise except (ImportError, SystemError, RuntimeError, Exception): # broken or missing xml support # http://bugs.python.org/issue14988 import xml.etree.ElementTree as etree try: from xml.parsers.expat import ExpatError except (SystemExit, KeyboardInterrupt): raise except (ImportError, SystemError, RuntimeError, Exception): ExpatError = SyntaxError import re import xml.etree.ElementTree from portage import _encodings, _unicode_encode from portage.util import unique_everseen class _MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder): """ Implements doctype() as required to avoid deprecation warnings with Python >=2.7. """ def doctype(self, name, pubid, system): pass class _Maintainer(object): """An object for representing one maintainer. @type email: str or None @ivar email: Maintainer's email address. Used for both Gentoo and upstream. @type name: str or None @ivar name: Maintainer's name. Used for both Gentoo and upstream. @type description: str or None @ivar description: Description of what a maintainer does. Gentoo only. @type restrict: str or None @ivar restrict: e.g. >=portage-2.2 means only maintains versions of Portage greater than 2.2. Should be DEPEND string with < and > converted to < and > respectively. @type status: str or None @ivar status: If set, either 'active' or 'inactive'. Upstream only. """ def __init__(self, node): self.email = None self.name = None self.description = None self.restrict = node.get('restrict') self.status = node.get('status') for attr in node: setattr(self, attr.tag, attr.text) def __repr__(self): return "<%s %r>" % (self.__class__.__name__, self.email) class _Useflag(object): """An object for representing one USE flag. @todo: Is there any way to have a keyword option to leave in and for later processing? @type name: str or None @ivar name: USE flag @type restrict: str or None @ivar restrict: e.g. >=portage-2.2 means flag is only available in versions greater than 2.2 @type description: str @ivar description: description of the USE flag """ def __init__(self, node): self.name = node.get('name') self.restrict = node.get('restrict') _desc = '' if node.text: _desc = node.text for child in node.getchildren(): _desc += child.text if child.text else '' _desc += child.tail if child.tail else '' # This takes care of tabs and newlines left from the file self.description = re.sub('\s+', ' ', _desc) def __repr__(self): return "<%s %r>" % (self.__class__.__name__, self.name) class _Upstream(object): """An object for representing one package's upstream. @type maintainers: list @ivar maintainers: L{_Maintainer} objects for each upstream maintainer @type changelogs: list @ivar changelogs: URLs to upstream's ChangeLog file in str format @type docs: list @ivar docs: Sequence of tuples containing URLs to upstream documentation in the first slot and 'lang' attribute in the second, e.g., [('http.../docs/en/tut.html', None), ('http.../doc/fr/tut.html', 'fr')] @type bugtrackers: list @ivar bugtrackers: URLs to upstream's bugtracker. May also contain an email address if prepended with 'mailto:' @type remoteids: list @ivar remoteids: Sequence of tuples containing the project's hosting site name in the first slot and the project's ID name or number for that site in the second, e.g., [('sourceforge', 'systemrescuecd')] """ def __init__(self, node): self.node = node self.maintainers = self.upstream_maintainers() self.changelogs = self.upstream_changelogs() self.docs = self.upstream_documentation() self.bugtrackers = self.upstream_bugtrackers() self.remoteids = self.upstream_remoteids() def __repr__(self): return "<%s %r>" % (self.__class__.__name__, self.__dict__) def upstream_bugtrackers(self): """Retrieve upstream bugtracker location from xml node.""" return [e.text for e in self.node.findall('bugs-to')] def upstream_changelogs(self): """Retrieve upstream changelog location from xml node.""" return [e.text for e in self.node.findall('changelog')] def upstream_documentation(self): """Retrieve upstream documentation location from xml node.""" result = [] for elem in self.node.findall('doc'): lang = elem.get('lang') result.append((elem.text, lang)) return result def upstream_maintainers(self): """Retrieve upstream maintainer information from xml node.""" return [_Maintainer(m) for m in self.node.findall('maintainer')] def upstream_remoteids(self): """Retrieve upstream remote ID from xml node.""" return [(e.text, e.get('type')) for e in self.node.findall('remote-id')] class MetaDataXML(object): """Access metadata.xml""" def __init__(self, metadata_xml_path, herds): """Parse a valid metadata.xml file. @type metadata_xml_path: str @param metadata_xml_path: path to a valid metadata.xml file @type herds: str or ElementTree @param herds: path to a herds.xml, or a pre-parsed ElementTree @raise IOError: if C{metadata_xml_path} can not be read """ self.metadata_xml_path = metadata_xml_path self._xml_tree = None try: self._xml_tree = etree.parse(_unicode_encode(metadata_xml_path, encoding=_encodings['fs'], errors='strict'), parser=etree.XMLParser(target=_MetadataTreeBuilder())) except ImportError: pass except ExpatError as e: raise SyntaxError("%s" % (e,)) if isinstance(herds, etree.ElementTree): herds_etree = herds herds_path = None else: herds_etree = None herds_path = herds # Used for caching self._herdstree = herds_etree self._herds_path = herds_path self._descriptions = None self._maintainers = None self._herds = None self._useflags = None self._upstream = None def __repr__(self): return "<%s %r>" % (self.__class__.__name__, self.metadata_xml_path) def _get_herd_email(self, herd): """Get a herd's email address. @type herd: str @param herd: herd whose email you want @rtype: str or None @return: email address or None if herd is not in herds.xml @raise IOError: if $PORTDIR/metadata/herds.xml can not be read """ if self._herdstree is None: try: self._herdstree = etree.parse(_unicode_encode(self._herds_path, encoding=_encodings['fs'], errors='strict'), parser=etree.XMLParser(target=_MetadataTreeBuilder())) except (ImportError, IOError, SyntaxError): return None # Some special herds are not listed in herds.xml if herd in ('no-herd', 'maintainer-wanted', 'maintainer-needed'): return None try: # Python 2.7 or >=3.2 iterate = self._herdstree.iter except AttributeError: iterate = self._herdstree.getiterator for node in iterate('herd'): if node.findtext('name') == herd: return node.findtext('email') def herds(self, include_email=False): """Return a list of text nodes for . @type include_email: bool @keyword include_email: if True, also look up the herd's email @rtype: tuple @return: if include_email is False, return a list of strings; if include_email is True, return a list of tuples containing: [('herd1', 'herd1@gentoo.org'), ('no-herd', None); """ if self._herds is None: if self._xml_tree is None: self._herds = tuple() else: herds = [] for elem in self._xml_tree.findall('herd'): text = elem.text if text is None: text = '' if include_email: herd_mail = self._get_herd_email(text) herds.append((text, herd_mail)) else: herds.append(text) self._herds = tuple(herds) return self._herds def descriptions(self): """Return a list of text nodes for . @rtype: list @return: package description in string format @todo: Support the C{lang} attribute """ if self._descriptions is None: if self._xml_tree is None: self._descriptions = tuple() else: self._descriptions = tuple(e.text \ for e in self._xml_tree.findall("longdescription")) return self._descriptions def maintainers(self): """Get maintainers' name, email and description. @rtype: list @return: a sequence of L{_Maintainer} objects in document order. """ if self._maintainers is None: if self._xml_tree is None: self._maintainers = tuple() else: self._maintainers = tuple(_Maintainer(node) \ for node in self._xml_tree.findall('maintainer')) return self._maintainers def use(self): """Get names and descriptions for USE flags defined in metadata. @rtype: list @return: a sequence of L{_Useflag} objects in document order. """ if self._useflags is None: if self._xml_tree is None: self._useflags = tuple() else: try: # Python 2.7 or >=3.2 iterate = self._xml_tree.iter except AttributeError: iterate = self._xml_tree.getiterator self._useflags = tuple(_Useflag(node) \ for node in iterate('flag')) return self._useflags def upstream(self): """Get upstream contact information. @rtype: list @return: a sequence of L{_Upstream} objects in document order. """ if self._upstream is None: if self._xml_tree is None: self._upstream = tuple() else: self._upstream = tuple(_Upstream(node) \ for node in self._xml_tree.findall('upstream')) return self._upstream def format_maintainer_string(self): """Format string containing maintainers and herds (emails if possible). Used by emerge to display maintainer information. Entries are sorted according to the rules stated on the bug wranglers page. @rtype: String @return: a string containing maintainers and herds """ maintainers = [] for maintainer in self.maintainers(): if maintainer.email is None or not maintainer.email.strip(): if maintainer.name and maintainer.name.strip(): maintainers.append(maintainer.name) else: maintainers.append(maintainer.email) for herd, email in self.herds(include_email=True): if herd == "no-herd": continue if email is None or not email.strip(): if herd and herd.strip(): maintainers.append(herd) else: maintainers.append(email) maintainers = list(unique_everseen(maintainers)) maint_str = "" if maintainers: maint_str = maintainers[0] maintainers = maintainers[1:] if maintainers: maint_str += " " + ",".join(maintainers) return maint_str def format_upstream_string(self): """Format string containing upstream maintainers and bugtrackers. Used by emerge to display upstream information. @rtype: String @return: a string containing upstream maintainers and bugtrackers """ maintainers = [] for upstream in self.upstream(): for maintainer in upstream.maintainers: if maintainer.email is None or not maintainer.email.strip(): if maintainer.name and maintainer.name.strip(): maintainers.append(maintainer.name) else: maintainers.append(maintainer.email) for bugtracker in upstream.bugtrackers: if bugtracker.startswith("mailto:"): bugtracker = bugtracker[7:] maintainers.append(bugtracker) maintainers = list(unique_everseen(maintainers)) maint_str = " ".join(maintainers) return maint_str