diff options
Diffstat (limited to 'lib/portage/xml/metadata.py')
-rw-r--r-- | lib/portage/xml/metadata.py | 505 |
1 files changed, 505 insertions, 0 deletions
diff --git a/lib/portage/xml/metadata.py b/lib/portage/xml/metadata.py new file mode 100644 index 000000000..9e48dddde --- /dev/null +++ b/lib/portage/xml/metadata.py @@ -0,0 +1,505 @@ +# Copyright 2010-2017 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +"""Provides an easy-to-use python interface to Gentoo's metadata.xml file. + + Example usage: + >>> from portage.xml.metadata import MetaDataXML + >>> pkg_md = MetaDataXML('/usr/portage/app-misc/gourmet/metadata.xml') + >>> pkg_md + <MetaDataXML '/usr/portage/app-misc/gourmet/metadata.xml'> + >>> pkg_md.herds() + ['no-herd'] + >>> for maint in pkg_md.maintainers(): + ... print "{0} ({1})".format(maint.email, maint.name) + ... + nixphoeni@gentoo.org (Joe Sapp) + >>> for flag in pkg_md.use(): + ... print flag.name, "->", flag.description + ... + rtf -> Enable export to RTF + gnome-print -> Enable printing support using gnome-print + >>> upstream = pkg_md.upstream() + >>> upstream + [<_Upstream {'docs': [], 'remoteid': [], 'maintainer': + [<_Maintainer 'Thomas_Hinkle@alumni.brown.edu'>], 'bugtracker': [], + 'changelog': []}>] + >>> upstream[0].maintainer[0].name + 'Thomas Mills Hinkle' +""" + +from __future__ import unicode_literals + +__all__ = ('MetaDataXML', 'parse_metadata_use') + +import sys + +if sys.hexversion < 0x2070000 or \ + (sys.hexversion < 0x3020000 and sys.hexversion >= 0x3000000): + # Our _MetadataTreeBuilder usage is incompatible with + # cElementTree in Python 2.6, 3.0, and 3.1: + # File "/usr/lib/python2.6/xml/etree/ElementTree.py", line 644, in findall + # assert self._root is not None + import xml.etree.ElementTree as etree +else: + try: + import xml.etree.cElementTree as etree + except (SystemExit, KeyboardInterrupt): + raise + except (ImportError, SystemError, RuntimeError, Exception): + # broken or missing xml support + # https://bugs.python.org/issue14988 + import xml.etree.ElementTree as etree + +try: + from xml.parsers.expat import ExpatError +except (SystemExit, KeyboardInterrupt): + raise +except (ImportError, SystemError, RuntimeError, Exception): + ExpatError = SyntaxError + +import re +import xml.etree.ElementTree +from portage import _encodings, _unicode_encode +from portage.util import cmp_sort_key, unique_everseen + +if sys.hexversion >= 0x3000000: + # pylint: disable=W0622 + basestring = str + + +class _MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder): + """ + Implements doctype() as required to avoid deprecation warnings with + Python >=2.7. + """ + def doctype(self, name, pubid, system): + pass + +class _Maintainer(object): + """An object for representing one maintainer. + + @type email: str or None + @ivar email: Maintainer's email address. Used for both Gentoo and upstream. + @type name: str or None + @ivar name: Maintainer's name. Used for both Gentoo and upstream. + @type description: str or None + @ivar description: Description of what a maintainer does. Gentoo only. + @type maint_type: str or None + @ivar maint_type: GLEP67 maintainer type (project or person). Gentoo only. + @type restrict: str or None + @ivar restrict: e.g. >=portage-2.2 means only maintains versions + of Portage greater than 2.2. Should be DEPEND string with < and > + converted to < and > respectively. + @type status: str or None + @ivar status: If set, either 'active' or 'inactive'. Upstream only. + """ + + def __init__(self, node): + self.email = None + self.name = None + self.description = None + self.maint_type = node.get('type') + self.restrict = node.get('restrict') + self.status = node.get('status') + for attr in node: + setattr(self, attr.tag, attr.text) + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.email) + + +class _Useflag(object): + """An object for representing one USE flag. + + @todo: Is there any way to have a keyword option to leave in + <pkg> and <cat> for later processing? + @type name: str or None + @ivar name: USE flag + @type restrict: str or None + @ivar restrict: e.g. >=portage-2.2 means flag is only available in + versions greater than 2.2 + @type description: str + @ivar description: description of the USE flag + """ + + def __init__(self, node): + self.name = node.get('name') + self.restrict = node.get('restrict') + _desc = '' + if node.text: + _desc = node.text + for child in node.getchildren(): + _desc += child.text if child.text else '' + _desc += child.tail if child.tail else '' + # This takes care of tabs and newlines left from the file + self.description = re.sub(r'\s+', ' ', _desc) + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.name) + + +class _Upstream(object): + """An object for representing one package's upstream. + + @type maintainers: list + @ivar maintainers: L{_Maintainer} objects for each upstream maintainer + @type changelogs: list + @ivar changelogs: URLs to upstream's ChangeLog file in str format + @type docs: list + @ivar docs: Sequence of tuples containing URLs to upstream documentation + in the first slot and 'lang' attribute in the second, e.g., + [('http.../docs/en/tut.html', None), ('http.../doc/fr/tut.html', 'fr')] + @type bugtrackers: list + @ivar bugtrackers: URLs to upstream's bugtracker. May also contain an email + address if prepended with 'mailto:' + @type remoteids: list + @ivar remoteids: Sequence of tuples containing the project's hosting site + name in the first slot and the project's ID name or number for that + site in the second, e.g., [('sourceforge', 'systemrescuecd')] + """ + + def __init__(self, node): + self.node = node + self.maintainers = self.upstream_maintainers() + self.changelogs = self.upstream_changelogs() + self.docs = self.upstream_documentation() + self.bugtrackers = self.upstream_bugtrackers() + self.remoteids = self.upstream_remoteids() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.__dict__) + + def upstream_bugtrackers(self): + """Retrieve upstream bugtracker location from xml node.""" + return [e.text for e in self.node.findall('bugs-to')] + + def upstream_changelogs(self): + """Retrieve upstream changelog location from xml node.""" + return [e.text for e in self.node.findall('changelog')] + + def upstream_documentation(self): + """Retrieve upstream documentation location from xml node.""" + result = [] + for elem in self.node.findall('doc'): + lang = elem.get('lang') + result.append((elem.text, lang)) + return result + + def upstream_maintainers(self): + """Retrieve upstream maintainer information from xml node.""" + return [_Maintainer(m) for m in self.node.findall('maintainer')] + + def upstream_remoteids(self): + """Retrieve upstream remote ID from xml node.""" + return [(e.text, e.get('type')) for e in self.node.findall('remote-id')] + + +class MetaDataXML(object): + """Access metadata.xml""" + + def __init__(self, metadata_xml_path, herds): + """Parse a valid metadata.xml file. + + @type metadata_xml_path: str + @param metadata_xml_path: path to a valid metadata.xml file + @type herds: str or ElementTree + @param herds: path to a herds.xml, or a pre-parsed ElementTree + @raise IOError: if C{metadata_xml_path} can not be read + """ + + self.metadata_xml_path = metadata_xml_path + self._xml_tree = None + + try: + self._xml_tree = etree.parse(_unicode_encode(metadata_xml_path, + encoding=_encodings['fs'], errors='strict'), + parser=etree.XMLParser(target=_MetadataTreeBuilder())) + except ImportError: + pass + except ExpatError as e: + raise SyntaxError("%s" % (e,)) + + if isinstance(herds, etree.ElementTree): + herds_etree = herds + herds_path = None + else: + herds_etree = None + herds_path = herds + + # Used for caching + self._herdstree = herds_etree + self._herds_path = herds_path + self._descriptions = None + self._maintainers = None + self._herds = None + self._useflags = None + self._upstream = None + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.metadata_xml_path) + + def _get_herd_email(self, herd): + """Get a herd's email address. + + @type herd: str + @param herd: herd whose email you want + @rtype: str or None + @return: email address or None if herd is not in herds.xml + @raise IOError: if $PORTDIR/metadata/herds.xml can not be read + """ + + if self._herdstree is None: + try: + self._herdstree = etree.parse(_unicode_encode(self._herds_path, + encoding=_encodings['fs'], errors='strict'), + parser=etree.XMLParser(target=_MetadataTreeBuilder())) + except (ImportError, IOError, SyntaxError): + return None + + # Some special herds are not listed in herds.xml + if herd in ('no-herd', 'maintainer-wanted', 'maintainer-needed'): + return None + + try: + # Python 2.7 or >=3.2 + iterate = self._herdstree.iter + except AttributeError: + iterate = self._herdstree.getiterator + + for node in iterate('herd'): + if node.findtext('name') == herd: + return node.findtext('email') + + def herds(self, include_email=False): + """Return a list of text nodes for <herd>. + + @type include_email: bool + @keyword include_email: if True, also look up the herd's email + @rtype: tuple + @return: if include_email is False, return a list of strings; + if include_email is True, return a list of tuples containing: + [('herd1', 'herd1@gentoo.org'), ('no-herd', None); + """ + if self._herds is None: + if self._xml_tree is None: + self._herds = tuple() + else: + herds = [] + for elem in self._xml_tree.findall('herd'): + text = elem.text + if text is None: + text = '' + if include_email: + herd_mail = self._get_herd_email(text) + herds.append((text, herd_mail)) + else: + herds.append(text) + self._herds = tuple(herds) + + return self._herds + + def descriptions(self): + """Return a list of text nodes for <longdescription>. + + @rtype: list + @return: package description in string format + @todo: Support the C{lang} attribute + """ + if self._descriptions is None: + if self._xml_tree is None: + self._descriptions = tuple() + else: + self._descriptions = tuple(e.text \ + for e in self._xml_tree.findall("longdescription")) + + return self._descriptions + + def maintainers(self): + """Get maintainers' name, email and description. + + @rtype: list + @return: a sequence of L{_Maintainer} objects in document order. + """ + + if self._maintainers is None: + if self._xml_tree is None: + self._maintainers = tuple() + else: + self._maintainers = tuple(_Maintainer(node) \ + for node in self._xml_tree.findall('maintainer')) + + return self._maintainers + + def use(self): + """Get names and descriptions for USE flags defined in metadata. + + @rtype: list + @return: a sequence of L{_Useflag} objects in document order. + """ + + if self._useflags is None: + if self._xml_tree is None: + self._useflags = tuple() + else: + try: + # Python 2.7 or >=3.2 + iterate = self._xml_tree.iter + except AttributeError: + iterate = self._xml_tree.getiterator + self._useflags = tuple(_Useflag(node) \ + for node in iterate('flag')) + + return self._useflags + + def upstream(self): + """Get upstream contact information. + + @rtype: list + @return: a sequence of L{_Upstream} objects in document order. + """ + + if self._upstream is None: + if self._xml_tree is None: + self._upstream = tuple() + else: + self._upstream = tuple(_Upstream(node) \ + for node in self._xml_tree.findall('upstream')) + + return self._upstream + + def format_maintainer_string(self): + """Format string containing maintainers and herds (emails if possible). + Used by emerge to display maintainer information. + Entries are sorted according to the rules stated on the bug wranglers page. + + @rtype: String + @return: a string containing maintainers and herds + """ + maintainers = [] + for maintainer in self.maintainers(): + if maintainer.email is None or not maintainer.email.strip(): + if maintainer.name and maintainer.name.strip(): + maintainers.append(maintainer.name) + else: + maintainers.append(maintainer.email) + + for herd, email in self.herds(include_email=True): + if herd == "no-herd": + continue + if email is None or not email.strip(): + if herd and herd.strip(): + maintainers.append(herd) + else: + maintainers.append(email) + + maintainers = list(unique_everseen(maintainers)) + + maint_str = "" + if maintainers: + maint_str = maintainers[0] + maintainers = maintainers[1:] + if maintainers: + maint_str += " " + ",".join(maintainers) + + return maint_str + + def format_upstream_string(self): + """Format string containing upstream maintainers and bugtrackers. + Used by emerge to display upstream information. + + @rtype: String + @return: a string containing upstream maintainers and bugtrackers + """ + maintainers = [] + for upstream in self.upstream(): + for maintainer in upstream.maintainers: + if maintainer.email is None or not maintainer.email.strip(): + if maintainer.name and maintainer.name.strip(): + maintainers.append(maintainer.name) + else: + maintainers.append(maintainer.email) + + for bugtracker in upstream.bugtrackers: + if bugtracker.startswith("mailto:"): + bugtracker = bugtracker[7:] + maintainers.append(bugtracker) + + + maintainers = list(unique_everseen(maintainers)) + maint_str = " ".join(maintainers) + return maint_str + +# lang with higher value is preferred +_lang_pref = { + "" : 0, + "en": 1, +} + + +def _cmp_lang(a, b): + a_score = _lang_pref.get(a.get("lang", ""), -1) + b_score = _lang_pref.get(b.get("lang", ""), -1) + + return a_score - b_score + + +def parse_metadata_use(xml_tree): + """ + Records are wrapped in XML as per GLEP 56 + returns a dict with keys constisting of USE flag names and values + containing their respective descriptions + """ + uselist = {} + + usetags = xml_tree.findall("use") + if not usetags: + return uselist + + # Sort by language preference in descending order. + usetags.sort(key=cmp_sort_key(_cmp_lang), reverse=True) + + # It's possible to have multiple 'use' elements. + for usetag in usetags: + flags = usetag.findall("flag") + if not flags: + # DTD allows use elements containing no flag elements. + continue + + for flag in flags: + pkg_flag = flag.get("name") + if pkg_flag is not None: + flag_restrict = flag.get("restrict") + + # Descriptions may exist for multiple languages, so + # ignore all except the first description found for a + # particular value of restrict (see bug 599060). + try: + uselist[pkg_flag][flag_restrict] + except KeyError: + pass + else: + continue + + # emulate the Element.itertext() method from python-2.7 + inner_text = [] + stack = [] + stack.append(flag) + while stack: + obj = stack.pop() + if isinstance(obj, basestring): + inner_text.append(obj) + continue + if isinstance(obj.text, basestring): + inner_text.append(obj.text) + if isinstance(obj.tail, basestring): + stack.append(obj.tail) + stack.extend(reversed(obj)) + + if flag.get("name") not in uselist: + uselist[flag.get("name")] = {} + + # (flag_restrict can be None) + uselist[flag.get("name")][flag_restrict] = " ".join("".join(inner_text).split()) + return uselist + |