aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/portage/xml/metadata.py')
-rw-r--r--lib/portage/xml/metadata.py505
1 files changed, 505 insertions, 0 deletions
diff --git a/lib/portage/xml/metadata.py b/lib/portage/xml/metadata.py
new file mode 100644
index 000000000..9e48dddde
--- /dev/null
+++ b/lib/portage/xml/metadata.py
@@ -0,0 +1,505 @@
+# Copyright 2010-2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+"""Provides an easy-to-use python interface to Gentoo's metadata.xml file.
+
+ Example usage:
+ >>> from portage.xml.metadata import MetaDataXML
+ >>> pkg_md = MetaDataXML('/usr/portage/app-misc/gourmet/metadata.xml',
+ ...     '/usr/portage/metadata/herds.xml')
+ >>> pkg_md
+ <MetaDataXML '/usr/portage/app-misc/gourmet/metadata.xml'>
+ >>> pkg_md.herds()
+ ('no-herd',)
+ >>> for maint in pkg_md.maintainers():
+ ... print "{0} ({1})".format(maint.email, maint.name)
+ ...
+ nixphoeni@gentoo.org (Joe Sapp)
+ >>> for flag in pkg_md.use():
+ ... print flag.name, "->", flag.description
+ ...
+ rtf -> Enable export to RTF
+ gnome-print -> Enable printing support using gnome-print
+ >>> upstream = pkg_md.upstream()
+ >>> upstream
+ (<_Upstream {'node': <Element 'upstream' at 0x...>, 'maintainers':
+ [<_Maintainer 'Thomas_Hinkle@alumni.brown.edu'>], 'changelogs': [],
+ 'docs': [], 'bugtrackers': [], 'remoteids': []}>,)
+ >>> upstream[0].maintainers[0].name
+ 'Thomas Mills Hinkle'
+"""
+
+from __future__ import unicode_literals
+
+__all__ = ('MetaDataXML', 'parse_metadata_use')
+
+import sys
+
+if sys.hexversion < 0x2070000 or \
+ (sys.hexversion < 0x3020000 and sys.hexversion >= 0x3000000):
+ # Our _MetadataTreeBuilder usage is incompatible with
+ # cElementTree in Python 2.6, 3.0, and 3.1:
+ # File "/usr/lib/python2.6/xml/etree/ElementTree.py", line 644, in findall
+ # assert self._root is not None
+ import xml.etree.ElementTree as etree
+else:
+ try:
+ import xml.etree.cElementTree as etree
+ except (SystemExit, KeyboardInterrupt):
+ raise
+ except (ImportError, SystemError, RuntimeError, Exception):
+ # broken or missing xml support
+ # https://bugs.python.org/issue14988
+ import xml.etree.ElementTree as etree
+
+try:
+ from xml.parsers.expat import ExpatError
+except (SystemExit, KeyboardInterrupt):
+ raise
+except (ImportError, SystemError, RuntimeError, Exception):
+ ExpatError = SyntaxError
+
+import re
+import xml.etree.ElementTree
+from portage import _encodings, _unicode_encode
+from portage.util import cmp_sort_key, unique_everseen
+
+if sys.hexversion >= 0x3000000:
+ # pylint: disable=W0622
+ basestring = str
+
+
class _MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder):
    """TreeBuilder whose doctype() hook is a deliberate no-op.

    Providing doctype() on the parser target avoids the deprecation
    warnings that Python >=2.7 would otherwise emit.
    """

    def doctype(self, name, pubid, system):
        """Ignore the document type declaration."""
        return None
+
class _Maintainer(object):
    """An object for representing one maintainer.

    Besides the attributes listed below, every child element of the
    maintainer node is exposed as an attribute named after its tag and
    holding its text.

    @type email: str or None
    @ivar email: Maintainer's email address. Used for both Gentoo and upstream.
    @type name: str or None
    @ivar name: Maintainer's name. Used for both Gentoo and upstream.
    @type description: str or None
    @ivar description: Description of what a maintainer does. Gentoo only.
    @type maint_type: str or None
    @ivar maint_type: GLEP67 maintainer type (project or person). Gentoo only.
    @type restrict: str or None
    @ivar restrict: e.g. &gt;=portage-2.2 means only maintains versions
        of Portage greater than 2.2. Should be DEPEND string with < and >
        converted to &lt; and &gt; respectively.
    @type status: str or None
    @ivar status: If set, either 'active' or 'inactive'. Upstream only.
    """

    def __init__(self, node):
        """Populate the maintainer from a <maintainer> element.

        @type node: xml.etree.ElementTree.Element
        @param node: the <maintainer> element to read
        """
        self.email = None
        self.name = None
        self.description = None
        self.maint_type = node.get('type')
        self.restrict = node.get('restrict')
        self.status = node.get('status')
        for child in node:
            # Comments and processing instructions carry a callable as
            # their tag; setattr() would raise TypeError for them.
            if callable(child.tag):
                continue
            setattr(self, child.tag, child.text)

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.email)
+
+
class _Useflag(object):
    """An object for representing one USE flag.

    @todo: Is there any way to have a keyword option to leave in
        <pkg> and <cat> for later processing?
    @type name: str or None
    @ivar name: USE flag
    @type restrict: str or None
    @ivar restrict: e.g. &gt;=portage-2.2 means flag is only available in
        versions greater than 2.2
    @type description: str
    @ivar description: description of the USE flag, with every run of
        whitespace collapsed to a single space
    """

    def __init__(self, node):
        """Populate the flag from a <flag> element.

        @type node: xml.etree.ElementTree.Element
        @param node: the <flag> element to read
        """
        self.name = node.get('name')
        self.restrict = node.get('restrict')

        fragments = [node.text or '']
        # Iterate the element directly: Element.getchildren() was removed
        # in Python 3.9.
        for child in node:
            # Comments and processing instructions have a callable tag;
            # their text is not part of the description, but the text
            # following them (tail) is.
            if not callable(child.tag):
                fragments.append(child.text or '')
            fragments.append(child.tail or '')

        # This takes care of tabs and newlines left from the file
        self.description = re.sub(r'\s+', ' ', ''.join(fragments))

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.name)
+
+
class _Upstream(object):
    """An object for representing one package's upstream.

    @type node: xml.etree.ElementTree.Element
    @ivar node: the <upstream> element all other attributes are read from
    @type maintainers: list
    @ivar maintainers: L{_Maintainer} objects for each upstream maintainer
    @type changelogs: list
    @ivar changelogs: text of each <changelog> element (URLs to upstream's
        ChangeLog file)
    @type docs: list
    @ivar docs: Sequence of tuples containing the text of each <doc>
        element in the first slot and its 'lang' attribute in the second,
        e.g.,
        [('http.../docs/en/tut.html', None), ('http.../doc/fr/tut.html', 'fr')]
    @type bugtrackers: list
    @ivar bugtrackers: text of each <bugs-to> element (URLs to upstream's
        bugtracker). May also contain an email address if prepended with
        'mailto:'
    @type remoteids: list
    @ivar remoteids: Sequence of tuples containing the text of each
        <remote-id> element in the first slot and its 'type' attribute in
        the second
    """

    def __init__(self, node):
        self.node = node
        self.maintainers = self.upstream_maintainers()
        self.changelogs = self.upstream_changelogs()
        self.docs = self.upstream_documentation()
        self.bugtrackers = self.upstream_bugtrackers()
        self.remoteids = self.upstream_remoteids()

    def __repr__(self):
        return "<%s %r>" % (type(self).__name__, self.__dict__)

    def upstream_bugtrackers(self):
        """Return the text of every <bugs-to> child, in document order."""
        locations = []
        for elem in self.node.findall('bugs-to'):
            locations.append(elem.text)
        return locations

    def upstream_changelogs(self):
        """Return the text of every <changelog> child, in document order."""
        changelogs = []
        for elem in self.node.findall('changelog'):
            changelogs.append(elem.text)
        return changelogs

    def upstream_documentation(self):
        """Return a (text, lang) tuple for every <doc> child."""
        return [(elem.text, elem.get('lang'))
            for elem in self.node.findall('doc')]

    def upstream_maintainers(self):
        """Return a L{_Maintainer} for every <maintainer> child."""
        found = []
        for elem in self.node.findall('maintainer'):
            found.append(_Maintainer(elem))
        return found

    def upstream_remoteids(self):
        """Return a (text, type) tuple for every <remote-id> child."""
        pairs = []
        for elem in self.node.findall('remote-id'):
            pairs.append((elem.text, elem.get('type')))
        return pairs
+
+
class MetaDataXML(object):
    """Access metadata.xml.

    Every accessor caches its result on the instance, so the XML tree is
    searched at most once per kind of information.
    """

    def __init__(self, metadata_xml_path, herds=None):
        """Parse a valid metadata.xml file.

        @type metadata_xml_path: str
        @param metadata_xml_path: path to a valid metadata.xml file
        @type herds: str, ElementTree or None
        @param herds: path to a herds.xml, or a pre-parsed ElementTree.
            When omitted, herd email lookups return None.
        @raise IOError: if C{metadata_xml_path} can not be read
        @raise SyntaxError: if the parser reports an ExpatError
        """

        self.metadata_xml_path = metadata_xml_path
        self._xml_tree = None

        try:
            self._xml_tree = etree.parse(
                _unicode_encode(metadata_xml_path,
                    encoding=_encodings['fs'], errors='strict'),
                parser=etree.XMLParser(target=_MetadataTreeBuilder()))
        except ImportError:
            # Deliberate best-effort (presumably for broken or missing XML
            # support): the tree stays None and every accessor then
            # returns an empty result.
            pass
        except ExpatError as e:
            raise SyntaxError("%s" % (e,))

        if isinstance(herds, etree.ElementTree):
            herds_etree = herds
            herds_path = None
        else:
            herds_etree = None
            herds_path = herds

        # Used for caching
        self._herdstree = herds_etree
        self._herds_path = herds_path
        self._descriptions = None
        self._maintainers = None
        self._herds = None
        self._herds_with_email = None
        self._useflags = None
        self._upstream = None

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.metadata_xml_path)

    @staticmethod
    def _preferred_contact(primary, fallback):
        """Return primary if it is non-blank, else a non-blank fallback.

        @rtype: str or None
        @return: the chosen string, or None if both are missing or blank
        """
        if primary is not None and primary.strip():
            return primary
        if fallback and fallback.strip():
            return fallback
        return None

    def _get_herd_email(self, herd):
        """Get a herd's email address.

        @type herd: str
        @param herd: herd whose email you want
        @rtype: str or None
        @return: email address, or None if the herd is not in herds.xml
            or herds.xml is unavailable or can not be parsed
        """

        # Some special herds are not listed in herds.xml, so there is no
        # point in loading the file for them.
        if herd in ('no-herd', 'maintainer-wanted', 'maintainer-needed'):
            return None

        if self._herdstree is None:
            if self._herds_path is None:
                return None
            try:
                self._herdstree = etree.parse(
                    _unicode_encode(self._herds_path,
                        encoding=_encodings['fs'], errors='strict'),
                    parser=etree.XMLParser(target=_MetadataTreeBuilder()))
            except (ImportError, IOError, SyntaxError):
                return None

        try:
            # Python 2.7 or >=3.2
            iterate = self._herdstree.iter
        except AttributeError:
            iterate = self._herdstree.getiterator

        for node in iterate('herd'):
            if node.findtext('name') == herd:
                return node.findtext('email')

        return None

    def herds(self, include_email=False):
        """Return the text of every <herd> element.

        The two result shapes are cached separately, so calls with
        different C{include_email} values never see each other's cache.

        @type include_email: bool
        @keyword include_email: if True, also look up the herd's email
        @rtype: tuple
        @return: if include_email is False, return a tuple of strings;
            if include_email is True, return a tuple of tuples such as:
            (('herd1', 'herd1@gentoo.org'), ('no-herd', None))
        """
        if self._herds is None:
            if self._xml_tree is None:
                self._herds = tuple()
            else:
                # An empty <herd/> has no text; report it as ''.
                self._herds = tuple(elem.text or ''
                    for elem in self._xml_tree.findall('herd'))

        if not include_email:
            return self._herds

        # getattr() tolerates instances created without __init__.
        if getattr(self, '_herds_with_email', None) is None:
            self._herds_with_email = tuple(
                (herd, self._get_herd_email(herd)) for herd in self._herds)

        return self._herds_with_email

    def descriptions(self):
        """Return the text of every <longdescription> element.

        @rtype: tuple
        @return: package descriptions in string format (None for an
            element without text)
        @todo: Support the C{lang} attribute
        """
        if self._descriptions is None:
            if self._xml_tree is None:
                self._descriptions = tuple()
            else:
                self._descriptions = tuple(e.text
                    for e in self._xml_tree.findall("longdescription"))

        return self._descriptions

    def maintainers(self):
        """Get maintainers' name, email and description.

        @rtype: tuple
        @return: a sequence of L{_Maintainer} objects in document order.
        """

        if self._maintainers is None:
            if self._xml_tree is None:
                self._maintainers = tuple()
            else:
                self._maintainers = tuple(_Maintainer(node)
                    for node in self._xml_tree.findall('maintainer'))

        return self._maintainers

    def use(self):
        """Get names and descriptions for USE flags defined in metadata.

        @rtype: tuple
        @return: a sequence of L{_Useflag} objects in document order.
        """

        if self._useflags is None:
            if self._xml_tree is None:
                self._useflags = tuple()
            else:
                try:
                    # Python 2.7 or >=3.2
                    iterate = self._xml_tree.iter
                except AttributeError:
                    iterate = self._xml_tree.getiterator
                self._useflags = tuple(_Useflag(node)
                    for node in iterate('flag'))

        return self._useflags

    def upstream(self):
        """Get upstream contact information.

        @rtype: tuple
        @return: a sequence of L{_Upstream} objects in document order.
        """

        if self._upstream is None:
            if self._xml_tree is None:
                self._upstream = tuple()
            else:
                self._upstream = tuple(_Upstream(node)
                    for node in self._xml_tree.findall('upstream'))

        return self._upstream

    def format_maintainer_string(self):
        """Format string containing maintainers and herds (emails if possible).
        Used by emerge to display maintainer information.
        Entries are sorted according to the rules stated on the bug wranglers page.

        The first entry is followed by a space and then the remaining
        entries joined by commas.

        @rtype: String
        @return: a string containing maintainers and herds
        """
        maintainers = []
        for maintainer in self.maintainers():
            contact = self._preferred_contact(maintainer.email, maintainer.name)
            if contact is not None:
                maintainers.append(contact)

        for herd, email in self.herds(include_email=True):
            if herd == "no-herd":
                continue
            contact = self._preferred_contact(email, herd)
            if contact is not None:
                maintainers.append(contact)

        maintainers = list(unique_everseen(maintainers))

        maint_str = ""
        if maintainers:
            maint_str = maintainers[0]
            others = maintainers[1:]
            if others:
                maint_str += " " + ",".join(others)

        return maint_str

    def format_upstream_string(self):
        """Format string containing upstream maintainers and bugtrackers.
        Used by emerge to display upstream information.

        @rtype: String
        @return: a string containing upstream maintainers and bugtrackers
        """
        maintainers = []
        for upstream in self.upstream():
            for maintainer in upstream.maintainers:
                contact = self._preferred_contact(
                    maintainer.email, maintainer.name)
                if contact is not None:
                    maintainers.append(contact)

            for bugtracker in upstream.bugtrackers:
                # An empty <bugs-to/> element has no text (None).
                if not bugtracker:
                    continue
                if bugtracker.startswith("mailto:"):
                    bugtracker = bugtracker[7:]
                maintainers.append(bugtracker)

        maintainers = list(unique_everseen(maintainers))
        maint_str = " ".join(maintainers)
        return maint_str
+
# Preference score per 'lang' attribute value; a higher score is preferred.
# Values missing from this table are scored -1 by _cmp_lang().
_lang_pref = {
    "": 0,
    "en": 1,
}


def _cmp_lang(a, b):
    """cmp-style comparison of two elements by language preference.

    An element without a 'lang' attribute is scored like lang="".
    The result is positive when C{a} is preferred over C{b}, negative
    when C{b} is preferred and zero when both score the same.
    """
    scores = [_lang_pref.get(elem.get("lang", ""), -1) for elem in (a, b)]
    return scores[0] - scores[1]
+
+
def _iter_flag_text(elem):
    """Yield the text inside elem in document order.

    Emulates Element.itertext() from python-2.7: the text of the element
    and of all its descendants is produced, together with the tail text
    of the descendants, but NOT the tail of elem itself.
    """
    if elem.text:
        yield elem.text
    for child in elem:
        for fragment in _iter_flag_text(child):
            yield fragment
        if child.tail:
            yield child.tail


def parse_metadata_use(xml_tree):
    """
    Records are wrapped in XML as per GLEP 56

    @param xml_tree: parsed metadata.xml; only its findall() method is used
    @rtype: dict
    @return: a dict with keys consisting of USE flag names; each value is
        a dict mapping the flag's 'restrict' attribute (None when absent)
        to the whitespace-normalized description
    """
    uselist = {}

    usetags = xml_tree.findall("use")
    if not usetags:
        return uselist

    # Sort by language preference in descending order.
    usetags.sort(key=cmp_sort_key(_cmp_lang), reverse=True)

    # It's possible to have multiple 'use' elements, and the DTD allows
    # use elements containing no flag elements.
    for usetag in usetags:
        for flag in usetag.findall("flag"):
            pkg_flag = flag.get("name")
            if pkg_flag is None:
                continue

            # (flag_restrict can be None)
            flag_restrict = flag.get("restrict")

            # Descriptions may exist for multiple languages, so
            # ignore all except the first description found for a
            # particular value of restrict (see bug 599060).
            if flag_restrict in uselist.get(pkg_flag, {}):
                continue

            inner_text = "".join(_iter_flag_text(flag))
            uselist.setdefault(pkg_flag, {})[flag_restrict] = \
                " ".join(inner_text.split())

    return uselist
+