aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZac Medico <zmedico@gentoo.org>2019-01-19 00:39:47 -0800
committerZac Medico <zmedico@gentoo.org>2019-01-20 14:57:28 -0800
commiteb2674c1c2d84b2c9e9923e1c1666cb7a596c36c (patch)
treeb85724e26e35def7b6f50a5cac9818f56d22fec6
parentResolverPlayground: add chgrp to essential_binaries (diff)
downloadportage-eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c.tar.gz
portage-eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c.tar.bz2
portage-eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c.zip
INSTALL_MASK: index patterns anchored with leading slash (bug 675826)
For scalability, use a tree structure to index patterns that are anchored with a leading slash. Patterns anchored with leading slash are indexed by leading non-glob components, making it possible to minimize the number of fnmatch calls. For example: /foo*/bar -> {'.': ['/foo*/bar']} /foo/bar* -> {'foo': {'.': ['/foo/bar*']}} /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}} Bug: https://bugs.gentoo.org/675826 Signed-off-by: Zac Medico <zmedico@gentoo.org>
-rw-r--r--lib/portage/tests/util/test_install_mask.py36
-rw-r--r--lib/portage/util/install_mask.py84
2 files changed, 113 insertions, 7 deletions
diff --git a/lib/portage/tests/util/test_install_mask.py b/lib/portage/tests/util/test_install_mask.py
index f651eb4b7..6a29db79a 100644
--- a/lib/portage/tests/util/test_install_mask.py
+++ b/lib/portage/tests/util/test_install_mask.py
@@ -119,6 +119,42 @@ class InstallMaskTestCase(TestCase):
),
)
),
+ (
+ '/usr/share/locale '
+ '-/usr/share/locale/en* '
+ '-/usr/share/locale/kf5_all_languages '
+ '-/usr/share/locale/locale.alias',
+ (
+ (
+ 'usr/share/locale/en',
+ False,
+ ),
+ (
+ 'usr/share/locale/en_GB',
+ False,
+ ),
+ (
+ 'usr/share/locale/en/kf5_all_languages',
+ False,
+ ),
+ (
+ 'usr/share/locale/locale.alias',
+ False,
+ ),
+ (
+ 'usr/share/locale/es',
+ True,
+ ),
+ (
+ 'usr/share/locale/fr',
+ True,
+ ),
+ (
+ 'usr/share/locale',
+ True,
+ ),
+ )
+ ),
)
for install_mask_str, paths in cases:
diff --git a/lib/portage/util/install_mask.py b/lib/portage/util/install_mask.py
index 32627eb05..a8c0cbda5 100644
--- a/lib/portage/util/install_mask.py
+++ b/lib/portage/util/install_mask.py
@@ -3,8 +3,11 @@
__all__ = ['install_mask_dir', 'InstallMask']
+import collections
import errno
import fnmatch
+import functools
+import operator
import sys
from portage import os, _unicode_decode
@@ -18,13 +21,82 @@ else:
_unicode = unicode
+def _defaultdict_tree():
+ return collections.defaultdict(_defaultdict_tree)
+
+
+_pattern = collections.namedtuple('_pattern', (
+ 'orig_index',
+ 'is_inclusive',
+ 'pattern',
+ 'leading_slash',
+))
+
+
class InstallMask(object):
def __init__(self, install_mask):
"""
@param install_mask: INSTALL_MASK value
@type install_mask: str
"""
- self._install_mask = install_mask.split()
+ # Patterns not anchored with leading slash
+ self._unanchored = []
+
+ # Patterns anchored with leading slash are indexed by leading
+ # non-glob components, making it possible to minimize the
+ # number of fnmatch calls. For example:
+ # /foo*/bar -> {'.': ['/foo*/bar']}
+ # /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}
+ # /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}
+ self._anchored = _defaultdict_tree()
+ for orig_index, pattern in enumerate(install_mask.split()):
+ # if pattern starts with -, possibly exclude this path
+ is_inclusive = not pattern.startswith('-')
+ if not is_inclusive:
+ pattern = pattern[1:]
+ pattern_obj = _pattern(orig_index, is_inclusive, pattern, pattern.startswith('/'))
+ # absolute path pattern
+ if pattern_obj.leading_slash:
+ current_dir = self._anchored
+ for component in list(filter(None, pattern.split('/'))):
+ if '*' in component:
+ break
+ else:
+ current_dir = current_dir[component]
+ current_dir.setdefault('.', []).append(pattern_obj)
+
+ # filename
+ else:
+ self._unanchored.append(pattern_obj)
+
+ def _iter_relevant_patterns(self, path):
+ """
+ Iterate over patterns that may be relevant for the given path.
+
+ Patterns anchored with leading / are indexed by leading
+ non-glob components, making it possible to minimize the
+ number of fnmatch calls.
+ """
+ current_dir = self._anchored
+ components = list(filter(None, path.split('/')))
+ patterns = []
+ patterns.extend(current_dir.get('.', []))
+ for component in components:
+ next_dir = current_dir.get(component, None)
+ if next_dir is None:
+ break
+ current_dir = next_dir
+ patterns.extend(current_dir.get('.', []))
+
+ if patterns:
+ # Sort by original pattern index, since order matters for
+ # non-inclusive patterns.
+ patterns.extend(self._unanchored)
+ if any(not pattern.is_inclusive for pattern in patterns):
+ patterns.sort(key=operator.attrgetter('orig_index'))
+ return iter(patterns)
+
+ return iter(self._unanchored)
def match(self, path):
"""
@@ -34,13 +106,11 @@ class InstallMask(object):
@return: True if path matches INSTALL_MASK, False otherwise
"""
ret = False
- for pattern in self._install_mask:
- # if pattern starts with -, possibly exclude this path
- is_inclusive = not pattern.startswith('-')
- if not is_inclusive:
- pattern = pattern[1:]
+
+ for pattern_obj in self._iter_relevant_patterns(path):
+ is_inclusive, pattern = pattern_obj.is_inclusive, pattern_obj.pattern
# absolute path pattern
- if pattern.startswith('/'):
+ if pattern_obj.leading_slash:
# handle trailing slash for explicit directory match
if path.endswith('/'):
pattern = pattern.rstrip('/') + '/'