about summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Robbins <drobbins@funtoo.org> 2017-10-14 17:38:05 -0600
committer Zac Medico <zmedico@gentoo.org> 2017-10-14 17:53:40 -0700
commit c5a2a0edc4f4b01b16a274268431fa21f7f678b2 (patch)
tree da18689e723369b115621770fab74928f24d2d55
parent man: Update URI of GLEP references. (diff)
download portage-c5a2a0ed.tar.gz
portage-c5a2a0ed.tar.bz2
portage-c5a2a0ed.zip
portdbapi: factor out _better_cache class
Better_cache -- now even better :) This version only scans individual categories on-demand. I have addressed concerns about PMS-compliance by enhancing the documentation so that developers are aware of what assumptions to make (and not make) when using better_cache. Closes: https://github.com/gentoo/portage/pull/219
-rw-r--r-- pym/portage/dbapi/porttree.py 124
1 files changed, 71 insertions, 53 deletions
diff --git a/pym/portage/dbapi/porttree.py b/pym/portage/dbapi/porttree.py
index 53edcd18f..f5979d2d0 100644
--- a/pym/portage/dbapi/porttree.py
+++ b/pym/portage/dbapi/porttree.py
@@ -16,7 +16,7 @@ portage.proxy.lazyimport.lazyimport(globals(),
'portage.package.ebuild.doebuild:doebuild',
'portage.util:ensure_dirs,shlex_split,writemsg,writemsg_level',
'portage.util.listdir:listdir',
- 'portage.versions:best,catpkgsplit,_pkgsplit@pkgsplit,ver_regexp,_pkg_str',
+ 'portage.versions:best,catsplit,catpkgsplit,_pkgsplit@pkgsplit,ver_regexp,_pkg_str',
)
from portage.cache import volatile
@@ -103,6 +103,68 @@ class _dummy_list(list):
except ValueError:
pass
+
+class _better_cache(object):
+
+ """
+ The purpose of better_cache is to locate catpkgs in repositories using ``os.listdir()`` as much as possible, which
+ is less expensive IO-wise than exhaustively doing a stat on each repo for a particular catpkg. better_cache stores a
+ list of repos in which particular catpkgs appear. Various dbapi methods use better_cache to locate repositories of
+ interest related to particular catpkg rather than performing an exhaustive scan of all repos/overlays.
+
+ The ``_items`` mapping of a better_cache may look like this::
+
+ { "sys-apps/portage" : [ repo1, repo2 ] }
+
+ Without better_cache, Portage will get slower and slower (due to excessive IO) as more overlays are added.
+
+ Also note that it is OK if this cache has some 'false positive' catpkgs in it. We use it to search for specific
+ catpkgs listed in ebuilds. The likelihood of a false positive catpkg in our cache causing a problem is extremely
+ low, because the user of our cache is passing us a catpkg that came from somewhere and has already undergone some
+ validation, and even then will further interrogate the short-list of repos we return to gather more information
+ on the catpkg.
+
+ Thus, the code below is optimized for speed rather than painstaking correctness. I have added a note to
+ ``dbapi.getRepositories()`` to ensure that developers are aware of this just in case.
+
+ The better_cache has been redesigned to perform on-demand scans -- it will only scan a category at a time, as
+ needed. This should further optimize IO performance by not scanning category directories that are not needed by
+ Portage.
+ """
+
+ def __init__(self, repositories):
+ self._items = collections.defaultdict(list)
+ self._scanned_cats = set()
+
+ # ordered list of all porttree locations we'll scan:
+ self._repo_list = [repo for repo in reversed(list(repositories))
+ if repo.location is not None]
+
+ def __getitem__(self, catpkg):
+ result = self._items.get(catpkg)
+ if result is not None:
+ return result
+
+ cat, pkg = catsplit(catpkg)
+ if cat not in self._scanned_cats:
+ self._scan_cat(cat)
+ return self._items[catpkg]
+
+ def _scan_cat(self, cat):
+ for repo in self._repo_list:
+ cat_dir = repo.location + "/" + cat
+ try:
+ pkg_list = os.listdir(cat_dir)
+ except OSError as e:
+ if e.errno not in (errno.ENOTDIR, errno.ENOENT, errno.ESTALE):
+ raise
+ continue
+ for p in pkg_list:
+ if os.path.isdir(cat_dir + "/" + p):
+ self._items[cat + "/" + p].append(repo)
+ self._scanned_cats.add(cat)
+
+
class portdbapi(dbapi):
"""this tree will scan a portage directory located at root (passed to init)"""
portdbapi_instances = _dummy_list()
@@ -346,11 +408,14 @@ class portdbapi(dbapi):
return None
def getRepositories(self, catpkg=None):
+
"""
With catpkg=None, this will return a complete list of repositories in this dbapi. With catpkg set to a value,
this method will return a short-list of repositories that contain this catpkg. Use this second approach if
possible, to avoid exhaustively searching all repos for a particular catpkg. It's faster for this method to
- find the catpkg than for you do it yourself.
+ find the catpkg than for you to do it yourself. When specifying catpkg, you should have reasonable assurance that
+ the category is valid and PMS-compliant as the caching mechanism we use does not perform validation checks for
+ categories.
This function is required for GLEP 42 compliance.
@@ -358,7 +423,8 @@ class portdbapi(dbapi):
catpkg; if None, return a list of all Repositories that contain a particular catpkg.
@return: a list of repositories.
"""
- if catpkg is not None and self._better_cache is not None and catpkg in self._better_cache:
+
+ if catpkg is not None and self._better_cache is not None:
return [repo.name for repo in self._better_cache[catpkg]]
return self._ordered_repo_name_list
@@ -796,12 +862,7 @@ class portdbapi(dbapi):
elif self._better_cache is None:
mytrees = self.porttrees
else:
- try:
- repos = self._better_cache[mycp]
- except KeyError:
- mytrees = []
- else:
- mytrees = [repo.location for repo in repos]
+ mytrees = [repo.location for repo in self._better_cache[mycp]]
for oroot in mytrees:
try:
file_list = os.listdir(os.path.join(oroot, mycp))
@@ -850,50 +911,7 @@ class portdbapi(dbapi):
"minimum-all-ignore-profile", "minimum-visible"):
self.xcache[x]={}
self.frozen=1
- self._better_cache = better_cache = collections.defaultdict(list)
-
- # The purpose of self._better_cache is to perform an initial quick scan of all repositories
- # using os.listdir(), which is less expensive IO-wise than exhaustively doing a stat on each
- # repo. self._better_cache stores a list of repos in which particular catpkgs appear.
- #
- # For example, better_cache data may look like this:
- #
- # { "sys-apps/portage" : [ repo1, repo2 ] }
- #
- # Without this tweak, Portage will get slower and slower as more overlays are added.
- #
- # Also note that it is OK if this cache has some 'false positive' catpkgs in it. We use it
- # to search for specific catpkgs listed in ebuilds. The likelihood of a false positive catpkg
- # in our cache causing a problem is extremely low. Thus, the code below is optimized for
- # speed rather than painstaking correctness.
-
- valid_categories = self.settings.categories
- for repo_loc in reversed(self.porttrees):
- repo = self.repositories.get_repo_for_location(repo_loc)
- try:
- categories = os.listdir(repo_loc)
- except OSError as e:
- if e.errno not in (errno.ENOTDIR, errno.ENOENT, errno.ESTALE):
- raise
- continue
-
- for cat in categories:
- if cat not in valid_categories:
- continue
- cat_dir = repo_loc + "/" + cat
- try:
- pkg_list = os.listdir(cat_dir)
- except OSError as e:
- if e.errno != errno.ENOTDIR:
- raise
- continue
-
- for p in pkg_list:
- catpkg_dir = cat_dir + "/" + p
- if not os.path.isdir(catpkg_dir):
- continue
- catpkg = cat + "/" + p
- better_cache[catpkg].append(repo)
+ self._better_cache = _better_cache(self.repositories)
def melt(self):
self.xcache = {}