diff options
author | Zac Medico <zmedico@gentoo.org> | 2021-02-20 15:11:46 -0800 |
---|---|---|
committer | Zac Medico <zmedico@gentoo.org> | 2021-02-22 03:48:41 -0800 |
commit | b9ef191c74982b0e8d837aa7dd256dc3c52f7d2c (patch) | |
tree | 3fc22eeaaab74207c1f9cd200851c23c4aaf901c /lib | |
parent | make.defaults: prevent USE="${USE} ..." misbehavior (diff) | |
download | portage-b9ef191c74982b0e8d837aa7dd256dc3c52f7d2c.tar.gz portage-b9ef191c74982b0e8d837aa7dd256dc3c52f7d2c.tar.bz2 portage-b9ef191c74982b0e8d837aa7dd256dc3c52f7d2c.zip |
MirrorLayoutConfig: content digest support (bug 756778)
In order to support mirror layouts that use content
digests, extend MirrorLayoutConfig validate_structure and
get_best_supported_layout methods to support an optional
filename parameter of type DistfileName which includes a digests
attribute. Use the new parameter to account for availability
of specific distfile content digests when validating and selecting
mirror layouts which require those digests.
The DistfileName type represents a distfile name and associated
content digests, used by MirrorLayoutConfig and related layout
implementations.
The path of a distfile within a layout must be dependent on
nothing more than the distfile name and its associated content
digests. For filename-hash layout, path is dependent on distfile
name alone, and the get_filenames implementation yields strings
corresponding to distfile names. For content-hash layout, path is
dependent on content digest alone, and the get_filenames
implementation yields DistfileName instances whose names are equal
to content digest values. The content-hash layout simply lacks
the filename-hash layout's innate ability to translate a distfile
path to a distfile name, and instead carries an innate ability
to translate a distfile path to a content digest.
In order to prepare for a migration from filename-hash to
content-hash layout, all consumers of the layout get_filenames
method need to be updated to work with content digests as a
substitute for distfile names. For example, in order to prepare
emirrordist for content-hash, a key-value store needs to be
added as a means to associate distfile names with content
digest values yielded by the content-hash get_filenames
implementation.
Bug: https://bugs.gentoo.org/756778
Signed-off-by: Zac Medico <zmedico@gentoo.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/portage/package/ebuild/fetch.py | 98 | ||||
-rw-r--r-- | lib/portage/tests/ebuild/test_fetch.py | 33 |
2 files changed, 114 insertions, 17 deletions
diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py index e0fecaf23..af9edd91e 100644 --- a/lib/portage/package/ebuild/fetch.py +++ b/lib/portage/package/ebuild/fetch.py @@ -1,4 +1,4 @@ -# Copyright 2010-2020 Gentoo Authors +# Copyright 2010-2021 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 __all__ = ['fetch'] @@ -344,6 +344,57 @@ _size_suffix_map = { } +class DistfileName(str): + """ + The DistfileName type represents a distfile name and associated + content digests, used by MirrorLayoutConfig and related layout + implementations. + + The path of a distfile within a layout must be dependent on + nothing more than the distfile name and its associated content + digests. For filename-hash layout, path is dependent on distfile + name alone, and the get_filenames implementation yields strings + corresponding to distfile names. For content-hash layout, path is + dependent on content digest alone, and the get_filenames + implementation yields DistfileName instances whose names are equal + to content digest values. The content-hash layout simply lacks + the filename-hash layout's innate ability to translate a distfile + path to a distfile name, and instead caries an innate ability + to translate a distfile path to a content digest. + + In order to prepare for a migration from filename-hash to + content-hash layout, all consumers of the layout get_filenames + method need to be updated to work with content digests as a + substitute for distfile names. For example, in order to prepare + emirrordist for content-hash, a key-value store needs to be + added as a means to associate distfile names with content + digest values yielded by the content-hash get_filenames + implementation. 
+ """ + def __new__(cls, s, digests=None): + return str.__new__(cls, s) + + def __init__(self, s, digests=None): + super().__init__() + self.digests = {} if digests is None else digests + + def digests_equal(self, other): + """ + Test if digests compare equal to those of another instance. + """ + if not isinstance(other, DistfileName): + return False + matches = [] + for algo, digest in self.digests.items(): + other_digest = other.digests.get(algo) + if other_digest is not None: + if other_digest == digest: + matches.append(algo) + else: + return False + return bool(matches) + + class FlatLayout: def get_path(self, filename): return filename @@ -439,19 +490,36 @@ class MirrorLayoutConfig: self.structure = data @staticmethod - def validate_structure(val): + def validate_structure(val, filename=None): + """ + If the filename argument is given, then supported hash + algorithms are constrained by digests available in the filename + digests attribute. + + @param val: layout.conf entry args + @param filename: filename with digests attribute + @return: True if args are valid for available digest algorithms, + and False otherwise + """ if val[0] == 'flat': return FlatLayout.verify_args(val) - if val[0] == 'filename-hash': + elif val[0] == 'filename-hash': return FilenameHashLayout.verify_args(val) return False - def get_best_supported_layout(self): + def get_best_supported_layout(self, filename=None): + """ + If the filename argument is given, then acceptable hash + algorithms are constrained by digests available in the filename + digests attribute. 
+ + @param filename: filename with digests attribute + """ for val in self.structure: - if self.validate_structure(val): + if self.validate_structure(val, filename=filename): if val[0] == 'flat': return FlatLayout(*val[1:]) - if val[0] == 'filename-hash': + elif val[0] == 'filename-hash': return FilenameHashLayout(*val[1:]) # fallback return FlatLayout() @@ -515,7 +583,7 @@ def get_mirror_url(mirror_url, filename, mysettings, cache_path=None): # For some protocols, urlquote is required for correct behavior, # and it must not be used for other protocols like rsync and sftp. - path = mirror_conf.get_best_supported_layout().get_path(filename) + path = mirror_conf.get_best_supported_layout(filename=filename).get_path(filename) if urlparse(mirror_url).scheme in ('ftp', 'http', 'https'): path = urlquote(path) return mirror_url + "/distfiles/" + path @@ -722,15 +790,23 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, if hasattr(myuris, 'items'): for myfile, uri_set in myuris.items(): for myuri in uri_set: - file_uri_tuples.append((myfile, myuri)) + file_uri_tuples.append( + (DistfileName(myfile, digests=mydigests.get(myfile)), myuri) + ) if not uri_set: - file_uri_tuples.append((myfile, None)) + file_uri_tuples.append( + (DistfileName(myfile, digests=mydigests.get(myfile)), None) + ) else: for myuri in myuris: if urlparse(myuri).scheme: - file_uri_tuples.append((os.path.basename(myuri), myuri)) + file_uri_tuples.append( + (DistfileName(myfile, digests=mydigests.get(myfile)), myuri) + ) else: - file_uri_tuples.append((os.path.basename(myuri), None)) + file_uri_tuples.append( + (DistfileName(myfile, digests=mydigests.get(myfile)), None) + ) filedict = OrderedDict() primaryuri_dict = {} diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py index c5ea8253b..b88ae3efb 100644 --- a/lib/portage/tests/ebuild/test_fetch.py +++ b/lib/portage/tests/ebuild/test_fetch.py @@ -7,7 +7,8 @@ import tempfile import portage from portage import 
shutil, os -from portage.const import BASH_BINARY, PORTAGE_PYM_PATH +from portage.checksum import checksum_str +from portage.const import BASH_BINARY, MANIFEST2_HASH_DEFAULTS, PORTAGE_PYM_PATH from portage.tests import TestCase from portage.tests.resolver.ResolverPlayground import ResolverPlayground from portage.tests.util.test_socks5 import AsyncHTTPServer @@ -18,8 +19,14 @@ from portage.util._async.SchedulerInterface import SchedulerInterface from portage.util._eventloop.global_event_loop import global_event_loop from portage.package.ebuild.config import config from portage.package.ebuild.digestgen import digestgen -from portage.package.ebuild.fetch import (_download_suffix, fetch, FlatLayout, - FilenameHashLayout, MirrorLayoutConfig) +from portage.package.ebuild.fetch import ( + DistfileName, + _download_suffix, + fetch, + FilenameHashLayout, + FlatLayout, + MirrorLayoutConfig, +) from _emerge.EbuildFetcher import EbuildFetcher from _emerge.Package import Package @@ -142,9 +149,14 @@ class EbuildFetchTestCase(TestCase): content["/distfiles/layout.conf"] = layout_data.encode("utf8") for k, v in distfiles.items(): + filename = DistfileName( + k, + digests=dict((algo, checksum_str(v, hashname=algo)) for algo in MANIFEST2_HASH_DEFAULTS), + ) + # mirror path for layout in layouts: - content["/distfiles/" + layout.get_path(k)] = v + content["/distfiles/" + layout.get_path(filename)] = v # upstream path content["/distfiles/{}.txt".format(k)] = v @@ -499,6 +511,10 @@ class EbuildFetchTestCase(TestCase): io.StringIO(conf)) def test_filename_hash_layout_get_filenames(self): + filename = DistfileName( + 'foo-1.tar.gz', + digests=dict((algo, checksum_str(b'', hashname=algo)) for algo in MANIFEST2_HASH_DEFAULTS), + ) layouts = ( FlatLayout(), FilenameHashLayout('SHA1', '4'), @@ -506,7 +522,6 @@ class EbuildFetchTestCase(TestCase): FilenameHashLayout('SHA1', '8:16'), FilenameHashLayout('SHA1', '8:16:24'), ) - filename = 'foo-1.tar.gz' for layout in layouts: distdir = 
tempfile.mkdtemp() @@ -520,6 +535,12 @@ class EbuildFetchTestCase(TestCase): with open(path, 'wb') as f: pass - self.assertEqual([filename], list(layout.get_filenames(distdir))) + file_list = list(layout.get_filenames(distdir)) + self.assertTrue(len(file_list) > 0) + for filename_result in file_list: + if isinstance(filename_result, DistfileName): + self.assertTrue(filename_result.digests_equal(filename)) + else: + self.assertEqual(filename_result, str(filename)) finally: shutil.rmtree(distdir) |