diff options
Diffstat (limited to 'lib/portage/sync')
22 files changed, 944 insertions, 239 deletions
diff --git a/lib/portage/sync/controller.py b/lib/portage/sync/controller.py index 987aa5481..1d55c8a5d 100644 --- a/lib/portage/sync/controller.py +++ b/lib/portage/sync/controller.py @@ -1,4 +1,4 @@ -# Copyright 2014-2020 Gentoo Authors +# Copyright 2014-2024 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 import sys @@ -8,6 +8,11 @@ import pwd import warnings import portage + +portage.proxy.lazyimport.lazyimport( + globals(), + "portage.sync.revision_history:get_repo_revision_history", +) from portage import os from portage.progress import ProgressBar @@ -89,13 +94,20 @@ class SyncManager: # files have sane permissions. os.umask(0o22) - self.module_controller = portage.sync.module_controller - self.module_names = self.module_controller.module_names self.hooks = {} for _dir in ["repo.postsync.d", "postsync.d"]: hooks = get_hooks_from_dir(_dir, prefix=self.settings["PORTAGE_CONFIGROOT"]) self.hooks[_dir] = hooks + @property + def module_controller(self): + # Not stored as local attribute because it's not picklable. + return portage.sync.module_controller + + @property + def module_names(self): + return self.module_controller.module_names + def __getattr__(self, name): if name == "async": warnings.warn( @@ -137,7 +149,7 @@ class SyncManager: if repo.sync_type in self.module_names: tasks = [self.module_controller.get_class(repo.sync_type)] else: - msg = "\n%s: Sync module '%s' is not an installed/known type'\n" % ( + msg = "\n{}: Sync module '{}' is not an installed/known type'\n".format( bad("ERROR"), repo.sync_type, ) @@ -163,6 +175,7 @@ class SyncManager: status = None taskmaster = TaskHandler(callback=self.do_callback) taskmaster.run_tasks(tasks, func, status, options=task_opts) + get_repo_revision_history(self.settings["EROOT"], [repo]) if master_hooks or self.updatecache_flg or not repo.sync_hooks_only_on_change: hooks_enabled = True @@ -176,7 +189,7 @@ class SyncManager: self.exitcode = exitcode self.updatecache_flg = updatecache_flg if exitcode == 0: - msg = "=== Sync completed for %s" % self.repo.name + msg = f"=== Sync completed for {self.repo.name}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n") if self.callback: @@ -190,7 +203,7 @@ class SyncManager: _hooks = self.hooks["postsync.d"] for filepath in _hooks: writemsg_level( - "Spawning post_sync hook: %s\n" % (_unicode_decode(_hooks[filepath])), + f"Spawning post_sync hook: {_unicode_decode(_hooks[filepath])}\n", level=logging.ERROR, noiselevel=4, ) @@ -212,7 +225,7 @@ class SyncManager: return succeeded def pre_sync(self, repo): - msg = ">>> Syncing repository '%s' into '%s'..." % (repo.name, repo.location) + msg = f">>> Syncing repository '{repo.name}' into '{repo.location}'..." self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n") try: @@ -248,7 +261,7 @@ class SyncManager: pw = pwd.getpwuid(int(username)) except (ValueError, KeyError): writemsg( - "!!! User '%s' invalid or does not exist\n" % username, + f"!!! User '{username}' invalid or does not exist\n", noiselevel=-1, ) return (logname, user, group, home) @@ -266,7 +279,7 @@ class SyncManager: pw = grp.getgrgid(int(groupname)) except (ValueError, KeyError): writemsg( - "!!! Group '%s' invalid or does not exist\n" % groupname, + f"!!! Group '{groupname}' invalid or does not exist\n", noiselevel=-1, ) return (logname, user, group, home) @@ -364,7 +377,6 @@ class SyncManager: if updatecache_flg and os.path.exists( os.path.join(repo.location, "metadata", "md5-cache") ): - # Only update cache for repo.location since that's # the only one that's been synced here. action_metadata( diff --git a/lib/portage/sync/meson.build b/lib/portage/sync/meson.build new file mode 100644 index 000000000..59af12561 --- /dev/null +++ b/lib/portage/sync/meson.build @@ -0,0 +1,15 @@ +py.install_sources( + [ + 'config_checks.py', + 'controller.py', + 'getaddrinfo_validate.py', + 'old_tree_timestamp.py', + 'revision_history.py', + 'syncbase.py', + '__init__.py', + ], + subdir : 'portage/sync', + pure : not native_extensions +) + +subdir('modules') diff --git a/lib/portage/sync/modules/cvs/cvs.py b/lib/portage/sync/modules/cvs/cvs.py index 722f54ab4..e2e3a38a8 100644 --- a/lib/portage/sync/modules/cvs/cvs.py +++ b/lib/portage/sync/modules/cvs/cvs.py @@ -41,7 +41,7 @@ class CVSSync(NewBase): self.repo.module_specific_options["sync-cvs-repo"] ), ), - **self.spawn_kwargs + **self.spawn_kwargs, ) != os.EX_OK ): @@ -64,7 +64,7 @@ class CVSSync(NewBase): exitcode = portage.process.spawn_bash( "cd %s; exec cvs -z0 -q update -dP" % (portage._shell_quote(self.repo.location),), - **self.spawn_kwargs + **self.spawn_kwargs, ) if exitcode != os.EX_OK: msg = "!!! cvs update error; exiting." diff --git a/lib/portage/sync/modules/cvs/meson.build b/lib/portage/sync/modules/cvs/meson.build new file mode 100644 index 000000000..cdf54e9bd --- /dev/null +++ b/lib/portage/sync/modules/cvs/meson.build @@ -0,0 +1,8 @@ +py.install_sources( + [ + 'cvs.py', + '__init__.py', + ], + subdir : 'portage/sync/modules/cvs', + pure : not native_extensions +) diff --git a/lib/portage/sync/modules/git/__init__.py b/lib/portage/sync/modules/git/__init__.py index ef32a9da0..121494215 100644 --- a/lib/portage/sync/modules/git/__init__.py +++ b/lib/portage/sync/modules/git/__init__.py @@ -77,6 +77,7 @@ module_spec = { "sync-git-pull-env", "sync-git-pull-extra-opts", "sync-git-verify-commit-signature", + "sync-git-verify-max-age-days", ), } }, diff --git a/lib/portage/sync/modules/git/git.py b/lib/portage/sync/modules/git/git.py index 98670e1f9..a06ca60a8 100644 --- a/lib/portage/sync/modules/git/git.py +++ b/lib/portage/sync/modules/git/git.py @@ -1,15 +1,18 @@ -# Copyright 2005-2020 Gentoo Authors +# Copyright 2005-2023 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 -import io import logging +import re +import shlex import subprocess +import datetime import portage from portage import os -from portage.util import writemsg_level, shlex_split +from portage.util import writemsg_level from portage.util.futures import asyncio from portage.output import create_color_func, EOutput +from portage.const import TIMESTAMP_FORMAT good = create_color_func("GOOD") bad = create_color_func("BAD") @@ -35,11 +38,11 @@ class GitSync(NewBase): def __init__(self): NewBase.__init__(self, "git", portage.const.GIT_PACKAGE_ATOM) - def exists(self, **kwargs): + def exists(self, **kwargs) -> bool: """Tests whether the repo actually exists""" return os.path.exists(os.path.join(self.repo.location, ".git")) - def new(self, **kwargs): + def new(self, **kwargs) -> tuple[int, bool]: """Do the initial clone of the repository""" if kwargs: self._kwargs(kwargs) @@ -49,9 +52,9 @@ class GitSync(NewBase): if not os.path.exists(self.repo.location): os.makedirs(self.repo.location) self.logger( - self.xterm_titles, "Created new directory %s" % self.repo.location + self.xterm_titles, f"Created new directory {self.repo.location}" ) - except IOError: + except OSError: return (1, False) sync_uri = self.repo.sync_uri @@ -60,23 +63,23 @@ class GitSync(NewBase): git_cmd_opts = "" if self.repo.module_specific_options.get("sync-git-env"): - shlexed_env = shlex_split(self.repo.module_specific_options["sync-git-env"]) - env = dict( - (k, v) + shlexed_env = shlex.split(self.repo.module_specific_options["sync-git-env"]) + env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(env) if self.repo.module_specific_options.get("sync-git-clone-env"): - shlexed_env = shlex_split( + shlexed_env = shlex.split( self.repo.module_specific_options["sync-git-clone-env"] ) - clone_env = dict( - (k, v) + clone_env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(clone_env) if self.settings.get("PORTAGE_QUIET") == "1": @@ -84,18 +87,15 @@ class GitSync(NewBase): if self.repo.clone_depth is not None: if self.repo.clone_depth != 0: git_cmd_opts += " --depth %d" % self.repo.clone_depth - elif self.repo.sync_depth is not None: - if self.repo.sync_depth != 0: - git_cmd_opts += " --depth %d" % self.repo.sync_depth else: # default git_cmd_opts += " --depth 1" if self.repo.module_specific_options.get("sync-git-clone-extra-opts"): git_cmd_opts += ( - " %s" % self.repo.module_specific_options["sync-git-clone-extra-opts"] + f" {self.repo.module_specific_options['sync-git-clone-extra-opts']}" ) - git_cmd = "%s clone%s %s ." % ( + git_cmd = "{} clone{} {} .".format( self.bin_command, git_cmd_opts, portage._shell_quote(sync_uri), @@ -103,19 +103,38 @@ class GitSync(NewBase): writemsg_level(git_cmd + "\n") exitcode = portage.process.spawn_bash( - "cd %s ; exec %s" % (portage._shell_quote(self.repo.location), git_cmd), - **self.spawn_kwargs + f"cd {portage._shell_quote(self.repo.location)} ; exec {git_cmd}", + **self.spawn_kwargs, ) if exitcode != os.EX_OK: - msg = "!!! git clone error in %s" % self.repo.location + msg = f"!!! git clone error in {self.repo.location}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) return (exitcode, False) + + self.add_safe_directory() + if not self.verify_head(): return (1, False) + return (os.EX_OK, True) - def update(self): + def _gen_ceiling_string(self, path: str) -> str: + """ + Iteratively generate a colon delimited string of all of the + given path's parents, for use with GIT_CEILING_DIRECTORIES + """ + directories = [] + + while True: + if path == "/": + break + path = os.path.dirname(path) + directories.append(path) + + return ":".join(directories) + + def update(self) -> tuple[int, bool]: """Update existing git repository, and ignore the syncuri. We are going to trust the user and assume that the user is in the branch that he/she wants updated. We'll let the user manage branches with @@ -123,33 +142,128 @@ class GitSync(NewBase): """ if not self.has_bin: return (1, False) + + opts = self.options.get("emerge_config").opts + git_cmd_opts = "" quiet = self.settings.get("PORTAGE_QUIET") == "1" + verbose = "--verbose" in opts + + # We don't want to operate with a .git outside of the given + # repo in any circumstances. + self.spawn_kwargs["env"].update( + {"GIT_CEILING_DIRECTORIES": self._gen_ceiling_string(self.repo.location)} + ) + + self.add_safe_directory() + if self.repo.module_specific_options.get("sync-git-env"): - shlexed_env = shlex_split(self.repo.module_specific_options["sync-git-env"]) - env = dict( - (k, v) + shlexed_env = shlex.split(self.repo.module_specific_options["sync-git-env"]) + env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(env) if self.repo.module_specific_options.get("sync-git-pull-env"): - shlexed_env = shlex_split( + shlexed_env = shlex.split( self.repo.module_specific_options["sync-git-pull-env"] ) - pull_env = dict( - (k, v) + pull_env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(pull_env) - if self.settings.get("PORTAGE_QUIET") == "1": + if quiet: git_cmd_opts += " --quiet" + elif verbose: + git_cmd_opts += " --verbose" + + # The logic here is a bit delicate. We need to balance two things: + # 1. Having a robust sync mechanism which works unattended. + # 2. Allowing users to have the flexibility they might expect when using + # a git repository in repos.conf for syncing. + # + # For sync-type=git repositories, we've seen a problem in the wild + # where shallow clones end up "breaking themselves" especially when + # the origin is behind a CDN. 'git pull' might return state X, + # but on a subsequent pull, return state X-1. git will then (sometimes) + # leave orphaned untracked files in the repository. On a subsequent pull, + # when state >= X is returned where those files exist in the origin, + # git then refuses to write over them and aborts to avoid clobbering + # local work. + # + # To mitigate this, Portage will aggressively clobber any changes + # in the local directory, as its priority is to keep syncing working, + # by running 'git clean' and 'git reset --hard'. + # + # Portage performs this clobbering if: + # 1. sync-type=git + # 2. + # - volatile=no (explicitly set to no), OR + # - volatile is unset AND the repository owner is either root or portage + # 3. Portage is syncing the repository (rather than e.g. auto-sync=no + # and never running 'emaint sync -r foo') + # + # Portage will not clobber if: + # 1. volatile=yes (explicitly set in the config), OR + # 2. volatile is unset and the repository owner is neither root nor + # portage. + # + # 'volatile' refers to whether the repository is volatile and may + # only be safely changed by Portage itself, i.e. whether Portage + # should expect the user to change it or not. + # + # - volatile=yes: + # The repository is volatile and may be changed at any time by the user. + # Portage will not perform destructive operations on the repository. + # - volatile=no + # The repository is not volatile. Only Portage may modify the + # repository. User changes may be lost. + # Portage may perform destructive operations on the repository + # to keep sync working. + # + # References: + # bug #887025 + # bug #824782 + # https://archives.gentoo.org/gentoo-dev/message/f58a97027252458ad0a44090a2602897 + + # Default: Perform shallow updates (but only if the target is + # already a shallow repository). + sync_depth = 1 + if self.repo.sync_depth is not None: + sync_depth = self.repo.sync_depth + else: + if self.repo.volatile: + # If sync-depth is not explicitly set by the user, + # then check if the target repository is already a + # shallow one. And do not perform a shallow update if + # the target repository is not shallow. + is_shallow_cmd = ["git", "rev-parse", "--is-shallow-repository"] + is_shallow_res = portage._unicode_decode( + subprocess.check_output( + is_shallow_cmd, + cwd=portage._unicode_encode(self.repo.location), + ) + ).rstrip("\n") + if is_shallow_res == "false": + sync_depth = 0 + else: + # If the repository is marked as non-volatile, we assume + # it's fine to Portage to do what it wishes to it. + sync_depth = 1 + + shallow = False + if sync_depth > 0: + git_cmd_opts += f" --depth {sync_depth}" + shallow = True + if self.repo.module_specific_options.get("sync-git-pull-extra-opts"): git_cmd_opts += ( - " %s" % self.repo.module_specific_options["sync-git-pull-extra-opts"] + f" {self.repo.module_specific_options['sync-git-pull-extra-opts']}" ) try: @@ -166,15 +280,12 @@ class GitSync(NewBase): ) ).rstrip("\n") except subprocess.CalledProcessError as e: - msg = "!!! git rev-parse error in %s" % self.repo.location + msg = f"!!! git rev-parse error in {self.repo.location}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) return (e.returncode, False) - shallow = self.repo.sync_depth is not None and self.repo.sync_depth != 0 if shallow: - git_cmd_opts += " --depth %d" % self.repo.sync_depth - # For shallow fetch, unreachable objects may need to be pruned # manually, in order to prevent automatic git gc calls from # eventually failing (see bug 599008). @@ -184,21 +295,49 @@ class GitSync(NewBase): exitcode = portage.process.spawn( gc_cmd, cwd=portage._unicode_encode(self.repo.location), - **self.spawn_kwargs + **self.spawn_kwargs, ) if exitcode != os.EX_OK: - msg = "!!! git gc error in %s" % self.repo.location + msg = f"!!! git gc error in {self.repo.location}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) return (exitcode, False) - git_cmd = "%s fetch %s%s" % ( - self.bin_command, - remote_branch.partition("/")[0], - git_cmd_opts, - ) + git_remote = remote_branch.partition("/")[0] - writemsg_level(git_cmd + "\n") + if not self.repo.volatile: + git_get_remote_url_cmd = ["git", "ls-remote", "--get-url", git_remote] + git_remote_url = portage._unicode_decode( + subprocess.check_output( + git_get_remote_url_cmd, + cwd=portage._unicode_encode(self.repo.location), + ) + ).strip() + if git_remote_url != self.repo.sync_uri: + git_set_remote_url_cmd = [ + "git", + "remote", + "set-url", + git_remote, + self.repo.sync_uri, + ] + exitcode = portage.process.spawn( + git_set_remote_url_cmd, + cwd=portage._unicode_encode(self.repo.location), + **self.spawn_kwargs, + ) + if exitcode != os.EX_OK: + msg = f"!!! could not update git remote {git_remote}'s url to {self.repo.sync_uri}" + self.logger(self.xterm_titles, msg) + writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) + return (exitcode, False) + elif not quiet: + writemsg_level(" ".join(git_set_remote_url_cmd) + "\n") + + git_cmd = f"{self.bin_command} fetch {git_remote}{git_cmd_opts}" + + if not quiet: + writemsg_level(git_cmd + "\n") rev_cmd = [self.bin_command, "rev-list", "--max-count=1", "HEAD"] previous_rev = subprocess.check_output( @@ -206,39 +345,92 @@ class GitSync(NewBase): ) exitcode = portage.process.spawn_bash( - "cd %s ; exec %s" % (portage._shell_quote(self.repo.location), git_cmd), - **self.spawn_kwargs + f"cd {portage._shell_quote(self.repo.location)} ; exec {git_cmd}", + **self.spawn_kwargs, ) if exitcode != os.EX_OK: - msg = "!!! git fetch error in %s" % self.repo.location + msg = f"!!! git fetch error in {self.repo.location}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) return (exitcode, False) - if not self.verify_head(revision="refs/remotes/%s" % remote_branch): + if not self.verify_head(revision=f"refs/remotes/{remote_branch}"): return (1, False) - if shallow: + if not self.repo.volatile: + # Clean up the repo before trying to sync to upstream. + # - Only done for volatile=false repositories to avoid losing + # data. + # - This is needed to avoid orphaned files preventing further syncs + # on shallow clones. + clean_cmd = [self.bin_command, "clean", "--force", "-d", "-x"] + + if quiet: + clean_cmd.append("--quiet") + + exitcode = portage.process.spawn( + clean_cmd, + cwd=portage._unicode_encode(self.repo.location), + **self.spawn_kwargs, + ) + + if exitcode != os.EX_OK: + msg = f"!!! git clean error in {self.repo.location}" + self.logger(self.xterm_titles, msg) + writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) + return (exitcode, False) + + # `git diff --quiet` returns 0 on a clean tree and 1 otherwise + is_clean = ( + portage.process.spawn( + f"{self.bin_command} diff --quiet", + cwd=portage._unicode_encode(self.repo.location), + **self.spawn_kwargs, + ) + == 0 + ) + + if not is_clean and not self.repo.volatile: + # If the repo isn't clean, clobber any changes for parity + # with rsync. Only do this for non-volatile repositories. + merge_cmd = [self.bin_command, "reset", "--hard"] + elif shallow: # Since the default merge strategy typically fails when # the depth is not unlimited, `git reset --merge`. merge_cmd = [self.bin_command, "reset", "--merge"] else: merge_cmd = [self.bin_command, "merge"] - merge_cmd.append("refs/remotes/%s" % remote_branch) + + merge_cmd.append(f"refs/remotes/{remote_branch}") if quiet: merge_cmd.append("--quiet") + + if not quiet: + writemsg_level(" ".join(merge_cmd) + "\n") + exitcode = portage.process.spawn( merge_cmd, cwd=portage._unicode_encode(self.repo.location), - **self.spawn_kwargs + **self.spawn_kwargs, ) if exitcode != os.EX_OK: - msg = "!!! git merge error in %s" % self.repo.location - self.logger(self.xterm_titles, msg) - writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) - return (exitcode, False) + if not self.repo.volatile: + # HACK - sometimes merging results in a tree diverged from + # upstream, so try to hack around it + # https://stackoverflow.com/questions/41075972/how-to-update-a-git-shallow-clone/41081908#41081908 + exitcode = portage.process.spawn( + f"{self.bin_command} reset --hard refs/remotes/{remote_branch}", + cwd=portage._unicode_encode(self.repo.location), + **self.spawn_kwargs, + ) + + if exitcode != os.EX_OK: + msg = f"!!! git merge error in {self.repo.location}" + self.logger(self.xterm_titles, msg) + writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) + return (exitcode, False) current_rev = subprocess.check_output( rev_cmd, cwd=portage._unicode_encode(self.repo.location) @@ -246,7 +438,53 @@ class GitSync(NewBase): return (os.EX_OK, current_rev != previous_rev) - def verify_head(self, revision="-1"): + def verify_head(self, revision="-1") -> bool: + max_age_days = self.repo.module_specific_options.get( + "sync-git-verify-max-age-days", "" + ) + if max_age_days: + try: + max_age_days = int(max_age_days) + if max_age_days <= 0: + raise ValueError(max_age_days) + except ValueError: + writemsg_level( + f"!!! sync-git-max-age-days must be a positive non-zero integer: {max_age_days}\n", + level=logging.ERROR, + noiselevel=-1, + ) + return False + show_timestamp_chk_file_cmd = [ + self.bin_command, + "show", + f"{revision}:metadata/timestamp.chk", + ] + try: + timestamp_chk = portage._unicode_decode( + subprocess.check_output( + show_timestamp_chk_file_cmd, + cwd=portage._unicode_encode(self.repo.location), + ) + ).strip() + except subprocess.CalledProcessError as e: + writemsg_level( + f"!!! {show_timestamp_chk_file_cmd} failed with {e.returncode}", + level=logging.ERROR, + noiselevel=-1, + ) + return False + timestamp = datetime.datetime.strptime(timestamp_chk, TIMESTAMP_FORMAT) + max_timestamp_age = datetime.datetime.now() - datetime.timedelta( + days=max_age_days + ) + if timestamp < max_timestamp_age: + writemsg_level( + f"!!! timestamp (from timestamp.chk) {timestamp} is older than max age {max_timestamp_age}\n", + level=logging.ERROR, + noiselevel=-1, + ) + return False + if self.repo.module_specific_options.get( "sync-git-verify-commit-signature", "false" ).lower() not in ("true", "yes"): @@ -260,21 +498,30 @@ class GitSync(NewBase): ) return False - openpgp_env = self._get_openpgp_env(self.repo.sync_openpgp_key_path) + opts = self.options.get("emerge_config").opts + debug = "--debug" in opts + quiet = self.settings.get("PORTAGE_QUIET") == "1" + verbose = "--verbose" in opts + + openpgp_env = self._get_openpgp_env(self.repo.sync_openpgp_key_path, debug) + + if debug: + old_level = logging.getLogger().getEffectiveLevel() + logging.getLogger().setLevel(logging.DEBUG) + logging.getLogger("gemato").setLevel(logging.DEBUG) try: out = EOutput() env = None if openpgp_env is not None and self.repo.sync_openpgp_key_path is not None: try: - out.einfo("Using keys from %s" % (self.repo.sync_openpgp_key_path,)) - with io.open(self.repo.sync_openpgp_key_path, "rb") as f: + out.einfo(f"Using keys from {self.repo.sync_openpgp_key_path}") + with open(self.repo.sync_openpgp_key_path, "rb") as f: openpgp_env.import_key(f) self._refresh_keys(openpgp_env) except (GematoException, asyncio.TimeoutError) as e: writemsg_level( - "!!! Verification impossible due to keyring problem:\n%s\n" - % (e,), + f"!!! Verification impossible due to keyring problem:\n{e}\n", level=logging.ERROR, noiselevel=-1, ) @@ -283,51 +530,84 @@ class GitSync(NewBase): env = os.environ.copy() env["GNUPGHOME"] = openpgp_env.home - rev_cmd = [self.bin_command, "log", "-n1", "--pretty=format:%G?", revision] + rev_cmd = [ + self.bin_command, + "-c", + "log.showsignature=0", + "log", + "-n1", + "--pretty=format:%G?%n%GF", + revision, + ] try: - status = portage._unicode_decode( + lines = portage._unicode_decode( subprocess.check_output( rev_cmd, cwd=portage._unicode_encode(self.repo.location), env=env, ) - ).strip() + ).splitlines() except subprocess.CalledProcessError: return False + status = lines[0].strip() + if len(lines) > 1: + signing_key = lines[1].strip() + if status == "G": # good signature is good - out.einfo("Trusted signature found on top commit") + if not quiet: + message = "Trusted signature found on top commit" + if verbose: + message += ( + f" (git revision: {revision}, signing key: {signing_key})" + ) + out.einfo(message) return True if status == "U": # untrusted - out.ewarn("Top commit signature is valid but not trusted") + out.ewarn( + f"Top commit signature is valid but not trusted (git revision: {revision}, signing key: {signing_key})" + ) return True if status == "B": - expl = "bad signature" + expl = ( + f"bad signature using key {signing_key} on git revision {revision}" + ) elif status == "X": - expl = "expired signature" + expl = f"expired signature using key {signing_key} on git revision {revision}" elif status == "Y": - expl = "expired key" + expl = f"expired key using key {signing_key} on git revision {revision}" elif status == "R": - expl = "revoked key" + expl = f"revoked key using key {signing_key} on git revision {revision}" elif status == "E": expl = "unable to verify signature (missing key?)" elif status == "N": expl = "no signature" else: expl = "unknown issue" - out.eerror("No valid signature found: %s" % (expl,)) + out.eerror(f"No valid signature found: {expl}") + + if debug: + writemsg_level( + f"!!! Got following output from gpg: {status}\n", + level=logging.DEBUG, + noiselevel=-1, + ) + return False finally: if openpgp_env is not None: openpgp_env.close() + if debug: + logging.getLogger().setLevel(old_level) - def retrieve_head(self, **kwargs): + def retrieve_head(self, **kwargs) -> tuple[int, bool]: """Get information about the head commit""" if kwargs: self._kwargs(kwargs) if self.bin_command is None: # return quietly so that we don't pollute emerge --info output return (1, False) + self.add_safe_directory() rev_cmd = [self.bin_command, "rev-list", "--max-count=1", "HEAD"] try: ret = ( @@ -341,3 +621,31 @@ class GitSync(NewBase): except subprocess.CalledProcessError: ret = (1, False) return ret + + def add_safe_directory(self) -> bool: + # Add safe.directory to system gitconfig if not already configured. + # Workaround for bug #838271 and bug #838223. + location_escaped = re.escape(self.repo.location) + result = subprocess.run( + [ + self.bin_command, + "config", + "--get", + "safe.directory", + f"^{location_escaped}$", + ], + stdout=subprocess.DEVNULL, + ) + if result.returncode == 1: + result = subprocess.run( + [ + self.bin_command, + "config", + "--system", + "--add", + "safe.directory", + self.repo.location, + ], + stdout=subprocess.DEVNULL, + ) + return result.returncode == 0 diff --git a/lib/portage/sync/modules/git/meson.build b/lib/portage/sync/modules/git/meson.build new file mode 100644 index 000000000..fb683c53e --- /dev/null +++ b/lib/portage/sync/modules/git/meson.build @@ -0,0 +1,8 @@ +py.install_sources( + [ + 'git.py', + '__init__.py', + ], + subdir : 'portage/sync/modules/git', + pure : not native_extensions +) diff --git a/lib/portage/sync/modules/mercurial/mercurial.py b/lib/portage/sync/modules/mercurial/mercurial.py index 486b4fdd6..aad8ff94a 100644 --- a/lib/portage/sync/modules/mercurial/mercurial.py +++ b/lib/portage/sync/modules/mercurial/mercurial.py @@ -2,11 +2,12 @@ # Distributed under the terms of the GNU General Public License v2 import logging +import shlex import subprocess import portage from portage import os -from portage.util import writemsg_level, shlex_split +from portage.util import writemsg_level from portage.sync.syncbase import NewBase @@ -35,9 +36,9 @@ class MercurialSync(NewBase): if not os.path.exists(self.repo.location): os.makedirs(self.repo.location) self.logger( - self.xterm_titles, "Created new directory %s" % self.repo.location + self.xterm_titles, f"Created new directory {self.repo.location}" ) - except IOError: + except OSError: return (1, False) sync_uri = self.repo.sync_uri @@ -46,25 +47,25 @@ class MercurialSync(NewBase): hg_cmd_opts = "" if self.repo.module_specific_options.get("sync-mercurial-env"): - shlexed_env = shlex_split( + shlexed_env = shlex.split( self.repo.module_specific_options["sync-mercurial-env"] ) - env = dict( - (k, v) + env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(env) if self.repo.module_specific_options.get("sync-mercurial-clone-env"): - shlexed_env = shlex_split( + shlexed_env = shlex.split( self.repo.module_specific_options["sync-mercurial-clone-env"] ) - clone_env = dict( - (k, v) + clone_env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(clone_env) if self.settings.get("PORTAGE_QUIET") == "1": @@ -74,7 +75,7 @@ class MercurialSync(NewBase): " %s" % self.repo.module_specific_options["sync-mercurial-clone-extra-opts"] ) - hg_cmd = "%s clone%s %s ." % ( + hg_cmd = "{} clone{} {} .".format( self.bin_command, hg_cmd_opts, portage._shell_quote(sync_uri), @@ -82,12 +83,12 @@ class MercurialSync(NewBase): writemsg_level(hg_cmd + "\n") exitcode = portage.process.spawn( - shlex_split(hg_cmd), + shlex.split(hg_cmd), cwd=portage._unicode_encode(self.repo.location), - **self.spawn_kwargs + **self.spawn_kwargs, ) if exitcode != os.EX_OK: - msg = "!!! hg clone error in %s" % self.repo.location + msg = f"!!! hg clone error in {self.repo.location}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) return (exitcode, False) @@ -102,25 +103,25 @@ class MercurialSync(NewBase): hg_cmd_opts = "" if self.repo.module_specific_options.get("sync-mercurial-env"): - shlexed_env = shlex_split( + shlexed_env = shlex.split( self.repo.module_specific_options["sync-mercurial-env"] ) - env = dict( - (k, v) + env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(env) if self.repo.module_specific_options.get("sync-mercurial-pull-env"): - shlexed_env = shlex_split( + shlexed_env = shlex.split( self.repo.module_specific_options["sync-mercurial-pull-env"] ) - pull_env = dict( - (k, v) + pull_env = { + k: v for k, _, v in (assignment.partition("=") for assignment in shlexed_env) if k - ) + } self.spawn_kwargs["env"].update(pull_env) if self.settings.get("PORTAGE_QUIET") == "1": @@ -130,7 +131,7 @@ class MercurialSync(NewBase): " %s" % self.repo.module_specific_options["sync-mercurial-pull-extra-opts"] ) - hg_cmd = "%s pull -u%s" % (self.bin_command, hg_cmd_opts) + hg_cmd = f"{self.bin_command} pull -u{hg_cmd_opts}" writemsg_level(hg_cmd + "\n") rev_cmd = [self.bin_command, "id", "--id", "--rev", "tip"] @@ -139,12 +140,12 @@ class MercurialSync(NewBase): ) exitcode = portage.process.spawn( - shlex_split(hg_cmd), + shlex.split(hg_cmd), cwd=portage._unicode_encode(self.repo.location), - **self.spawn_kwargs + **self.spawn_kwargs, ) if exitcode != os.EX_OK: - msg = "!!! hg pull error in %s" % self.repo.location + msg = f"!!! hg pull error in {self.repo.location}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) return (exitcode, False) diff --git a/lib/portage/sync/modules/mercurial/meson.build b/lib/portage/sync/modules/mercurial/meson.build new file mode 100644 index 000000000..4e4897ed3 --- /dev/null +++ b/lib/portage/sync/modules/mercurial/meson.build @@ -0,0 +1,8 @@ +py.install_sources( + [ + 'mercurial.py', + '__init__.py', + ], + subdir : 'portage/sync/modules/mercurial', + pure : not native_extensions +) diff --git a/lib/portage/sync/modules/meson.build b/lib/portage/sync/modules/meson.build new file mode 100644 index 000000000..fab2878e9 --- /dev/null +++ b/lib/portage/sync/modules/meson.build @@ -0,0 +1,14 @@ +py.install_sources( + [ + '__init__.py', + ], + subdir : 'portage/sync/modules', + pure : not native_extensions +) + +subdir('cvs') +subdir('git') +subdir('mercurial') +subdir('rsync') +subdir('svn') +subdir('webrsync') diff --git a/lib/portage/sync/modules/rsync/meson.build b/lib/portage/sync/modules/rsync/meson.build new file mode 100644 index 000000000..ab95e7cfc --- /dev/null +++ b/lib/portage/sync/modules/rsync/meson.build @@ -0,0 +1,8 @@ +py.install_sources( + [ + 'rsync.py', + '__init__.py', + ], + subdir : 'portage/sync/modules/rsync', + pure : not native_extensions +) diff --git a/lib/portage/sync/modules/rsync/rsync.py b/lib/portage/sync/modules/rsync/rsync.py index 5f4cf1aeb..e89221ebc 100644 --- a/lib/portage/sync/modules/rsync/rsync.py +++ b/lib/portage/sync/modules/rsync/rsync.py @@ -1,11 +1,11 @@ -# Copyright 1999-2020 Gentoo Authors +# Copyright 1999-2024 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 import datetime -import io import logging import random import re +import shlex import signal import socket import sys @@ -19,6 +19,7 @@ from portage import _unicode_decode from portage import os from portage.const import VCS_DIRS, TIMESTAMP_FORMAT, RSYNC_PACKAGE_ATOM from portage.output import create_color_func, yellow, blue, bold +from portage.process import has_ipv6 from portage.sync.getaddrinfo_validate import getaddrinfo_validate from portage.sync.syncbase import NewBase from portage.util import writemsg, writemsg_level, writemsg_stdout @@ -92,9 +93,7 @@ class RsyncSync(NewBase): self.extra_rsync_opts = list() if self.repo.module_specific_options.get("sync-rsync-extra-opts"): self.extra_rsync_opts.extend( - portage.util.shlex_split( - self.repo.module_specific_options["sync-rsync-extra-opts"] - ) + shlex.split(self.repo.module_specific_options["sync-rsync-extra-opts"]) ) exitcode = 0 @@ -148,11 +147,16 @@ class RsyncSync(NewBase): else: self.max_age = 0 + debug = "--debug" in opts + if debug: + old_level = logging.getLogger().getEffectiveLevel() + logging.getLogger().setLevel(logging.DEBUG) + openpgp_env = None if self.verify_metamanifest and gemato is not None: # Use isolated environment if key is specified, # system environment otherwise - openpgp_env = self._get_openpgp_env(self.repo.sync_openpgp_key_path) + openpgp_env = self._get_openpgp_env(self.repo.sync_openpgp_key_path, debug) try: # Load and update the keyring early. If it fails, then verification @@ -160,8 +164,8 @@ class RsyncSync(NewBase): # so we may as well bail out before actual rsync happens. if openpgp_env is not None and self.repo.sync_openpgp_key_path is not None: try: - out.einfo("Using keys from %s" % (self.repo.sync_openpgp_key_path,)) - with io.open(self.repo.sync_openpgp_key_path, "rb") as f: + out.einfo(f"Using keys from {self.repo.sync_openpgp_key_path}") + with open(self.repo.sync_openpgp_key_path, "rb") as f: openpgp_env.import_key(f) self._refresh_keys(openpgp_env) except (GematoException, asyncio.TimeoutError) as e: @@ -225,7 +229,7 @@ class RsyncSync(NewBase): )[1:5] except ValueError: writemsg_level( - "!!! sync-uri is invalid: %s\n" % syncuri, + f"!!! sync-uri is invalid: {syncuri}\n", noiselevel=-1, level=logging.ERROR, ) @@ -249,9 +253,7 @@ class RsyncSync(NewBase): family = socket.AF_UNSPEC if "-4" in all_rsync_opts or "--ipv4" in all_rsync_opts: family = socket.AF_INET - elif socket.has_ipv6 and ( - "-6" in all_rsync_opts or "--ipv6" in all_rsync_opts - ): + elif has_ipv6() and ("-6" in all_rsync_opts or "--ipv6" in all_rsync_opts): family = socket.AF_INET6 addrinfos = None @@ -264,7 +266,7 @@ class RsyncSync(NewBase): getaddrinfo_host, None, family, socket.SOCK_STREAM ) ) - except socket.error as e: + except OSError as e: writemsg_level( "!!! getaddrinfo failed for '%s': %s\n" % (_unicode_decode(hostname), str(e)), @@ -273,10 +275,9 @@ class RsyncSync(NewBase): ) if addrinfos: - AF_INET = socket.AF_INET AF_INET6 = None - if socket.has_ipv6: + if has_ipv6(): AF_INET6 = socket.AF_INET6 ips_v4 = [] @@ -284,10 +285,10 @@ class RsyncSync(NewBase): for addrinfo in addrinfos: if addrinfo[0] == AF_INET: - ips_v4.append("%s" % addrinfo[4][0]) + ips_v4.append(f"{addrinfo[4][0]}") elif AF_INET6 is not None and addrinfo[0] == AF_INET6: # IPv6 addresses need to be enclosed in square brackets - ips_v6.append("[%s]" % addrinfo[4][0]) + ips_v6.append(f"[{addrinfo[4][0]}]") random.shuffle(ips_v4) random.shuffle(ips_v6) @@ -334,7 +335,7 @@ class RsyncSync(NewBase): dosyncuri = uris.pop() elif maxretries < 0 or retries > maxretries: writemsg( - "!!! Exhausted addresses for %s\n" % _unicode_decode(hostname), + f"!!! Exhausted addresses for {_unicode_decode(hostname)}\n", noiselevel=-1, ) return (1, False) @@ -446,30 +447,28 @@ class RsyncSync(NewBase): out.ewarn( "You may want to try using another mirror and/or reporting this one:" ) - out.ewarn(" %s" % (dosyncuri,)) + out.ewarn(f" {dosyncuri}") out.ewarn("") out.quiet = quiet - out.einfo("Manifest timestamp: %s UTC" % (ts.ts,)) + out.einfo(f"Manifest timestamp: {ts.ts} UTC") out.einfo("Valid OpenPGP signature found:") out.einfo( "- primary key: %s" % (m.openpgp_signature.primary_key_fingerprint) ) - out.einfo("- subkey: %s" % (m.openpgp_signature.fingerprint)) - out.einfo( - "- timestamp: %s UTC" % (m.openpgp_signature.timestamp) - ) + out.einfo(f"- subkey: {m.openpgp_signature.fingerprint}") + out.einfo(f"- timestamp: {m.openpgp_signature.timestamp} UTC") # if nothing has changed, skip the actual Manifest # verification if not local_state_unchanged: - out.ebegin("Verifying %s" % (download_dir,)) + out.ebegin(f"Verifying {download_dir}") m.assert_directory_verifies() out.eend(0) except GematoException as e: writemsg_level( - "!!! Manifest verification failed:\n%s\n" % (e,), + f"!!! Manifest verification failed:\n{e}\n", level=logging.ERROR, noiselevel=-1, ) @@ -488,6 +487,8 @@ class RsyncSync(NewBase): self.repo_storage.abort_update() if openpgp_env is not None: openpgp_env.close() + if debug: + logging.getLogger().setLevel(old_level) def _process_exitcode(self, exitcode, syncuri, out, maxretries): if exitcode == 0: @@ -495,7 +496,7 @@ class RsyncSync(NewBase): elif exitcode == SERVER_OUT_OF_DATE: exitcode = 1 elif exitcode == EXCEEDED_MAX_RETRIES: - sys.stderr.write(">>> Exceeded PORTAGE_RSYNC_RETRIES: %s\n" % maxretries) + sys.stderr.write(f">>> Exceeded PORTAGE_RSYNC_RETRIES: {maxretries}\n") exitcode = 1 elif exitcode > 0: msg = [] @@ -507,7 +508,7 @@ class RsyncSync(NewBase): "that sync-uri attribute for repository '%s' is proper." % self.repo.name ) - msg.append("sync-uri: '%s'" % self.repo.sync_uri) + msg.append(f"sync-uri: '{self.repo.sync_uri}'") elif exitcode == 11: msg.append("Rsync has reported that there is a File IO error. Normally") msg.append( @@ -518,7 +519,7 @@ class RsyncSync(NewBase): % self.repo.name ) msg.append("and try again after the problem has been fixed.") - msg.append("Location of repository: '%s'" % self.repo.location) + msg.append(f"Location of repository: '{self.repo.location}'") elif exitcode == 20: msg.append("Rsync was killed before it finished.") else: @@ -546,9 +547,9 @@ class RsyncSync(NewBase): os.makedirs(self.repo.location) self.logger( self.self.xterm_titles, - "Created New Directory %s " % self.repo.location, + f"Created New Directory {self.repo.location} ", ) - except IOError: + except OSError: return (1, False) return self.update() @@ -582,7 +583,7 @@ class RsyncSync(NewBase): "--force", # Force deletion on non-empty dirs "--whole-file", # Don't do block transfers, only entire files "--delete", # Delete files that aren't in the master tree - "--stats", # Show final statistics about what was transfered + "--stats", # Show final statistics about what was transferred "--human-readable", "--timeout=" + str(self.timeout), # IO timeout if not done in X seconds "--exclude=/distfiles", # Exclude distfiles from consideration @@ -596,24 +597,22 @@ class RsyncSync(NewBase): # defaults. portage.writemsg("Using PORTAGE_RSYNC_OPTS instead of hardcoded defaults\n", 1) - rsync_opts.extend( - portage.util.shlex_split(self.settings.get("PORTAGE_RSYNC_OPTS", "")) - ) + rsync_opts.extend(shlex.split(self.settings.get("PORTAGE_RSYNC_OPTS", ""))) for opt in ("--recursive", "--times"): if opt not in rsync_opts: portage.writemsg( yellow("WARNING:") + " adding required option " - + "%s not included in PORTAGE_RSYNC_OPTS\n" % opt + + f"{opt} not included in PORTAGE_RSYNC_OPTS\n" ) rsync_opts.append(opt) for exclude in ("distfiles", "local", "packages"): - opt = "--exclude=/%s" % exclude + opt = f"--exclude=/{exclude}" if opt not in rsync_opts: portage.writemsg( yellow("WARNING:") - + " adding required option %s not included in " % opt + + f" adding required option {opt} not included in " + "PORTAGE_RSYNC_OPTS (can be overridden with --exclude='!')\n" ) rsync_opts.append(opt) @@ -634,7 +633,7 @@ class RsyncSync(NewBase): portage.writemsg( yellow("WARNING:") + " adding required option " - + "%s not included in PORTAGE_RSYNC_OPTS\n" % opt + + f"{opt} not included in PORTAGE_RSYNC_OPTS\n" ) rsync_opts.append(opt) return rsync_opts @@ -705,48 +704,47 @@ class RsyncSync(NewBase): command.append(syncuri.rstrip("/") + "/metadata/timestamp.chk") command.append(tmpservertimestampfile) content = None - pids = [] + proc = None + proc_waiter = None + loop = asyncio.get_event_loop() try: # Timeout here in case the server is unresponsive. The # --timeout rsync option doesn't apply to the initial # connection attempt. try: - if self.rsync_initial_timeout: - portage.exception.AlarmSignal.register(self.rsync_initial_timeout) - - pids.extend( - portage.process.spawn(command, returnpid=True, **self.spawn_kwargs) + proc = portage.process.spawn( + command, returnproc=True, **self.spawn_kwargs + ) + proc_waiter = asyncio.ensure_future(proc.wait(), loop) + future = ( + asyncio.wait_for( + asyncio.shield(proc_waiter), self.rsync_initial_timeout + ) + if self.rsync_initial_timeout + else proc_waiter ) - exitcode = os.waitpid(pids[0], 0)[1] + exitcode = loop.run_until_complete(future) if self.usersync_uid is not None: portage.util.apply_permissions( tmpservertimestampfile, uid=os.getuid() ) content = portage.grabfile(tmpservertimestampfile) finally: - if self.rsync_initial_timeout: - portage.exception.AlarmSignal.unregister() try: os.unlink(tmpservertimestampfile) except OSError: pass - except portage.exception.AlarmSignal: + except (TimeoutError, asyncio.TimeoutError): # timed out print("timed out") # With waitpid and WNOHANG, only check the # first element of the tuple since the second # element may vary (bug #337465). - if pids and os.waitpid(pids[0], os.WNOHANG)[0] == 0: - os.kill(pids[0], signal.SIGTERM) - os.waitpid(pids[0], 0) + if proc_waiter and not proc_waiter.done(): + proc.terminate() + loop.run_until_complete(proc_waiter) # This is the same code rsync uses for timeout. exitcode = 30 - else: - if exitcode != os.EX_OK: - if exitcode & 0xFF: - exitcode = (exitcode & 0xFF) << 8 - else: - exitcode = exitcode >> 8 if content: try: @@ -755,7 +753,6 @@ class RsyncSync(NewBase): ) except (OverflowError, ValueError): pass - del command, pids, content if exitcode == os.EX_OK: if (servertimestamp != 0) and (servertimestamp == timestamp): @@ -774,20 +771,18 @@ class RsyncSync(NewBase): ) print(">>>") print( - ">>> In order to force sync, remove '%s'." - % self.servertimestampfile + f">>> In order to force sync, remove '{self.servertimestampfile}'." ) print(">>>") print() elif (servertimestamp != 0) and (servertimestamp < timestamp): - self.logger(self.xterm_titles, ">>> Server out of date: %s" % syncuri) + self.logger(self.xterm_titles, f">>> Server out of date: {syncuri}") print() print(">>>") - print(">>> SERVER OUT OF DATE: %s" % syncuri) + print(f">>> SERVER OUT OF DATE: {syncuri}") print(">>>") print( - ">>> In order to force sync, remove '%s'." - % self.servertimestampfile + f">>> In order to force sync, remove '{self.servertimestampfile}'." ) print(">>>") print() diff --git a/lib/portage/sync/modules/svn/meson.build b/lib/portage/sync/modules/svn/meson.build new file mode 100644 index 000000000..0b1b86f3e --- /dev/null +++ b/lib/portage/sync/modules/svn/meson.build @@ -0,0 +1,8 @@ +py.install_sources( + [ + 'svn.py', + '__init__.py', + ], + subdir : 'portage/sync/modules/svn', + pure : not native_extensions +) diff --git a/lib/portage/sync/modules/svn/svn.py b/lib/portage/sync/modules/svn/svn.py index 788e394cd..ec4bdb006 100644 --- a/lib/portage/sync/modules/svn/svn.py +++ b/lib/portage/sync/modules/svn/svn.py @@ -36,7 +36,7 @@ class SVNSync(NewBase): portage._shell_quote(self.repo.location), portage._shell_quote(svn_root), ), - **self.spawn_kwargs + **self.spawn_kwargs, ) if exitcode != os.EX_OK: msg = "!!! svn checkout error; exiting." @@ -59,8 +59,8 @@ class SVNSync(NewBase): # svn update exitcode = portage.process.spawn_bash( - "cd %s; exec svn update" % (portage._shell_quote(self.repo.location),), - **self.spawn_kwargs + f"cd {portage._shell_quote(self.repo.location)}; exec svn update", + **self.spawn_kwargs, ) if exitcode != os.EX_OK: msg = "!!! svn update error; exiting." @@ -77,8 +77,8 @@ class SVNSync(NewBase): @rtype: (int, bool) """ exitcode = portage.process.spawn_bash( - "cd %s; exec svn upgrade" % (portage._shell_quote(self.repo.location),), - **self.spawn_kwargs + f"cd {portage._shell_quote(self.repo.location)}; exec svn upgrade", + **self.spawn_kwargs, ) if exitcode != os.EX_OK: msg = "!!! svn upgrade error; exiting." diff --git a/lib/portage/sync/modules/webrsync/__init__.py b/lib/portage/sync/modules/webrsync/__init__.py index bc0cdf43c..534a1d562 100644 --- a/lib/portage/sync/modules/webrsync/__init__.py +++ b/lib/portage/sync/modules/webrsync/__init__.py @@ -13,8 +13,8 @@ from portage.sync.config_checks import CheckSyncConfig DEFAULT_CLASS = "WebRsync" -AVAILABLE_CLASSES = ["WebRsync", "PyWebsync"] -options = {"1": "WebRsync", "2": "PyWebsync"} +AVAILABLE_CLASSES = ["WebRsync", "PyWebRsync"] +options = {"1": "WebRsync", "2": "PyWebRsync"} config_class = DEFAULT_CLASS diff --git a/lib/portage/sync/modules/webrsync/meson.build b/lib/portage/sync/modules/webrsync/meson.build new file mode 100644 index 000000000..d3e42cbf1 --- /dev/null +++ b/lib/portage/sync/modules/webrsync/meson.build @@ -0,0 +1,8 @@ +py.install_sources( + [ + 'webrsync.py', + '__init__.py', + ], + subdir : 'portage/sync/modules/webrsync', + pure : not native_extensions +) diff --git a/lib/portage/sync/modules/webrsync/webrsync.py b/lib/portage/sync/modules/webrsync/webrsync.py index 0e2f63472..ca0416fa4 100644 --- a/lib/portage/sync/modules/webrsync/webrsync.py +++ b/lib/portage/sync/modules/webrsync/webrsync.py @@ -1,28 +1,33 @@ """WebRsync module for portage""" -import io import logging import portage from portage import os from portage.util import writemsg_level -from portage.util.futures import asyncio from portage.output import create_color_func +from portage.sync.syncbase import SyncBase good = create_color_func("GOOD") bad = create_color_func("BAD") warn = create_color_func("WARN") -from portage.sync.syncbase import SyncBase try: - from gemato.exceptions import GematoException import gemato.openpgp except ImportError: gemato = None class WebRsync(SyncBase): - """WebRSync sync class""" + """WebRSync sync class + + This class implements syncing via calls to an external binary, either: + - emerge-delta-webrsync (if sync-webrsync-delta is set), or + - emerge-webrsync + + It wraps them and performs PGP verification if sync-webrsync-verify-signature + is set via gemato. + """ short_desc = "Perform sync operations on webrsync based repositories" @@ -46,7 +51,7 @@ class WebRsync(SyncBase): self.bin_command = portage.process.find_binary(self._bin_command) self.bin_pkg = ">=app-portage/emerge-delta-webrsync-3.7.5" - return super(WebRsync, self).has_bin + return super().has_bin def sync(self, **kwargs): """Sync the repository""" @@ -67,7 +72,6 @@ class WebRsync(SyncBase): if self.repo.module_specific_options.get( "sync-webrsync-verify-signature", "false" ).lower() in ("true", "yes"): - if not self.repo.sync_openpgp_key_path: writemsg_level( "!!! sync-openpgp-key-path is not set\n", @@ -93,24 +97,13 @@ class WebRsync(SyncBase): ) return (1, False) - openpgp_env = self._get_openpgp_env(self.repo.sync_openpgp_key_path) - - out = portage.output.EOutput(quiet=quiet) - try: - out.einfo("Using keys from %s" % (self.repo.sync_openpgp_key_path,)) - with io.open(self.repo.sync_openpgp_key_path, "rb") as f: - openpgp_env.import_key(f) - self._refresh_keys(openpgp_env) - self.spawn_kwargs["env"]["PORTAGE_GPG_DIR"] = openpgp_env.home - self.spawn_kwargs["env"]["PORTAGE_TEMP_GPG_DIR"] = openpgp_env.home - except (GematoException, asyncio.TimeoutError) as e: - writemsg_level( - "!!! Verification impossible due to keyring problem:\n%s\n" - % (e,), - level=logging.ERROR, - noiselevel=-1, - ) - return (1, False) + self.spawn_kwargs["env"]["PORTAGE_SYNC_WEBRSYNC_GPG"] = "1" + self.spawn_kwargs["env"][ + "PORTAGE_GPG_KEY" + ] = self.repo.sync_openpgp_key_path + self.spawn_kwargs["env"][ + "PORTAGE_GPG_KEY_SERVER" + ] = self.repo.sync_openpgp_keyserver webrsync_cmd = [self.bin_command] if verbose: @@ -125,7 +118,7 @@ class WebRsync(SyncBase): exitcode = portage.process.spawn(webrsync_cmd, **self.spawn_kwargs) if exitcode != os.EX_OK: - msg = "!!! emerge-webrsync error in %s" % self.repo.location + msg = f"!!! emerge-webrsync error in {self.repo.location}" self.logger(self.xterm_titles, msg) writemsg_level(msg + "\n", level=logging.ERROR, noiselevel=-1) return (exitcode, False) @@ -136,7 +129,12 @@ class WebRsync(SyncBase): class PyWebRsync(SyncBase): - """WebRSync sync class""" + """PyWebRsync sync class + + TODO: Implement the sync parts from the emerge-webrsync external + binary to avoid split logic for various components, which + is how we ended up with bug #597800. + """ short_desc = "Perform sync operations on webrsync based repositories" @@ -149,4 +147,6 @@ class PyWebRsync(SyncBase): def sync(self, **kwargs): """Sync the repository""" - pass + raise NotImplementedError( + "Python impl. of webrsync backend is not yet implemented" + ) diff --git a/lib/portage/sync/modules/zipfile/__init__.py b/lib/portage/sync/modules/zipfile/__init__.py new file mode 100644 index 000000000..e44833088 --- /dev/null +++ b/lib/portage/sync/modules/zipfile/__init__.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2024 Alexey Gladkov <gladkov.alexey@gmail.com> + +doc = """Zipfile plug-in module for portage. +Performs a http download of a portage snapshot and unpacks it to the repo +location.""" +__doc__ = doc[:] + + +import os + +from portage.sync.config_checks import CheckSyncConfig + + +module_spec = { + "name": "zipfile", + "description": doc, + "provides": { + "zipfile-module": { + "name": "zipfile", + "sourcefile": "zipfile", + "class": "ZipFile", + "description": doc, + "functions": ["sync", "retrieve_head"], + "func_desc": { + "sync": "Performs an archived http download of the " + + "repository, then unpacks it.", + "retrieve_head": "Returns the checksum of the unpacked archive.", + }, + "validate_config": CheckSyncConfig, + "module_specific_options": (), + }, + }, +} diff --git a/lib/portage/sync/modules/zipfile/zipfile.py b/lib/portage/sync/modules/zipfile/zipfile.py new file mode 100644 index 000000000..edfb5aa68 --- /dev/null +++ b/lib/portage/sync/modules/zipfile/zipfile.py @@ -0,0 +1,143 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2024 Alexey Gladkov <gladkov.alexey@gmail.com> + +import os +import os.path +import logging +import zipfile +import shutil +import tempfile +import urllib.request + +import portage +from portage.util import writemsg_level, writemsg_stdout +from portage.sync.syncbase import SyncBase + + +class ZipFile(SyncBase): + """ZipFile sync module""" + + short_desc = "Perform sync operations on GitHub repositories" + + @staticmethod + def name(): + return "ZipFile" + + def __init__(self): + SyncBase.__init__(self, "emerge", ">=sys-apps/portage-2.3") + + def retrieve_head(self, **kwargs): + """Get information about the checksum of the unpacked archive""" + if kwargs: + self._kwargs(kwargs) + info = portage.grabdict(os.path.join(self.repo.location, ".info")) + if "etag" in info: + return (os.EX_OK, info["etag"][0]) + return (1, False) + + def _do_cmp(self, f1, f2): + bufsize = 8 * 1024 + while True: + b1 = f1.read(bufsize) + b2 = f2.read(bufsize) + if b1 != b2: + return False + if not b1: + return True + + def sync(self, **kwargs): + """Sync the repository""" + if kwargs: + self._kwargs(kwargs) + + req = urllib.request.Request(url=self.repo.sync_uri) + + info = portage.grabdict(os.path.join(self.repo.location, ".info")) + if "etag" in info: + req.add_header("If-None-Match", info["etag"][0]) + + try: + with urllib.request.urlopen(req) as response: + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + shutil.copyfileobj(response, tmp_file) + + zip_file = tmp_file.name + etag = response.headers.get("etag") + + except urllib.error.HTTPError as resp: + if resp.code == 304: + writemsg_stdout(">>> The repository has not changed.\n", noiselevel=-1) + return (os.EX_OK, False) + + writemsg_level( + f"!!! Unable to obtain zip archive: {resp}\n", + noiselevel=-1, + level=logging.ERROR, + ) + return (1, False) + + if not zipfile.is_zipfile(zip_file): + msg = "!!! file is not a zip archive." + self.logger(self.xterm_titles, msg) + writemsg_level(msg + "\n", noiselevel=-1, level=logging.ERROR) + + os.unlink(zip_file) + + return (1, False) + + # Drop previous tree + tempdir = tempfile.mkdtemp(prefix=".temp", dir=self.repo.location) + tmpname = os.path.basename(tempdir) + + for name in os.listdir(self.repo.location): + if name != tmpname: + os.rename( + os.path.join(self.repo.location, name), + os.path.join(tempdir, name), + ) + + with zipfile.ZipFile(zip_file) as archive: + strip_comp = 0 + + for f in archive.namelist(): + f = os.path.normpath(f) + if os.path.basename(f) == "profiles": + strip_comp = f.count("/") + break + + for n in archive.infolist(): + p = os.path.normpath(n.filename) + + if os.path.isabs(p): + continue + + parts = p.split("/") + dstpath = os.path.join(self.repo.location, *parts[strip_comp:]) + + if n.is_dir(): + os.makedirs(dstpath, mode=0o755, exist_ok=True) + continue + + with archive.open(n) as srcfile: + prvpath = os.path.join(tempdir, *parts[strip_comp:]) + + if os.path.exists(prvpath): + with open(prvpath, "rb") as prvfile: + if self._do_cmp(prvfile, srcfile): + os.rename(prvpath, dstpath) + continue + srcfile.seek(0) + + with open(dstpath, "wb") as dstfile: + shutil.copyfileobj(srcfile, dstfile) + + # Drop previous tree + shutil.rmtree(tempdir) + + with open(os.path.join(self.repo.location, ".info"), "w") as infofile: + if etag: + infofile.write(f"etag {etag}\n") + + os.unlink(zip_file) + + return (os.EX_OK, True) diff --git a/lib/portage/sync/old_tree_timestamp.py b/lib/portage/sync/old_tree_timestamp.py index 3558a25ad..950878ce5 100644 --- a/lib/portage/sync/old_tree_timestamp.py +++ b/lib/portage/sync/old_tree_timestamp.py @@ -82,7 +82,7 @@ def old_tree_timestamp_warn(portdir, settings): warnsync = float(settings.get(var_name, default_warnsync)) except ValueError: writemsg_level( - "!!! %s contains non-numeric value: %s\n" % (var_name, settings[var_name]), + f"!!! {var_name} contains non-numeric value: {settings[var_name]}\n", level=logging.ERROR, noiselevel=-1, ) @@ -94,7 +94,7 @@ def old_tree_timestamp_warn(portdir, settings): if (unixtime - 86400 * warnsync) > lastsync: out = EOutput() if have_english_locale(): - out.ewarn("Last emerge --sync was %s ago." % whenago(unixtime - lastsync)) + out.ewarn(f"Last emerge --sync was {whenago(unixtime - lastsync)} ago.") else: out.ewarn( _("Last emerge --sync was %s.") diff --git a/lib/portage/sync/revision_history.py b/lib/portage/sync/revision_history.py new file mode 100644 index 000000000..3d909d94e --- /dev/null +++ b/lib/portage/sync/revision_history.py @@ -0,0 +1,133 @@ +# Copyright 2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +import json +import os +from typing import Optional + +import portage +from portage.locks import lockfile, unlockfile +from portage.repository.config import RepoConfig +from portage.util.path import first_existing + +_HISTORY_LIMIT = 25 + + +def get_repo_revision_history( + eroot: str, repos: Optional[list[RepoConfig]] = None +) -> dict[str, list[str]]: + """ + Get revision history of synced repos. Returns a dict that maps + a repo name to list of revisions in descending order by time. + If a change is detected and the current process has permission + to update the repo_revisions file, then the file will be updated + with any newly detected revisions. + + This functions detects revisions which are not yet visible to the + current process due to the sync-rcu option. + + @param eroot: EROOT to query + @type eroot: string + @param repos: list of RepoConfig instances to check for new revisions + @type repos: list + @rtype: dict + @return: mapping of repo name to list of revisions in descending + order by time + """ + items = [] + for repo in repos or (): + if repo.volatile: + items.append((repo, None)) + continue + if repo.sync_type: + try: + sync_mod = portage.sync.module_controller.get_class(repo.sync_type) + except portage.exception.PortageException: + continue + else: + continue + repo_location_orig = repo.location + try: + if repo.user_location is not None: + # Temporarily override sync-rcu behavior which pins the + # location to a previous snapshot, since we want the + # latest available revision here. + repo.location = repo.user_location + status, repo_revision = sync_mod().retrieve_head(options={"repo": repo}) + except NotImplementedError: + repo_revision = None + else: + repo_revision = repo_revision.strip() if status == os.EX_OK else None + finally: + repo.location = repo_location_orig + + if repo_revision is not None: + items.append((repo, repo_revision)) + + return _maybe_update_revisions(eroot, items) + + +def _update_revisions(repo_revisions, items): + modified = False + for repo, repo_revision in items: + if repo.volatile: + # For volatile repos the revisions may be unordered, + # which makes them unusable here where revisions are + # intended to be ordered, so discard them. + rev_list = repo_revisions.pop(repo.name, None) + if rev_list: + modified = True + continue + + rev_list = repo_revisions.setdefault(repo.name, []) + if not rev_list or rev_list[0] != repo_revision: + rev_list.insert(0, repo_revision) + del rev_list[_HISTORY_LIMIT:] + modified = True + return modified + + +def _maybe_update_revisions(eroot, items): + repo_revisions_file = os.path.join(eroot, portage.const.REPO_REVISIONS) + repo_revisions_lock = None + try: + previous_revisions = None + try: + with open(repo_revisions_file, encoding="utf8") as f: + if os.fstat(f.fileno()).st_size: + previous_revisions = json.load(f) + except FileNotFoundError: + pass + + repo_revisions = {} if previous_revisions is None else previous_revisions.copy() + modified = _update_revisions(repo_revisions, items) + + # If modified then do over with lock if permissions allow. + if modified and os.access( + first_existing(os.path.dirname(repo_revisions_file)), os.W_OK + ): + # This is a bit redundant since the config._init_dirs method + # is supposed to create PRIVATE_PATH with these permissions. + portage.util.ensure_dirs( + os.path.dirname(repo_revisions_file), + gid=portage.data.portage_gid, + mode=0o2750, + mask=0o2, + ) + repo_revisions_lock = lockfile(repo_revisions_file) + previous_revisions = None + with open(repo_revisions_file, encoding="utf8") as f: + if os.fstat(f.fileno()).st_size: + previous_revisions = json.load(f) + repo_revisions = ( + {} if previous_revisions is None else previous_revisions.copy() + ) + _update_revisions(repo_revisions, items) + f = portage.util.atomic_ofstream(repo_revisions_file) + json.dump(repo_revisions, f, ensure_ascii=False, sort_keys=True) + f.close() + finally: + if repo_revisions_lock is not None: + unlockfile(repo_revisions_lock) + + return repo_revisions diff --git a/lib/portage/sync/syncbase.py b/lib/portage/sync/syncbase.py index 94c873e1f..c3a07da7d 100644 --- a/lib/portage/sync/syncbase.py +++ b/lib/portage/sync/syncbase.py @@ -1,4 +1,4 @@ -# Copyright 2014-2020 Gentoo Authors +# Copyright 2014-2023 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 """ @@ -55,25 +55,24 @@ class SyncBase: @property def has_bin(self): - """Checks for existance of the external binary, and also + """Checks for existence of the external binary, and also checks for storage driver configuration problems. MUST only be called after _kwargs() has set the logger """ if self.bin_command is None: msg = [ - "Command not found: %s" % self._bin_command, - 'Type "emerge %s" to enable %s support.' - % (self.bin_pkg, self._bin_command), + f"Command not found: {self._bin_command}", + f'Type "emerge {self.bin_pkg}" to enable {self._bin_command} support.', ] for l in msg: - writemsg_level("!!! %s\n" % l, level=logging.ERROR, noiselevel=-1) + writemsg_level(f"!!! {l}\n", level=logging.ERROR, noiselevel=-1) return False try: self.repo_storage except RepoStorageException as e: - writemsg_level("!!! %s\n" % (e,), level=logging.ERROR, noiselevel=-1) + writemsg_level(f"!!! {e}\n", level=logging.ERROR, noiselevel=-1) return False return True @@ -178,7 +177,7 @@ class SyncBase: try: retry_count = int(self.repo.sync_openpgp_key_refresh_retry_count) except Exception as e: - errors.append("sync-openpgp-key-refresh-retry-count: {}".format(e)) + errors.append(f"sync-openpgp-key-refresh-retry-count: {e}") else: if retry_count <= 0: return None @@ -191,9 +190,7 @@ class SyncBase: self.repo.sync_openpgp_key_refresh_retry_overall_timeout ) except Exception as e: - errors.append( - "sync-openpgp-key-refresh-retry-overall-timeout: {}".format(e) - ) + errors.append(f"sync-openpgp-key-refresh-retry-overall-timeout: {e}") else: if retry_overall_timeout < 0: errors.append( @@ -213,7 +210,7 @@ class SyncBase: self.repo.sync_openpgp_key_refresh_retry_delay_mult ) except Exception as e: - errors.append("sync-openpgp-key-refresh-retry-delay-mult: {}".format(e)) + errors.append(f"sync-openpgp-key-refresh-retry-delay-mult: {e}") else: if retry_delay_mult <= 0: errors.append( @@ -229,7 +226,7 @@ class SyncBase: self.repo.sync_openpgp_key_refresh_retry_delay_exp_base ) except Exception as e: - errors.append("sync-openpgp-key-refresh-retry-delay-exp: {}".format(e)) + errors.append(f"sync-openpgp-key-refresh-retry-delay-exp: {e}") else: if retry_delay_exp_base <= 0: errors.append( @@ -243,11 +240,11 @@ class SyncBase: lines.append("!!! Retry disabled for openpgp key refresh:") lines.append("") for msg in errors: - lines.append(" {}".format(msg)) + lines.append(f" {msg}") lines.append("") for line in lines: - writemsg_level("{}\n".format(line), level=logging.ERROR, noiselevel=-1) + writemsg_level(f"{line}\n", level=logging.ERROR, noiselevel=-1) return None @@ -293,11 +290,9 @@ class SyncBase: out.ebegin( "Refreshing keys from keyserver{}".format( - ( - "" - if self.repo.sync_openpgp_keyserver is None - else " " + self.repo.sync_openpgp_keyserver - ) + "" + if self.repo.sync_openpgp_keyserver is None + else " " + self.repo.sync_openpgp_keyserver ) ) retry_decorator = self._key_refresh_retry_decorator() @@ -317,7 +312,7 @@ class SyncBase: keyserver=self.repo.sync_openpgp_keyserver ) except Exception as e: - writemsg_level("%s\n" % (e,), level=logging.ERROR, noiselevel=-1) + writemsg_level(f"{e}\n", level=logging.ERROR, noiselevel=-1) raise # retry # The ThreadPoolExecutor that asyncio uses by default @@ -333,18 +328,24 @@ class SyncBase: loop.run_until_complete(decorated_func()) out.eend(0) - def _get_openpgp_env(self, openpgp_key_path=None): + def _get_openpgp_env(self, openpgp_key_path=None, debug=False): if gemato is not None: # Override global proxy setting with one provided in emerge configuration if "http_proxy" in self.spawn_kwargs["env"]: proxy = self.spawn_kwargs["env"]["http_proxy"] + elif "https_proxy" in self.spawn_kwargs["env"]: + proxy = self.spawn_kwargs["env"]["https_proxy"] else: proxy = None if openpgp_key_path: - openpgp_env = gemato.openpgp.OpenPGPEnvironment(proxy=proxy) + openpgp_env = gemato.openpgp.OpenPGPEnvironment( + proxy=proxy, debug=debug + ) else: - openpgp_env = gemato.openpgp.OpenPGPSystemEnvironment(proxy=proxy) + openpgp_env = gemato.openpgp.OpenPGPSystemEnvironment( + proxy=proxy, debug=debug + ) return openpgp_env |