aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZac Medico <zmedico@gentoo.org>2018-07-31 00:28:45 -0700
committerZac Medico <zmedico@gentoo.org>2018-09-23 22:52:52 -0700
commit36f50e3b64756179758a8e3a11a3c6c666550cf5 (patch)
tree678a954c9b9339ab2c7fcf12782ca0d310c54ec5
parentrsync: split out repo storage framework (diff)
downloadportage-36f50e3b.tar.gz
portage-36f50e3b.tar.bz2
portage-36f50e3b.zip
Add sync-rcu support for rsync (bug 662070)
Add a boolean sync-rcu repos.conf setting that behaves as follows: Enable read-copy-update (RCU) behavior for sync operations. The current latest immutable version of a repository will be referenced by a symlink found where the repository would normally be located (see the location setting). Repository consumers should resolve the canonical path of this symlink before attempting to access the repository, and all operations should be read-only, since the repository is considered immutable. Updates occur by atomic replacement of the symlink, which causes new consumers to use the new immutable version, while any earlier consumers continue to use the canonical path that was resolved earlier. This option requires sync-allow-hardlinks and sync-rcu-store-dir options to be enabled, and currently also requires that sync-type is set to rsync. This option is disabled by default, since the symlink usage would require special handling for scenarios involving bind mounts and chroots. Bug: https://bugs.gentoo.org/662070 Reviewed-by: Brian Dolbec <dolsen@gentoo.org> Signed-off-by: Zac Medico <zmedico@gentoo.org>
-rw-r--r--lib/portage/repository/config.py36
-rw-r--r--lib/portage/repository/storage/hardlink_rcu.py251
-rw-r--r--lib/portage/sync/syncbase.py4
-rw-r--r--lib/portage/tests/sync/test_sync_local.py40
-rw-r--r--man/portage.535
5 files changed, 360 insertions, 6 deletions
diff --git a/lib/portage/repository/config.py b/lib/portage/repository/config.py
index f790f9392..8cdc2a696 100644
--- a/lib/portage/repository/config.py
+++ b/lib/portage/repository/config.py
@@ -84,7 +84,7 @@ class RepoConfig(object):
'profile_formats', 'sign_commit', 'sign_manifest', 'strict_misc_digests',
'sync_depth', 'sync_hooks_only_on_change',
'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest',
- 'update_changelog', '_eapis_banned', '_eapis_deprecated',
+ 'update_changelog', 'user_location', '_eapis_banned', '_eapis_deprecated',
'_masters_orig', 'module_specific_options', 'manifest_required_hashes',
'sync_allow_hardlinks',
'sync_openpgp_key_path',
@@ -93,6 +93,10 @@ class RepoConfig(object):
'sync_openpgp_key_refresh_retry_delay_exp_base',
'sync_openpgp_key_refresh_retry_delay_mult',
'sync_openpgp_key_refresh_retry_overall_timeout',
+ 'sync_rcu',
+ 'sync_rcu_store_dir',
+ 'sync_rcu_spare_snapshots',
+ 'sync_rcu_ttl_days',
)
def __init__(self, name, repo_opts, local_config=True):
@@ -198,6 +202,22 @@ class RepoConfig(object):
'sync_openpgp_key_refresh_retry_overall_timeout'):
setattr(self, k, repo_opts.get(k.replace('_', '-'), None))
+ self.sync_rcu = repo_opts.get(
+ 'sync-rcu', 'false').lower() in ('true', 'yes')
+
+ self.sync_rcu_store_dir = repo_opts.get('sync-rcu-store-dir')
+
+ for k in ('sync-rcu-spare-snapshots', 'sync-rcu-ttl-days'):
+ v = repo_opts.get(k, '').strip() or None
+ if v:
+ try:
+ v = int(v)
+ except (OverflowError, ValueError):
+ writemsg(_("!!! Invalid %s setting for repo"
+ " %s: %s\n") % (k, name, v), noiselevel=-1)
+ v = None
+ setattr(self, k.replace('-', '_'), v)
+
self.module_specific_options = {}
# Not implemented.
@@ -206,9 +226,14 @@ class RepoConfig(object):
format = format.strip()
self.format = format
+ self.user_location = None
location = repo_opts.get('location')
if location is not None and location.strip():
if os.path.isdir(location) or portage._sync_mode:
+ # The user_location is required for sync-rcu support,
+ # since it manages a symlink which resides at that
+ # location (and realpath is irreversible).
+ self.user_location = location
location = os.path.realpath(location)
else:
location = None
@@ -542,6 +567,10 @@ class RepoConfigLoader(object):
'sync_openpgp_key_refresh_retry_delay_exp_base',
'sync_openpgp_key_refresh_retry_delay_mult',
'sync_openpgp_key_refresh_retry_overall_timeout',
+ 'sync_rcu',
+ 'sync_rcu_store_dir',
+ 'sync_rcu_spare_snapshots',
+ 'sync_rcu_ttl_days',
'sync_type', 'sync_umask', 'sync_uri', 'sync_user',
'module_specific_options'):
v = getattr(repos_conf_opts, k, None)
@@ -962,7 +991,7 @@ class RepoConfigLoader(object):
return repo_name in self.prepos
def config_string(self):
- bool_keys = ("strict_misc_digests", "sync_allow_hardlinks")
+ bool_keys = ("strict_misc_digests", "sync_allow_hardlinks", "sync_rcu")
str_or_int_keys = ("auto_sync", "clone_depth", "format", "location",
"main_repo", "priority", "sync_depth", "sync_openpgp_key_path",
"sync_openpgp_key_refresh_retry_count",
@@ -970,6 +999,9 @@ class RepoConfigLoader(object):
"sync_openpgp_key_refresh_retry_delay_exp_base",
"sync_openpgp_key_refresh_retry_delay_mult",
"sync_openpgp_key_refresh_retry_overall_timeout",
+ "sync_rcu_store_dir",
+ "sync_rcu_spare_snapshots",
+ "sync_rcu_ttl_days",
"sync_type", "sync_umask", "sync_uri", 'sync_user')
str_tuple_keys = ("aliases", "eclass_overrides", "force")
repo_config_tuple_keys = ("masters",)
diff --git a/lib/portage/repository/storage/hardlink_rcu.py b/lib/portage/repository/storage/hardlink_rcu.py
new file mode 100644
index 000000000..80cdbb0d7
--- /dev/null
+++ b/lib/portage/repository/storage/hardlink_rcu.py
@@ -0,0 +1,251 @@
+# Copyright 2018 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import datetime
+
+import portage
+from portage import os
+from portage.repository.storage.interface import (
+ RepoStorageException,
+ RepoStorageInterface,
+)
+from portage.util.futures import asyncio
+from portage.util.futures.compat_coroutine import (
+ coroutine,
+ coroutine_return,
+)
+
+from _emerge.SpawnProcess import SpawnProcess
+
+
+class HardlinkRcuRepoStorage(RepoStorageInterface):
+    """
+    Enable read-copy-update (RCU) behavior for sync operations. The
+    current latest immutable version of a repository will be
+    referenced by a symlink found where the repository would normally
+    be located. Repository consumers should resolve the canonical
+    path of this symlink before attempting to access the repository,
+    and all operations should be read-only, since the repository
+    is considered immutable. Updates occur by atomic replacement
+    of the symlink, which causes new consumers to use the new
+    immutable version, while any earlier consumers continue to use
+    the canonical path that was resolved earlier.
+
+    Performance is better than HardlinkQuarantineRepoStorage,
+    since commit involves atomic replacement of a symlink. Since
+    the symlink usage would require special handling for scenarios
+    involving bind mounts and chroots, this module is not enabled
+    by default.
+
+    Layout of sync-rcu-store-dir, as managed by this class:
+
+        update            working copy of the update in progress
+        snapshots/<N>     committed snapshots, named by increasing integer
+        latest            relative symlink to the newest snapshots/<N>
+
+    repos.conf parameters:
+
+        sync-rcu-store-dir
+
+            Directory path reserved for sync-rcu storage. This
+            directory must have a unique value for each repository
+            (do not set it in the DEFAULT section). This directory
+            must not contain any other files or directories aside
+            from those that are created automatically when sync-rcu
+            is enabled.
+
+        sync-rcu-spare-snapshots = 1
+
+            Number of spare snapshots for sync-rcu to retain with
+            expired ttl. This protects the previous latest snapshot
+            from being removed immediately after a new version
+            becomes available, since it might still be used by
+            running processes.
+
+        sync-rcu-ttl-days = 7
+
+            Number of days for sync-rcu to retain previous immutable
+            snapshots of a repository. After the ttl of a particular
+            snapshot has expired, it will be removed automatically (the
+            latest snapshot is exempt, and sync-rcu-spare-snapshots
+            configures the number of previous snapshots that are
+            exempt). If the ttl is set too low, then a snapshot could
+            expire while it is in use by a running process.
+
+    """
+
+    # Defaults applied when the corresponding repos.conf setting is
+    # unset or negative. They match the values documented above.
+    _DEFAULT_SPARE_SNAPSHOTS = 1
+    _DEFAULT_TTL_DAYS = 7
+
+    def __init__(self, repo, spawn_kwargs):
+        """
+        @param repo: repository configuration (presumably a RepoConfig
+            instance). The attributes read here are user_location, name,
+            sync_allow_hardlinks, sync_rcu_store_dir,
+            sync_rcu_spare_snapshots and sync_rcu_ttl_days.
+        @param spawn_kwargs: keyword arguments passed to SpawnProcess
+            for commands that run without the privileged flag
+        @type spawn_kwargs: dict
+        @raise RepoStorageException: if sync-allow-hardlinks is disabled
+            or sync-rcu-store-dir is unset
+        """
+        # Note that repo.location cannot substitute for repo.user_location here,
+        # since we manage a symlink that resides at repo.user_location, and
+        # repo.location is the irreversible result of realpath(repo.user_location).
+        self._user_location = repo.user_location
+        self._spawn_kwargs = spawn_kwargs
+
+        if not repo.sync_allow_hardlinks:
+            raise RepoStorageException("repos.conf sync-rcu setting"
+                " for repo '%s' requires that sync-allow-hardlinks be enabled" % repo.name)
+
+        # Raise an exception if repo.sync_rcu_store_dir is unset, since the
+        # user needs to be aware of this location for bind mount and chroot
+        # scenarios
+        if not repo.sync_rcu_store_dir:
+            raise RepoStorageException("repos.conf sync-rcu setting"
+                " for repo '%s' requires that sync-rcu-store-dir be set" % repo.name)
+
+        self._storage_location = repo.sync_rcu_store_dir
+
+        # Unset or negative values fall back to the documented defaults.
+        spare_snapshots = repo.sync_rcu_spare_snapshots
+        if spare_snapshots is None or spare_snapshots < 0:
+            spare_snapshots = self._DEFAULT_SPARE_SNAPSHOTS
+        self._spare_snapshots = spare_snapshots
+
+        ttl_days = repo.sync_rcu_ttl_days
+        if ttl_days is None or ttl_days < 0:
+            ttl_days = self._DEFAULT_TTL_DAYS
+        self._ttl_days = ttl_days
+
+        self._update_location = None
+        self._latest_symlink = os.path.join(self._storage_location, 'latest')
+        self._latest_canonical = os.path.realpath(self._latest_symlink)
+        if not os.path.exists(self._latest_canonical) or os.path.islink(self._latest_canonical):
+            # It doesn't exist, or it's a broken symlink.
+            self._latest_canonical = None
+        self._snapshots_dir = os.path.join(self._storage_location, 'snapshots')
+
+    @coroutine
+    def _check_call(self, cmd, privileged=False):
+        """
+        Run cmd and raise RepoStorageException on failure.
+
+        @param cmd: command to execute
+        @type cmd: list
+        @param privileged: run with maximum privileges
+        @type privileged: bool
+        @raise RepoStorageException: if cmd exits with a status other
+            than os.EX_OK
+        """
+        if privileged:
+            # Only fd_pipes is forwarded, so none of the other spawn
+            # settings from spawn_kwargs apply to this command.
+            kwargs = dict(fd_pipes=self._spawn_kwargs.get('fd_pipes'))
+        else:
+            kwargs = self._spawn_kwargs
+        p = SpawnProcess(args=cmd, scheduler=asyncio._wrap_loop(), **kwargs)
+        p.start()
+        if (yield p.async_wait()) != os.EX_OK:
+            raise RepoStorageException('command exited with status {}: {}'.\
+                format(p.returncode, ' '.join(cmd)))
+
+    def _snapshot_ids(self):
+        """
+        List the integer ids of the snapshots found in the snapshots
+        directory. Entries whose names are not integers are ignored, so
+        that a stray file cannot abort a sync with ValueError.
+
+        @rtype: list
+        @return: unsorted list of snapshot ids
+        @raise OSError: if the snapshots directory cannot be listed
+        """
+        snapshot_ids = []
+        for name in os.listdir(self._snapshots_dir):
+            try:
+                snapshot_ids.append(int(name))
+            except ValueError:
+                continue
+        return snapshot_ids
+
+    @staticmethod
+    def _replace_symlink(target, link_path):
+        """
+        Point link_path at target by creating a temporary symlink
+        beside it and renaming that over link_path.
+
+        @param target: symlink target
+        @type target: str
+        @param link_path: path of the symlink to create or replace
+        @type link_path: str
+        """
+        new_symlink = link_path + '.new'
+        try:
+            # Discard a leftover temporary symlink from an earlier run.
+            os.unlink(new_symlink)
+        except OSError:
+            pass
+        os.symlink(target, new_symlink)
+        os.rename(new_symlink, link_path)
+
+    @coroutine
+    def init_update(self):
+        """
+        Create a fresh update directory inside the storage directory.
+        If a latest snapshot exists, the update directory is populated
+        with hardlinks to its files. Otherwise, if the user location is
+        not yet a symlink, its content is first migrated into storage
+        as the initial snapshot.
+
+        @rtype: str
+        @return: path of the update directory
+        @raise RepoStorageException: if a spawned command fails
+        """
+        update_location = os.path.join(self._storage_location, 'update')
+        yield self._check_call(['rm', '-rf', update_location])
+
+        # This assumes normal umask permissions if it doesn't exist yet.
+        portage.util.ensure_dirs(self._storage_location)
+
+        if self._latest_canonical is not None:
+            portage.util.ensure_dirs(update_location)
+            portage.util.apply_stat_permissions(update_location,
+                os.stat(self._user_location))
+            # Use rsync --link-dest to hardlink files into update_location,
+            # since cp -l is not portable.
+            yield self._check_call(['rsync', '-a', '--link-dest', self._latest_canonical,
+                self._latest_canonical + '/', update_location + '/'])
+
+        elif not os.path.islink(self._user_location):
+            yield self._migrate(update_location)
+            # The migrated content is now the latest snapshot, so start
+            # over in order to hardlink the update directory from it.
+            update_location = (yield self.init_update())
+
+        self._update_location = update_location
+
+        coroutine_return(self._update_location)
+
+    @coroutine
+    def _migrate(self, update_location):
+        """
+        When repo.user_location is a normal directory, migrate it to
+        storage so that it can be replaced with a symlink. After migration,
+        commit the content as the latest snapshot.
+
+        @param update_location: path inside the storage directory that
+            receives the content of repo.user_location
+        @type update_location: str
+        @raise RepoStorageException: if a spawned command fails
+        """
+        try:
+            os.rename(self._user_location, update_location)
+        except OSError:
+            portage.util.ensure_dirs(update_location)
+            portage.util.apply_stat_permissions(update_location,
+                os.stat(self._user_location))
+            # It's probably on a different device, so copy it.
+            yield self._check_call(['rsync', '-a',
+                self._user_location + '/', update_location + '/'])
+
+            # Remove the old copy so that symlink can be created. Run with
+            # maximum privileges, since removal requires write access to
+            # the parent directory.
+            yield self._check_call(['rm', '-rf', self._user_location], privileged=True)
+
+        self._update_location = update_location
+
+        # Make this copy the latest snapshot
+        yield self.commit_update()
+
+    @property
+    def current_update(self):
+        """
+        Path of the update directory that is currently in progress.
+
+        @rtype: str
+        @raise RepoStorageException: if no update is in progress
+        """
+        if self._update_location is None:
+            raise RepoStorageException('current update does not exist')
+        return self._update_location
+
+    @coroutine
+    def commit_update(self):
+        """
+        Turn the current update directory into the newest snapshot and
+        repoint the 'latest' symlink at it. The symlink at the user
+        location is created or replaced if it does not already resolve
+        to the same file as 'latest'.
+
+        @raise RepoStorageException: if no update is in progress
+        """
+        update_location = self.current_update
+        self._update_location = None
+        try:
+            snapshots = self._snapshot_ids()
+        except OSError:
+            # Typically the snapshots directory does not exist yet.
+            snapshots = []
+            portage.util.ensure_dirs(self._snapshots_dir)
+            portage.util.apply_stat_permissions(self._snapshots_dir,
+                os.stat(self._storage_location))
+        if snapshots:
+            new_id = max(snapshots) + 1
+        else:
+            new_id = 1
+        os.rename(update_location, os.path.join(self._snapshots_dir, str(new_id)))
+        # The target is relative to the storage directory, where the
+        # 'latest' symlink resides.
+        self._replace_symlink('snapshots/{}'.format(new_id), self._latest_symlink)
+
+        # Remember the new snapshot, so that a subsequent init_update
+        # call on this instance hardlinks from it.
+        self._latest_canonical = os.path.realpath(self._latest_symlink)
+
+        try:
+            user_location_correct = os.path.samefile(self._user_location, self._latest_symlink)
+        except OSError:
+            user_location_correct = False
+
+        if not user_location_correct:
+            self._replace_symlink(self._latest_symlink, self._user_location)
+
+        coroutine_return()
+        # This unreachable yield makes the function a generator, which
+        # is presumably what the coroutine decorator requires.
+        yield None
+
+    @coroutine
+    def abort_update(self):
+        """
+        Discard the update directory that is currently in progress, if
+        there is one.
+
+        @raise RepoStorageException: if the removal command fails
+        """
+        if self._update_location is not None:
+            update_location = self._update_location
+            self._update_location = None
+            yield self._check_call(['rm', '-rf', update_location])
+
+    @coroutine
+    def garbage_collection(self):
+        """
+        Remove snapshots whose ttl has expired. The latest snapshot and
+        the configured number of spare snapshots are always preserved.
+
+        @raise RepoStorageException: if a removal command fails
+        """
+        snap_ttl = datetime.timedelta(days=self._ttl_days)
+        try:
+            snapshots = sorted(self._snapshot_ids())
+        except OSError:
+            # Nothing has been committed yet, so there is nothing to collect.
+            snapshots = []
+        # always preserve the latest snapshot
+        protect_count = self._spare_snapshots + 1
+        # protect_count is at least 1, so this slice drops the newest
+        # protect_count ids (or everything, if there are fewer).
+        for snap_id in snapshots[:-protect_count]:
+            snap_path = os.path.join(self._snapshots_dir, str(snap_id))
+            try:
+                st = os.stat(snap_path)
+            except OSError:
+                continue
+            snap_timestamp = datetime.datetime.utcfromtimestamp(st.st_mtime)
+            if (datetime.datetime.utcnow() - snap_timestamp) < snap_ttl:
+                continue
+            yield self._check_call(['rm', '-rf', snap_path])
diff --git a/lib/portage/sync/syncbase.py b/lib/portage/sync/syncbase.py
index e9b6ede4e..83b35c667 100644
--- a/lib/portage/sync/syncbase.py
+++ b/lib/portage/sync/syncbase.py
@@ -93,7 +93,9 @@ class SyncBase(object):
@rtype: str
@return: name of the selected repo storage constructor
'''
- if self.repo.sync_allow_hardlinks:
+ if self.repo.sync_rcu:
+ mod_name = 'portage.repository.storage.hardlink_rcu.HardlinkRcuRepoStorage'
+ elif self.repo.sync_allow_hardlinks:
mod_name = 'portage.repository.storage.hardlink_quarantine.HardlinkQuarantineRepoStorage'
else:
mod_name = 'portage.repository.storage.inplace.InplaceRepoStorage'
diff --git a/lib/portage/tests/sync/test_sync_local.py b/lib/portage/tests/sync/test_sync_local.py
index 17ff6f200..49c7a992d 100644
--- a/lib/portage/tests/sync/test_sync_local.py
+++ b/lib/portage/tests/sync/test_sync_local.py
@@ -1,6 +1,7 @@
# Copyright 2014-2015 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
+import datetime
import subprocess
import sys
import textwrap
@@ -42,6 +43,8 @@ class SyncLocalTestCase(TestCase):
location = %(EPREFIX)s/var/repositories/test_repo
sync-type = %(sync-type)s
sync-uri = file://%(EPREFIX)s/var/repositories/test_repo_sync
+ sync-rcu = %(sync-rcu)s
+ sync-rcu-store-dir = %(EPREFIX)s/var/repositories/test_repo_rcu_storedir
auto-sync = %(auto-sync)s
%(repo_extra_keys)s
""")
@@ -88,9 +91,10 @@ class SyncLocalTestCase(TestCase):
committer_email = "gentoo-dev@gentoo.org"
def repos_set_conf(sync_type, dflt_keys=None, xtra_keys=None,
- auto_sync="yes"):
+ auto_sync="yes", sync_rcu=False):
env["PORTAGE_REPOSITORIES"] = repos_conf % {\
"EPREFIX": eprefix, "sync-type": sync_type,
+ "sync-rcu": "yes" if sync_rcu else "no",
"auto-sync": auto_sync,
"default_keys": "" if dflt_keys is None else dflt_keys,
"repo_extra_keys": "" if xtra_keys is None else xtra_keys}
@@ -99,7 +103,18 @@ class SyncLocalTestCase(TestCase):
with open(os.path.join(repo.location + "_sync",
"dev-libs", "A", "A-0.ebuild"), "a") as f:
f.write("\n")
- os.unlink(os.path.join(metadata_dir, 'timestamp.chk'))
+ bump_timestamp()
+
+ def bump_timestamp():
+ bump_timestamp.timestamp += datetime.timedelta(seconds=1)
+ with open(os.path.join(repo.location + '_sync', 'metadata', 'timestamp.chk'), 'w') as f:
+ f.write(bump_timestamp.timestamp.strftime('%s\n' % TIMESTAMP_FORMAT,))
+
+ bump_timestamp.timestamp = datetime.datetime.utcnow()
+
+ bump_timestamp_cmds = (
+ (homedir, bump_timestamp),
+ )
sync_cmds = (
(homedir, cmds["emerge"] + ("--sync",)),
@@ -170,6 +185,18 @@ class SyncLocalTestCase(TestCase):
(homedir, lambda: repos_set_conf("rsync")),
)
+ delete_repo_location = (
+ (homedir, lambda: shutil.rmtree(repo.user_location)),
+ (homedir, lambda: os.mkdir(repo.user_location)),
+ )
+
+ revert_rcu_layout = (
+ (homedir, lambda: os.rename(repo.user_location, repo.user_location + '.bak')),
+ (homedir, lambda: os.rename(os.path.realpath(repo.user_location + '.bak'), repo.user_location)),
+ (homedir, lambda: os.unlink(repo.user_location + '.bak')),
+ (homedir, lambda: shutil.rmtree(repo.user_location + '_rcu_storedir')),
+ )
+
delete_sync_repo = (
(homedir, lambda: shutil.rmtree(
repo.location + "_sync")),
@@ -190,6 +217,10 @@ class SyncLocalTestCase(TestCase):
(homedir, lambda: repos_set_conf("git")),
)
+ sync_rsync_rcu = (
+ (homedir, lambda: repos_set_conf("rsync", sync_rcu=True)),
+ )
+
pythonpath = os.environ.get("PYTHONPATH")
if pythonpath is not None and not pythonpath.strip():
pythonpath = None
@@ -228,7 +259,7 @@ class SyncLocalTestCase(TestCase):
timestamp_path = os.path.join(metadata_dir, 'timestamp.chk')
with open(timestamp_path, 'w') as f:
- f.write(time.strftime('%s\n' % TIMESTAMP_FORMAT, time.gmtime()))
+ f.write(bump_timestamp.timestamp.strftime('%s\n' % TIMESTAMP_FORMAT,))
if debug:
# The subprocess inherits both stdout and stderr, for
@@ -242,6 +273,9 @@ class SyncLocalTestCase(TestCase):
for cwd, cmd in rename_repo + sync_cmds_auto_sync + sync_cmds + \
rsync_opts_repos + rsync_opts_repos_default + \
rsync_opts_repos_default_ovr + rsync_opts_repos_default_cancel + \
+ bump_timestamp_cmds + sync_rsync_rcu + sync_cmds + revert_rcu_layout + \
+ delete_repo_location + sync_cmds + sync_cmds + \
+ bump_timestamp_cmds + sync_cmds + revert_rcu_layout + \
delete_sync_repo + git_repo_create + sync_type_git + \
rename_repo + sync_cmds:
diff --git a/man/portage.5 b/man/portage.5
index c3c610a6c..62943fb76 100644
--- a/man/portage.5
+++ b/man/portage.5
@@ -1025,6 +1025,41 @@ If set to true, then sync of a given repository will not trigger postsync
hooks unless hooks would have executed for a master repository or the
repository has changed since the previous sync operation.
.TP
+.B sync\-rcu = yes|no
+Enable read\-copy\-update (RCU) behavior for sync operations. The current
+latest immutable version of a repository will be referenced by a symlink
+found where the repository would normally be located (see the \fBlocation\fR
+setting). Repository consumers should resolve the canonical path of this
+symlink before attempting to access the repository, and all operations should
+be read\-only, since the repository is considered immutable. Updates occur
+by atomic replacement of the symlink, which causes new consumers to use the
+new immutable version, while any earlier consumers continue to use the
+canonical path that was resolved earlier. This option requires
+sync\-allow\-hardlinks and sync\-rcu\-store\-dir options to be enabled, and
+currently also requires that sync\-type is set to rsync. This option is
+disabled by default, since the symlink usage would require special handling
+for scenarios involving bind mounts and chroots.
+.TP
+.B sync\-rcu\-store\-dir
+Directory path reserved for sync\-rcu storage. This directory must have a
+unique value for each repository (do not set it in the DEFAULT section).
+This directory must not contain any other files or directories aside from
+those that are created automatically when sync\-rcu is enabled.
+.TP
+.B sync\-rcu\-spare\-snapshots = 1
+Number of spare snapshots for sync\-rcu to retain with expired ttl. This
+protects the previous latest snapshot from being removed immediately after
+a new version becomes available, since it might still be used by running
+processes.
+.TP
+.B sync\-rcu\-ttl\-days = 7
+Number of days for sync\-rcu to retain previous immutable snapshots of
+a repository. After the ttl of a particular snapshot has expired, it
+will be removed automatically (the latest snapshot is exempt, and
+sync\-rcu\-spare\-snapshots configures the number of previous snapshots
+that are exempt). If the ttl is set too low, then a snapshot could
+expire while it is in use by a running process.
+.TP
.B sync\-type
Specifies type of synchronization performed by `emerge \-\-sync`.
.br