diff options
-rw-r--r-- | roverlay/remote/__init__.py | 1 | ||||
-rw-r--r-- | roverlay/remote/basicrepo.py | 245 | ||||
-rw-r--r-- | roverlay/remote/repo.py | 54 | ||||
-rw-r--r-- | roverlay/remote/repolist.py | 55 | ||||
-rw-r--r-- | roverlay/remote/repoloader.py | 66 | ||||
-rw-r--r-- | roverlay/remote/rsync.py | 86 |
6 files changed, 507 insertions, 0 deletions
# diff --git a/roverlay/remote/__init__.py b/roverlay/remote/__init__.py
# (new file mode 100644, index 0000000..e7521be)

from roverlay.remote.repolist import RepoList


# diff --git a/roverlay/remote/basicrepo.py b/roverlay/remote/basicrepo.py
# (new file mode 100644, index 0000000..9ade3a2)

import os.path

from roverlay import config
from roverlay.packageinfo import PackageInfo

URI_SEPARATOR = '://'
DEFAULT_PROTOCOL = 'http'

LOCALREPO_SRC_URI = 'http://localhost/R-Packages'


def normalize_uri(uri, protocol, force_protocol=False):
    """Returns uri with a leading '<protocol>://' where appropriate.

    arguments:
    * uri            -- the uri to normalize, with or without '<proto>://'
    * protocol       -- protocol to prepend; a false value (None, '')
                        returns uri unchanged
    * force_protocol -- if True, an already present protocol is replaced
                        by the given one, else it is kept.
                        Defaults to False.
    """
    if protocol:
        _, separator, remainder = uri.partition(URI_SEPARATOR)
        if separator != URI_SEPARATOR:
            # uri carries no protocol yet
            return protocol + URI_SEPARATOR + uri
        if force_protocol:
            return protocol + URI_SEPARATOR + remainder
    return uri
# --- end of normalize_uri (...) ---


class LocalRepo(object):
    """
    A repository that only exists locally: its packages are expected to be
    present in the distfiles dir and nothing is fetched from a remote.
    Remote repos derive from this class.
    """

    def __init__(self, name, directory=None, src_uri=None):
        """Initializes a LocalRepo.

        arguments:
        * name      -- name of the repo
        * directory -- distfiles dir, defaults to <DISTFILES root>/<name>
        * src_uri   -- SRC_URI, defaults to
                       http://localhost/R-Packages/<name>
        """
        self.name = name

        if directory is not None:
            self.distdir = directory
        else:
            distfiles_root = config.get_or_fail(['DISTFILES', 'root'])
            # names with subdirs (e.g. CRAN/contrib) are fine, they only
            # need the path separator of the running system
            self.distdir = os.path.join(
                distfiles_root, self.name.replace('/', os.path.sep)
            )

        if src_uri is not None:
            self.src_uri = src_uri
        else:
            self.src_uri = LOCALREPO_SRC_URI + '/' + self.name
    # --- end of __init__ (...) ---
--- + + def __str__ ( self ): + return "repo '%s': DISTDIR '%s', SRC_URI '%s'" % ( + self.name, self.distdir, self.src_uri + ) + + def get_name ( self ): + """Returns the name of this repository.""" + return self.name + # --- end of get_name (...) --- + + def get_distdir ( self ): + """Returns the distfiles directory of this repository.""" + return self.distdir + # --- end of get_distdir (...) --- + + def get_src_uri ( self, package_file=None ): + """Returns the SRC_URI of this repository. + + arguments: + * package_file -- if set and not None: returns a SRC_URI for this pkg + """ + if package_file is None: + return self.src_uri + else: + return '/'.join ( self.src_uri, package_file ) + # --- end of get_src_uri (...) --- + + # get_src(...) -> get_src_uri(...) + get_src = get_src_uri + + def exists ( self ): + """Returns True if this repo locally exists.""" + return os.path.isdir ( self.distdir ) + # --- end of exists (...) --- + + def nosync ( self ): + """Returns True if the repo is ready for overlay creation, else False. + Useful for basic local distfiles verification without downloading + anything. + """ + return self.exists() + + # --- end of nosync (...) --- + + # sync() -> nosync(), LocalRepos don't have anything to sync + sync = nosync + + def scan_distdir ( self, is_package=None ): + """Generator that scans the local distfiles dir of this repo and + yields PackageInfo instances. + + arguments: + * is_package -- function returning True if the given file is a package + or None which means that all files are packages. + Defaults to None. + """ + if is_package is None: + # unfiltered variant + + for dirpath, dirnames, filenames in os.walk ( self.distdir ): + for pkg in filenames: + yield PackageInfo ( filename=pkg, origin=self ) + + elif hasattr ( is_package, '__call__' ): + # filtered variant (adds an if is_package... 
before yield) + for dirpath, dirnames, filenames in os.walk ( self.distdir ): + for pkg in filenames: + if is_package ( os.path.join ( dirpath, pkg ) ): + yield PackageInfo ( filename=pkg, origin=self ) + + + else: + # faulty variant, raises Exception + raise Exception ( "is_package should either be None or a function." ) + #yield None + + # --- end of scan_distdir (...) --- + +# --- end of LocalRepo --- + + +class RemoteRepo ( LocalRepo ): + """A template for remote repositories.""" + + def __init__ ( + self, name, sync_proto, + directory=None, + src_uri=None, remote_uri=None, base_uri=None + ): + """Initializes a RemoteRepo. + Mainly consists of URI calculation that derived classes may find useful. + + arguments: + * name -- + * sync_proto -- protocol used for syncing (e.g. 'rsync') + * directory -- + * src_uri -- src uri, if set, else calculated using base/remote uri, + the leading <proto>:// can be left out in which case + http is assumed + * remote_uri -- uri used for syncing, if set, else calculated using + base/src uri, the leading <proto>:// can be left out + * base_uri -- used to calculate remote/src uri, + example: localhost/R-packages/something + + keyword condition: + * | { x : x in union(src,remote,base) and x not None } | >= 1 + ^= at least one out of src/remote/base uri is not None + """ + super ( RemoteRepo, self ) . __init__ ( name, directory, src_uri='' ) + + self.sync_proto = sync_proto + + # detemerine uris + if src_uri is None and remote_uri is None: + if base_uri is None: + # keyword condition not met + raise Exception ( "Bad initialization of RemoteRepo!" 
) + + else: + # using base_uri for src,remote + self.src_uri = URI_SEPARATOR.join ( + ( DEFAULT_PROTOCOL, base_uri ) + ) + + self.remote_uri = URI_SEPARATOR.join ( + ( sync_proto, base_uri ) + ) + + elif src_uri is None: + # remote_uri is not None + self.remote_uri = normalize_uri ( remote_uri, self.sync_proto ) + + if base_uri is not None: + # using base_uri for src_uri + self.src_uri = URI_SEPARATOR.join ( + ( DEFAULT_PROTOCOL, base_uri ) + ) + else: + # using remote_uri for src_uri + self.src_uri = normalize_uri ( + self.remote_uri, DEFAULT_PROTOCOL, force_protocol=True + ) + + elif remote_uri is None: + # src_uri is not None + self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL ) + + if base_uri is not None: + # using base_uri for remote_uri + self.remote_uri = URI_SEPARATOR.join ( + ( self.sync_proto, base_uri ) + ) + else: + # using src_uri for remote_uri + self.remote_uri = normalize_uri ( + self.src_uri, self.sync_proto, force_protocol=True + ) + else: + # remote and src not None + self.remote_uri = normalize_uri ( remote_uri, self.sync_proto ) + self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL ) + + # --- end of __init__ (...) --- + + def get_remote_uri ( self ): + """Returns the remote uri of this RemoteRepo which used for syncing.""" + return self.remote_uri + # --- end of get_remote_uri (...) --- + + # get_remote(...) -> get_remote_uri(...) + get_remote = get_remote_uri + + def sync ( self ): + """Gets packages from remote(s) and returns True if the repo is ready + for overlay creation, else False. + + Derived classes have to implement this method. + """ + raise Exception ( "RemoteRepo does not implement sync()." ) + # --- end of sync (...) 
--- + + def __str__ ( self ): + return "repo '%s': DISTDIR '%s', SRC_URI '%s', REMOTE_URI '%s'" % ( + self.name, self.distdir, self.src_uri, self.remote_uri + ) + +# --- end of RemoteRepo --- + diff --git a/roverlay/remote/repo.py b/roverlay/remote/repo.py new file mode 100644 index 0000000..f54f448 --- /dev/null +++ b/roverlay/remote/repo.py @@ -0,0 +1,54 @@ + +import logging + +#from roverlay.remote.basicrepo import LocalRepo, RemoteRepo +from roverlay.remote.basicrepo import RemoteRepo + +from roverlay.remote.rsync import RsyncJob + +class RsyncRepo ( RemoteRepo ): + + def __init__ ( + self, name, + directory=None, src_uri=None, rsync_uri=None, base_uri=None, + extra_rsync_opts=None + ): + # super's init: name, remote protocol, directory_kw, **uri_kw + # using '' as remote protocol which leaves uris unchanged when + # normalizing them for rsync usage + super ( RsyncRepo, self ) . __init__ ( + name, '', directory=directory, + src_uri=src_uri, remote_uri=rsync_uri, base_uri=base_uri + ) + self.extra_rsync_opts = extra_rsync_opts + # --- end of __init__ (...) --- + + + def sync ( self ): + retcode = None + try: + job = RsyncJob ( + remote=self.remote_uri, distdir=self.distdir, + run_now=True, + extra_opts=self.extra_rsync_opts + ) + if job.returncode == 0: return True + + retcode = job.returncode + except Exception as e: + # catch exceptions, log them and return False + ## TODO: which exceptions to catch||pass? + logging.exception ( e ) + retcode = '<undef>' + + + logging.error ( + 'Repo %s cannot be used for ebuild creation due to errors ' + 'while running rsync (return code was %s).' % ( self.name, retcode ) + ) + return False + # --- end of sync (...) 
--- + + def __str__ ( self ): + return "rsync repo '%s': DISTDIR '%s', SRC_URI '%s', RSYNC_URI '%s'" \ + % ( self.name, self.distdir, self.src_uri, self.remote_uri ) diff --git a/roverlay/remote/repolist.py b/roverlay/remote/repolist.py new file mode 100644 index 0000000..4617057 --- /dev/null +++ b/roverlay/remote/repolist.py @@ -0,0 +1,55 @@ + +from roverlay import config + +from roverlay.remote.repoloader import read_repofile + +class RepoList ( object ): + + def __init__ ( self ): + self.repos = list() + self.sync_enabled = True + self.use_broken_repos = False + + def sort ( self ): + raise Exception ( "method stub." ) + + def load_file ( self, _file ): + new_repos = read_repofile ( _file ) + if new_repos: + self.repos.extend ( new_repos ) + # --- end of load_file (...) --- + + def load ( self ): + files = config.get_or_fail ( 'REPO.config_files' ) + for f in files: + self.load_file ( f ) + # --- end of load (...) --- + + def sync_all ( self, package_queue=None ): + q = None + if package_queue is None: + q = list() + add = q.append + else: + # TODO: _nowait? raises Exception when queue is full which is + # good in non-threaded execution + # -> timeout,.. + add = q.put + + + # !! TODO resume here. + + for repo in self.repos: + if repo.sync() if self.sync_enabled else repo.nosync(): + # scan repo and create package infos + for p in repo.scan_distdir(): add ( p ) + elif self.use_broken_repos: + # warn and scan repo + ## .. + for p in repo.scan_distdir(): add ( p ) + + # --- end of sync_all (...) 
--- + + def __str__ ( self ): + return '\n'.join ( ( str ( x ) for x in self.repos ) ) + diff --git a/roverlay/remote/repoloader.py b/roverlay/remote/repoloader.py new file mode 100644 index 0000000..eae35c5 --- /dev/null +++ b/roverlay/remote/repoloader.py @@ -0,0 +1,66 @@ + +import logging + +try: + import configparser +except ImportError as running_python2: + # configparser is named ConfigParser in python2 + import ConfigParser as configparser + + +from roverlay import config + +from roverlay.remote.basicrepo import LocalRepo +from roverlay.remote.repo import RsyncRepo + +LOGGER = logging.getLogger ( 'repoloader' ) + +def read_repofile ( repo_file, lenient=False ): + + parser = configparser.SafeConfigParser ( allow_no_value=False ) + + if lenient: + parser.read ( repo_file ) + else: + fh = None + try: + fh = open ( repo_file, 'r' ) + parser.readfp ( fh ) + finally: + if fh: fh.close() + + repos = list() + + for name in parser.sections(): + + get = lambda a, b=None : parser.get ( name, a, raw=True, fallback=b ) + + repo_type = get ( 'type', 'rsync' ).lower() + + if repo_type == 'local': + repo = LocalRepo ( + name = get ( 'name', name ), + directory = get ( 'directory' ), + src_uri = get ( 'src_uri' ) + ) + elif repo_type == 'rsync': + repo = RsyncRepo ( + name = get ( 'name', name ), + directory = get ( 'directory' ), + src_uri = get ( 'src_uri' ), + rsync_uri = get ( 'rsync_uri' ), + base_uri = get ( 'base_uri' ), + extra_rsync_opts = get ( 'extra_rsync_opts' ) + ) + else: + LOGGER.error ( "Unknown repo type %s for %s" % ( repo_type, name ) ) + continue + + LOGGER.debug ( 'New entry, ' + str ( repo ) ) + + repos.append ( repo ) + repo = None + + + return repos +# --- end of read_repofile (...) 
# diff --git a/roverlay/remote/rsync.py b/roverlay/remote/rsync.py
# (new file mode 100644, index 0000000..e46d1db)

import os
import subprocess

from roverlay import config
from roverlay.util import keepenv


# environment handed to the rsync process, built by keepenv() from the
# listed variable names
RSYNC_ENV = keepenv(
    'PATH',
    'USER',
    'LOGNAME',
    'RSYNC_PROXY',
    'RSYNC_PASSWORD',
)


# --recursive is not in the default opts, subdirs in CRAN/contrib are
# either R release (2.xx.x[-patches] or the package archive)
DEFAULT_RSYNC_OPTS = (
    '--links',                   # copy symlinks as symlinks,
    '--safe-links',              # but ignore links outside of tree
    '--times',                   #
    '--compress',                # FIXME: add lzo if necessary
    '--delete',                  #
    '--force',                   # allow deletion of non-empty dirs
    '--human-readable',          #
    '--stats',                   #
    '--chmod=ugo=r,u+w,Dugo+x',  # 0755 for transferred dirs, 0644 for files
)


class RsyncJob(object):
    """A single rsync run that transfers a remote into a distfiles dir."""

    def __init__(
        self, remote=None, distdir=None, run_now=True, extra_opts=None
    ):
        """Initializes an RsyncJob.

        arguments:
        * remote     -- rsync source
        * distdir    -- rsync destination (local directory)
        * run_now    -- whether to call run() right away.
                        Defaults to True.
        * extra_opts -- additional rsync option(s), either one string or
                        an iterable of strings. Defaults to None.
        """
        self.remote = remote
        self.distdir = distdir
        # the argument used to be dropped (attribute was always None)
        self.extra_opts = extra_opts
        # stays None until run() has finished
        self.returncode = None

        if run_now:
            self.run()
    # --- end of __init__ (...) ---

    def _rsync_argv(self):
        """Returns the rsync command as a list: 'rsync', the default
        options, an optional --bwlimit taken from the RSYNC_BWLIMIT config
        entry, the extra options, and finally remote and distdir.

        raises: Exception if remote or distdir is None.
        """
        if self.remote is None or self.distdir is None:
            raise Exception("None in (remote,distdir).")

        argv = ['rsync']
        argv.extend(DEFAULT_RSYNC_OPTS)

        max_bw = config.get('RSYNC_BWLIMIT', None)
        if max_bw is not None:
            # int() so that a numeric string works with %i as well
            # NOTE(review): type of the config value is not visible here
            argv.append('--bwlimit=%i' % int(max_bw))

        opts = self.extra_opts
        if opts is not None:
            if isinstance(opts, str) or not hasattr(opts, '__iter__'):
                # a single option; a string is passed on as ONE argument
                # and is not split at whitespace
                argv.append(opts)
            else:
                argv.extend(opts)

        argv.extend((self.remote, self.distdir))

        return argv
    # --- end of _rsync_argv (...) ---
--- + + def run ( self ): + + rsync_cmd = self._rsync_argv() + + os.makedirs ( self.distdir, exist_ok=True ) + + # TODO pipe/log/.., running this in blocking mode until implemented + + proc = subprocess.Popen ( + rsync_cmd, + stdin=None, stdout=None, stderr=None, + env=RSYNC_ENV + ) + + if proc.communicate() != ( None, None ): + raise AssertionError ( "expected None,None from communicate!" ) + + self.returncode = proc.returncode + + # --- end of start (...) --- |