aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndré Erdmann <dywi@mailerd.de>2012-06-25 18:15:38 (GMT)
committerAndré Erdmann <dywi@mailerd.de>2012-06-25 18:15:38 (GMT)
commit9a0b0c9e1740e88ee2426fefa2c97fb7e629bdb1 (patch)
tree4ed0379f1fadc27d4fcb1472ffd0527e00899045
parentadd repo config entries to the config (diff)
downloadR_overlay-9a0b0c9e.zip
R_overlay-9a0b0c9e.tar.gz
R_overlay-9a0b0c9e.tar.bz2
the remote module (used for repo syncing)
* this module handles repositories, both local (directory) and remote (currently only rsync) * some parts, mainly integration into roverlay, are todo new file: roverlay/remote/__init__.py new file: roverlay/remote/basicrepo.py new file: roverlay/remote/repo.py new file: roverlay/remote/repolist.py new file: roverlay/remote/repoloader.py new file: roverlay/remote/rsync.py
-rw-r--r--roverlay/remote/__init__.py1
-rw-r--r--roverlay/remote/basicrepo.py245
-rw-r--r--roverlay/remote/repo.py54
-rw-r--r--roverlay/remote/repolist.py55
-rw-r--r--roverlay/remote/repoloader.py66
-rw-r--r--roverlay/remote/rsync.py86
6 files changed, 507 insertions, 0 deletions
diff --git a/roverlay/remote/__init__.py b/roverlay/remote/__init__.py
new file mode 100644
index 0000000..e7521be
--- /dev/null
+++ b/roverlay/remote/__init__.py
@@ -0,0 +1 @@
+from roverlay.remote.repolist import RepoList
diff --git a/roverlay/remote/basicrepo.py b/roverlay/remote/basicrepo.py
new file mode 100644
index 0000000..9ade3a2
--- /dev/null
+++ b/roverlay/remote/basicrepo.py
@@ -0,0 +1,245 @@
+import os.path
+
+from roverlay import config
+from roverlay.packageinfo import PackageInfo
+
+# separates the protocol from the remainder of an uri (<proto>://<rest>)
+URI_SEPARATOR = '://'
+# protocol that is assumed for SRC_URIs that do not specify one
+DEFAULT_PROTOCOL = 'http'
+
+# SRC_URI prefix of local repos, LocalRepo appends '/<repo name>' to it
+LOCALREPO_SRC_URI = 'http://localhost/R-Packages'
+
+def normalize_uri ( uri, protocol, force_protocol=False ):
+    """Returns uri with a leading '<protocol>://' where required.
+
+    arguments:
+    * uri            -- the uri to normalize
+    * protocol       -- protocol to prepend; the uri is returned unchanged
+                        if this is empty or None
+    * force_protocol -- if True: replace a protocol that is already part
+                        of uri with <protocol>. Defaults to False, which
+                        keeps an existing protocol.
+    """
+    if not protocol:
+        # nothing to prepend or replace
+        return uri
+
+    _scheme, found_sep, remainder = uri.partition ( URI_SEPARATOR )
+    has_protocol = ( found_sep == URI_SEPARATOR )
+
+    if has_protocol and not force_protocol:
+        # the uri already has a protocol that should be kept
+        return uri
+
+    # either the uri has no protocol (prefix the whole uri)
+    # or its protocol has to be replaced (prefix the part after '://')
+    bare_uri = remainder if has_protocol else uri
+    return URI_SEPARATOR.join ( ( protocol, bare_uri ) )
+# --- end of normalize_uri (...) ---
+
+class LocalRepo ( object ):
+    """
+    This class represents a local repository - all packages are assumed
+    to exist in its distfiles dir and no remote syncing will occur.
+    It's the base class for remote repos.
+    """
+
+    def __init__ ( self, name, directory=None, src_uri=None ):
+        """Initializes a LocalRepo.
+
+        arguments:
+        * name      -- name of this repository
+        * directory -- distfiles dir, defaults to <DISTFILES root>/<name>
+        * src_uri   -- SRC_URI, defaults to http://localhost/R-Packages/<name>
+        """
+        self.name = name
+        if directory is None:
+            self.distdir = os.path.join (
+                config.get_or_fail ( [ 'DISTFILES', 'root' ] ),
+                # subdir repo names like CRAN/contrib are ok,
+                # but make sure to use the correct path separator
+                self.name.replace ( '/', os.path.sep ),
+            )
+        else:
+            self.distdir = directory
+
+        if src_uri is None:
+            self.src_uri = '/'.join ( ( LOCALREPO_SRC_URI, self.name ) )
+        else:
+            self.src_uri = src_uri
+
+    # --- end of __init__ (...) ---
+
+    def __str__ ( self ):
+        return "repo '%s': DISTDIR '%s', SRC_URI '%s'" % (
+            self.name, self.distdir, self.src_uri
+        )
+
+    def get_name ( self ):
+        """Returns the name of this repository."""
+        return self.name
+    # --- end of get_name (...) ---
+
+    def get_distdir ( self ):
+        """Returns the distfiles directory of this repository."""
+        return self.distdir
+    # --- end of get_distdir (...) ---
+
+    def get_src_uri ( self, package_file=None ):
+        """Returns the SRC_URI of this repository.
+
+        arguments:
+        * package_file -- if set and not None: returns a SRC_URI for this pkg,
+                          which is <repo SRC_URI>/<package_file>
+        """
+        if package_file is None:
+            return self.src_uri
+        else:
+            # str.join() accepts exactly one iterable,
+            # so the parts have to be passed as a tuple
+            return '/'.join ( ( self.src_uri, package_file ) )
+    # --- end of get_src_uri (...) ---
+
+    # get_src(...) -> get_src_uri(...)
+    get_src = get_src_uri
+
+    def exists ( self ):
+        """Returns True if this repo locally exists, i.e. if its distfiles
+        directory is an existing directory."""
+        return os.path.isdir ( self.distdir )
+    # --- end of exists (...) ---
+
+    def nosync ( self ):
+        """Returns True if the repo is ready for overlay creation, else False.
+        Useful for basic local distfiles verification without downloading
+        anything.
+        """
+        return self.exists()
+
+    # --- end of nosync (...) ---
+
+    # sync() -> nosync(), LocalRepos don't have anything to sync
+    sync = nosync
+
+    def scan_distdir ( self, is_package=None ):
+        """Generator that scans the local distfiles dir of this repo and
+        yields PackageInfo instances.
+
+        arguments:
+        * is_package -- function returning True if the given file is a package
+                        or None which means that all files are packages.
+                        It is called with the path of the file
+                        (<dirpath>/<filename>). Defaults to None.
+
+        Raises an Exception (on first iteration) if is_package is neither
+        None nor callable.
+        """
+        if is_package is not None and not hasattr ( is_package, '__call__' ):
+            # faulty variant, raises Exception
+            raise Exception ( "is_package should either be None or a function." )
+
+        for dirpath, dirnames, filenames in os.walk ( self.distdir ):
+            for pkg in filenames:
+                # unfiltered if is_package is None, else filtered
+                if is_package is None or \
+                    is_package ( os.path.join ( dirpath, pkg ) ) \
+                :
+                    yield PackageInfo ( filename=pkg, origin=self )
+
+    # --- end of scan_distdir (...) ---
+
+# --- end of LocalRepo ---
+
+
+class RemoteRepo ( LocalRepo ):
+    """A template for remote repositories."""
+
+    def __init__ (
+        self, name, sync_proto,
+        directory=None,
+        src_uri=None, remote_uri=None, base_uri=None
+    ):
+        """Initializes a RemoteRepo.
+        Mainly consists of URI calculation that derived classes may find useful.
+
+        arguments:
+        * name       -- name of this repository
+        * sync_proto -- protocol used for syncing (e.g. 'rsync'),
+                        an empty protocol leaves the remote uri without
+                        a '<proto>://' prefix
+        * directory  -- distfiles dir, passed to LocalRepo
+        * src_uri    -- src uri, if set, else calculated using base/remote uri,
+                        the leading <proto>:// can be left out in which case
+                        http is assumed
+        * remote_uri -- uri used for syncing, if set, else calculated using
+                        base/src uri, the leading <proto>:// can be left out
+        * base_uri   -- used to calculate remote/src uri,
+                        example: localhost/R-packages/something
+
+        keyword condition:
+        * | { x : x in union(src,remote,base) and x not None } | >= 1
+         ^= at least one out of src/remote/base uri is not None
+
+        Raises an Exception if the keyword condition is not met.
+        """
+        super ( RemoteRepo, self ) . __init__ ( name, directory, src_uri='' )
+
+        self.sync_proto = sync_proto
+
+        # uris derived from base_uri are passed through normalize_uri(), too,
+        # which avoids a stray leading '://' if the protocol is empty
+
+        # determine uris
+        if src_uri is None and remote_uri is None:
+            if base_uri is None:
+                # keyword condition not met
+                raise Exception ( "Bad initialization of RemoteRepo!" )
+
+            else:
+                # using base_uri for src,remote
+                self.src_uri = normalize_uri (
+                    base_uri, DEFAULT_PROTOCOL, force_protocol=True
+                )
+
+                self.remote_uri = normalize_uri (
+                    base_uri, self.sync_proto, force_protocol=True
+                )
+
+        elif src_uri is None:
+            # remote_uri is not None
+            self.remote_uri = normalize_uri ( remote_uri, self.sync_proto )
+
+            if base_uri is not None:
+                # using base_uri for src_uri
+                self.src_uri = normalize_uri (
+                    base_uri, DEFAULT_PROTOCOL, force_protocol=True
+                )
+            else:
+                # using remote_uri for src_uri
+                self.src_uri = normalize_uri (
+                    self.remote_uri, DEFAULT_PROTOCOL, force_protocol=True
+                )
+
+        elif remote_uri is None:
+            # src_uri is not None
+            self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL )
+
+            if base_uri is not None:
+                # using base_uri for remote_uri
+                self.remote_uri = normalize_uri (
+                    base_uri, self.sync_proto, force_protocol=True
+                )
+            else:
+                # using src_uri for remote_uri
+                self.remote_uri = normalize_uri (
+                    self.src_uri, self.sync_proto, force_protocol=True
+                )
+        else:
+            # remote and src not None, base_uri is not used
+            self.remote_uri = normalize_uri ( remote_uri, self.sync_proto )
+            self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL )
+
+    # --- end of __init__ (...) ---
+
+    def get_remote_uri ( self ):
+        """Returns the remote uri of this RemoteRepo which is used for
+        syncing."""
+        return self.remote_uri
+    # --- end of get_remote_uri (...) ---
+
+    # get_remote(...) -> get_remote_uri(...)
+    get_remote = get_remote_uri
+
+    def sync ( self ):
+        """Gets packages from remote(s) and returns True if the repo is ready
+        for overlay creation, else False.
+
+        Derived classes have to implement this method.
+        """
+        raise Exception ( "RemoteRepo does not implement sync()." )
+    # --- end of sync (...) ---
+
+    def __str__ ( self ):
+        return "repo '%s': DISTDIR '%s', SRC_URI '%s', REMOTE_URI '%s'" % (
+            self.name, self.distdir, self.src_uri, self.remote_uri
+        )
+
+# --- end of RemoteRepo ---
+
diff --git a/roverlay/remote/repo.py b/roverlay/remote/repo.py
new file mode 100644
index 0000000..f54f448
--- /dev/null
+++ b/roverlay/remote/repo.py
@@ -0,0 +1,54 @@
+
+import logging
+
+#from roverlay.remote.basicrepo import LocalRepo, RemoteRepo
+from roverlay.remote.basicrepo import RemoteRepo
+
+from roverlay.remote.rsync import RsyncJob
+
+class RsyncRepo ( RemoteRepo ):
+ """A RemoteRepo that uses rsync (RsyncJob) for syncing."""
+
+ def __init__ (
+ self, name,
+ directory=None, src_uri=None, rsync_uri=None, base_uri=None,
+ extra_rsync_opts=None
+ ):
+ """Initializes an RsyncRepo.
+
+ arguments:
+ * name -- passed to RemoteRepo
+ * directory -- passed to RemoteRepo
+ * src_uri -- passed to RemoteRepo
+ * rsync_uri -- passed to RemoteRepo as remote_uri
+ * base_uri -- passed to RemoteRepo
+ * extra_rsync_opts -- stored and handed to RsyncJob as extra_opts
+ when syncing
+ """
+ # super's init: name, remote protocol, directory_kw, **uri_kw
+ # using '' as remote protocol which leaves uris unchanged when
+ # normalizing them for rsync usage
+ super ( RsyncRepo, self ) . __init__ (
+ name, '', directory=directory,
+ src_uri=src_uri, remote_uri=rsync_uri, base_uri=base_uri
+ )
+ self.extra_rsync_opts = extra_rsync_opts
+ # --- end of __init__ (...) ---
+
+
+ def sync ( self ):
+ """Runs rsync for this repo and returns True if rsync's return code
+ was 0. Otherwise, an error is logged and False is returned.
+ Exceptions raised while creating/running the rsync job are logged
+ and result in False, too.
+ """
+ retcode = None
+ try:
+ # run_now=True: the job runs (blocking) while being constructed
+ job = RsyncJob (
+ remote=self.remote_uri, distdir=self.distdir,
+ run_now=True,
+ extra_opts=self.extra_rsync_opts
+ )
+ if job.returncode == 0: return True
+
+ # non-zero return code, remember it for the error message below
+ retcode = job.returncode
+ except Exception as e:
+ # catch exceptions, log them and return False
+ ## TODO: which exceptions to catch||pass?
+ logging.exception ( e )
+ # no usable return code available in this case
+ retcode = '<undef>'
+
+
+ logging.error (
+ 'Repo %s cannot be used for ebuild creation due to errors '
+ 'while running rsync (return code was %s).' % ( self.name, retcode )
+ )
+ return False
+ # --- end of sync (...) ---
+
+ def __str__ ( self ):
+ return "rsync repo '%s': DISTDIR '%s', SRC_URI '%s', RSYNC_URI '%s'" \
+ % ( self.name, self.distdir, self.src_uri, self.remote_uri )
diff --git a/roverlay/remote/repolist.py b/roverlay/remote/repolist.py
new file mode 100644
index 0000000..4617057
--- /dev/null
+++ b/roverlay/remote/repolist.py
@@ -0,0 +1,55 @@
+
+from roverlay import config
+
+from roverlay.remote.repoloader import read_repofile
+
+class RepoList ( object ):
+    """A list of repositories that can be loaded from repo config files
+    and synced/scanned in one go."""
+
+    def __init__ ( self ):
+        # the repo objects, in the order in which they have been loaded
+        self.repos = list()
+        # True: sync_all() calls repo.sync(), False: repo.nosync()
+        self.sync_enabled = True
+        # True: sync_all() also scans repos whose sync()/nosync() failed
+        self.use_broken_repos = False
+
+    def sort ( self ):
+        """Not implemented, always raises an Exception."""
+        raise Exception ( "method stub." )
+
+    def load_file ( self, _file ):
+        """Reads a repo config file and adds its repos to this list.
+
+        arguments:
+        * _file -- the file to read, passed to read_repofile()
+        """
+        new_repos = read_repofile ( _file )
+        if new_repos:
+            self.repos.extend ( new_repos )
+    # --- end of load_file (...) ---
+
+    def load ( self ):
+        """Loads all repo config files listed in the REPO.config_files
+        config entry."""
+        files = config.get_or_fail ( 'REPO.config_files' )
+        for f in files:
+            self.load_file ( f )
+    # --- end of load (...) ---
+
+    def sync_all ( self, package_queue=None ):
+        """Syncs all repos (or just checks them if syncing is disabled) and
+        collects whatever scan_distdir() of the usable repos yields.
+
+        arguments:
+        * package_queue -- if set and not None: an object with a put() method
+                           (e.g. a queue) that receives the scanned
+                           packages. Defaults to None, in which case the
+                           packages are collected in a new list.
+
+        Returns the list of collected packages if no package_queue has been
+        given, else None.
+        """
+        if package_queue is None:
+            q = list()
+            add = q.append
+        else:
+            # TODO: _nowait? raises Exception when queue is full which is
+            #  good in non-threaded execution
+            #  -> timeout,..
+            q = None
+            # put() has to be taken from the given queue
+            add = package_queue.put
+
+        for repo in self.repos:
+            repo_ok = repo.sync() if self.sync_enabled else repo.nosync()
+
+            # broken repos are scanned only if use_broken_repos is set
+            ## TODO: warn when using a broken repo
+            if repo_ok or self.use_broken_repos:
+                # scan repo and create package infos
+                for p in repo.scan_distdir(): add ( p )
+
+        return q
+    # --- end of sync_all (...) ---
+
+    def __str__ ( self ):
+        return '\n'.join ( ( str ( x ) for x in self.repos ) )
+
+
diff --git a/roverlay/remote/repoloader.py b/roverlay/remote/repoloader.py
new file mode 100644
index 0000000..eae35c5
--- /dev/null
+++ b/roverlay/remote/repoloader.py
@@ -0,0 +1,66 @@
+
+import logging
+
+try:
+ import configparser
+except ImportError as running_python2:
+ # configparser is named ConfigParser in python2
+ import ConfigParser as configparser
+
+
+from roverlay import config
+
+from roverlay.remote.basicrepo import LocalRepo
+from roverlay.remote.repo import RsyncRepo
+
+# module-level logger, used by read_repofile() for debug/error messages
+LOGGER = logging.getLogger ( 'repoloader' )
+
+def read_repofile ( repo_file, lenient=False ):
+    """Reads a repo config file (ini-style, one section per repo) and
+    returns a list of repo objects (LocalRepo, RsyncRepo).
+
+    arguments:
+    * repo_file -- path of the file to read
+    * lenient   -- if True: let the config parser open the file, which
+                   ignores files that cannot be opened.
+                   Defaults to False, in which case the file is opened
+                   here and errors while opening it are raised.
+
+    Each section may set 'type' (local or rsync, defaults to rsync),
+    'name' (defaults to the section name), 'directory' and 'src_uri'.
+    rsync repos additionally accept 'rsync_uri', 'base_uri' and
+    'extra_rsync_opts'. Sections with an unknown type are skipped after
+    logging an error.
+    """
+
+    # SafeConfigParser and readfp() are deprecated aliases of
+    # ConfigParser and read_file() that newer python versions no longer
+    # provide. The 'fallback' keyword used below requires a python version
+    # that has the new names anyway.
+    parser = configparser.ConfigParser ( allow_no_value=False )
+
+    if lenient:
+        parser.read ( repo_file )
+    else:
+        with open ( repo_file, 'r' ) as fh:
+            parser.read_file ( fh )
+
+    repos = list()
+
+    for name in parser.sections():
+
+        def get ( option, fallback=None, _section=name ):
+            # raw=True: no interpolation, uris may contain '%' chars
+            return parser.get (
+                _section, option, raw=True, fallback=fallback
+            )
+
+        repo_type = get ( 'type', 'rsync' ).lower()
+
+        if repo_type == 'local':
+            repo = LocalRepo (
+                name      = get ( 'name', name ),
+                directory = get ( 'directory' ),
+                src_uri   = get ( 'src_uri' )
+            )
+        elif repo_type == 'rsync':
+            repo = RsyncRepo (
+                name             = get ( 'name', name ),
+                directory        = get ( 'directory' ),
+                src_uri          = get ( 'src_uri' ),
+                rsync_uri        = get ( 'rsync_uri' ),
+                base_uri         = get ( 'base_uri' ),
+                extra_rsync_opts = get ( 'extra_rsync_opts' )
+            )
+        else:
+            LOGGER.error ( "Unknown repo type %s for %s" % ( repo_type, name ) )
+            continue
+
+        LOGGER.debug ( 'New entry, ' + str ( repo ) )
+
+        repos.append ( repo )
+
+    return repos
+# --- end of read_repofile (...) ---
diff --git a/roverlay/remote/rsync.py b/roverlay/remote/rsync.py
new file mode 100644
index 0000000..e46d1db
--- /dev/null
+++ b/roverlay/remote/rsync.py
@@ -0,0 +1,86 @@
+import os
+import subprocess
+
+from roverlay import config
+from roverlay.util import keepenv
+
+
+# environment passed to the rsync subprocess (env= in RsyncJob.run()),
+# presumably keepenv() keeps only the listed variables - TODO confirm
+RSYNC_ENV = keepenv (
+ 'PATH',
+ 'USER',
+ 'LOGNAME',
+ 'RSYNC_PROXY',
+ 'RSYNC_PASSWORD',
+)
+
+
+# --recursive is not in the default opts, subdirs in CRAN/contrib are
+# either R release (2.xx.x[-patches]) or the package archive
+DEFAULT_RSYNC_OPTS = (
+ '--links', # copy symlinks as symlinks,
+ '--safe-links', # but ignore links outside of tree
+ '--times', # preserve modification times
+ '--compress', # FIXME: add lzo if necessary
+ '--delete', # delete extraneous files from the distdir
+ '--force', # allow deletion of non-empty dirs
+ '--human-readable', # print numbers in a human-readable format
+ '--stats', # print file-transfer stats
+ '--chmod=ugo=r,u+w,Dugo+x', # 0755 for transferred dirs, 0644 for files
+)
+
+class RsyncJob ( object ):
+    """Assembles and runs an rsync command that syncs a remote into a
+    distfiles directory."""
+
+    def __init__ (
+        self, remote=None, distdir=None, run_now=True, extra_opts=None
+    ):
+        """Initializes an RsyncJob.
+
+        arguments:
+        * remote     -- rsync source (remote uri)
+        * distdir    -- rsync destination (local directory)
+        * run_now    -- if True: run rsync immediately (blocking).
+                        Defaults to True.
+        * extra_opts -- additional rsync options, either a single option
+                        (str) or an iterable of options. Defaults to None.
+        """
+        self.remote = remote
+        self.distdir = distdir
+        # keep the options given by the caller
+        self.extra_opts = extra_opts
+        # None until run() has been called
+        self.returncode = None
+
+        if run_now: self.run()
+    # --- end of __init__ (...) ---
+
+    def _rsync_argv ( self ):
+        """Returns the rsync command (as list) for this job:
+        rsync <default opts> [--bwlimit=<n>] [<extra opts>] <remote> <distdir>
+
+        Raises an Exception if remote or distdir is None.
+        """
+        if self.remote is None or self.distdir is None:
+            raise Exception ( "None in (remote,distdir)." )
+
+        argv = [ 'rsync' ]
+
+        argv.extend ( DEFAULT_RSYNC_OPTS )
+
+        # optional bandwidth limit, read from the config
+        max_bw = config.get ( 'RSYNC_BWLIMIT', None )
+        if max_bw is not None:
+            argv.append ( '--bwlimit=%i' % max_bw )
+
+        if self.extra_opts is not None:
+            if isinstance ( self.extra_opts, str ) or \
+                not hasattr ( self.extra_opts, '__iter__' )\
+            :
+                # a single option
+                argv.append ( self.extra_opts )
+            else:
+                # an iterable of options
+                argv.extend ( self.extra_opts )
+
+        argv.extend ( ( self.remote, self.distdir ) )
+
+        return argv
+    # --- end of _rsync_argv (...) ---
+
+    def run ( self ):
+        """Runs rsync (blocking) and stores its exit code in
+        self.returncode. The distdir is created if it does not exist.
+
+        Raises an Exception if remote or distdir is None.
+        """
+
+        rsync_cmd = self._rsync_argv()
+
+        os.makedirs ( self.distdir, exist_ok=True )
+
+        # TODO pipe/log/.., running this in blocking mode until implemented
+
+        proc = subprocess.Popen (
+            rsync_cmd,
+            stdin=None, stdout=None, stderr=None,
+            env=RSYNC_ENV
+        )
+
+        # no pipes have been set up, so communicate() just waits for rsync
+        # and returns (None, None)
+        if proc.communicate() != ( None, None ):
+            raise AssertionError ( "expected None,None from communicate!" )
+
+        self.returncode = proc.returncode
+
+    # --- end of run (...) ---