From e06cb6d66db37ac7ab77acf65038b1f770c13c96 Mon Sep 17 00:00:00 2001 From: W-Mark Kubacki Date: Wed, 1 Aug 2012 19:49:34 +0200 Subject: Use If-Modified-Since HTTP-header and avoid downloading a remote index if the local copy is recent enough. --- pym/portage/dbapi/bintree.py | 24 ++++++++++++++++++++--- pym/portage/util/_urlopen.py | 45 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 62 insertions(+), 7 deletions(-) (limited to 'pym') diff --git a/pym/portage/dbapi/bintree.py b/pym/portage/dbapi/bintree.py index 9527b0766..16ae8eccf 100644 --- a/pym/portage/dbapi/bintree.py +++ b/pym/portage/dbapi/bintree.py @@ -54,6 +54,11 @@ if sys.hexversion >= 0x3000000: else: _unicode = unicode +class UseCachedCopyOfRemoteIndex(Exception): + # If the local copy is recent enough + # then fetching the remote index can be skipped. + pass + class bindbapi(fakedbapi): _known_keys = frozenset(list(fakedbapi._known_keys) + \ ["CHOST", "repository", "USE"]) @@ -852,6 +857,7 @@ class binarytree(object): if e.errno != errno.ENOENT: raise local_timestamp = pkgindex.header.get("TIMESTAMP", None) + remote_timestamp = None rmt_idx = self._new_pkgindex() proc = None tmp_filename = None @@ -861,8 +867,13 @@ class binarytree(object): # slash, so join manually... 
url = base_url.rstrip("/") + "/Packages" try: - f = _urlopen(url) - except IOError: + f = _urlopen(url, if_modified_since=local_timestamp) + if hasattr(f, 'headers') and f.headers.get('timestamp', ''): + remote_timestamp = f.headers.get('timestamp') + except IOError as err: + if hasattr(err, 'code') and err.code == 304: # not modified (since local_timestamp) + raise UseCachedCopyOfRemoteIndex() + path = parsed_url.path.rstrip("/") + "/Packages" if parsed_url.scheme == 'sftp': @@ -903,7 +914,8 @@ class binarytree(object): _encodings['repo.content'], errors='replace') try: rmt_idx.readHeader(f_dec) - remote_timestamp = rmt_idx.header.get("TIMESTAMP", None) + if not remote_timestamp: # in case it had not been read from HTTP header + remote_timestamp = rmt_idx.header.get("TIMESTAMP", None) if not remote_timestamp: # no timestamp in the header, something's wrong pkgindex = None @@ -931,6 +943,12 @@ class binarytree(object): writemsg("\n\n!!! %s\n" % \ _("Timed out while closing connection to binhost"), noiselevel=-1) + except UseCachedCopyOfRemoteIndex: + writemsg_stdout("\n") + writemsg_stdout( + colorize("GOOD", _("Local copy of remote index is up-to-date and will be used.")) + \ + "\n") + rmt_idx = pkgindex except EnvironmentError as e: writemsg(_("\n\n!!! 
Error fetching binhost package" \ " info from '%s'\n") % _hide_url_passwd(base_url)) diff --git a/pym/portage/util/_urlopen.py b/pym/portage/util/_urlopen.py index 307624bc4..42961883d 100644 --- a/pym/portage/util/_urlopen.py +++ b/pym/portage/util/_urlopen.py @@ -2,6 +2,9 @@ # Distributed under the terms of the GNU General Public License v2 import sys +from datetime import datetime +from time import mktime +from email.utils import formatdate, parsedate try: from urllib.request import urlopen as _urlopen @@ -14,15 +17,39 @@ except ImportError: import urllib2 as urllib_request from urllib import splituser as urllib_parse_splituser -def urlopen(url): +if sys.hexversion >= 0x3000000: + long = int + +# to account for the difference between TIMESTAMP of the index' contents +# and the file-'mtime' +TIMESTAMP_TOLERANCE=5 + +def urlopen(url, if_modified_since=None): + parse_result = urllib_parse.urlparse(url) try: - return _urlopen(url) + if parse_result.scheme not in ("http", "https"): + return _urlopen(url) + request = urllib_request.Request(url) + request.add_header('User-Agent', 'Gentoo Portage') + if if_modified_since: + request.add_header('If-Modified-Since', _timestamp_to_http(if_modified_since)) + opener = urllib_request.build_opener() + hdl = opener.open(request) + if hdl.headers.get('last-modified', ''): + try: + add_header = hdl.headers.add_header + except AttributeError: + # Python 2 + add_header = hdl.headers.addheader + add_header('timestamp', _http_to_timestamp(hdl.headers.get('last-modified'))) + return hdl except SystemExit: raise - except Exception: + except Exception as e: + if hasattr(e, 'code') and e.code == 304: # HTTPError 304: not modified + raise if sys.hexversion < 0x3000000: raise - parse_result = urllib_parse.urlparse(url) if parse_result.scheme not in ("http", "https") or \ not parse_result.username: raise @@ -40,3 +67,13 @@ def _new_urlopen(url): auth_handler = urllib_request.HTTPBasicAuthHandler(password_manager) opener = 
def _timestamp_to_http(timestamp):
    """Format an epoch timestamp as an HTTP-date for ``If-Modified-Since``.

    ``timestamp`` is anything ``int()`` accepts (the caller passes the
    TIMESTAMP string from the local package index).  The module-level
    TIMESTAMP_TOLERANCE is added before formatting.

    Returns a string such as ``'Wed, 01 Aug 2012 17:49:34 GMT'``.
    Raises ValueError/TypeError if ``timestamp`` is not an integer value.
    """
    # formatdate() renders epoch seconds directly in GMT, so there is no
    # need to round-trip through a naive local datetime and mktime(),
    # which can be off by an hour inside a DST-ambiguous local hour.
    stamp = float(int(timestamp) + TIMESTAMP_TOLERANCE)
    return formatdate(timeval=stamp, localtime=False, usegmt=True)


def _http_to_timestamp(http_datetime_string):
    """Convert an HTTP-date header value to epoch seconds, as a string.

    The date is interpreted as GMT, which is what HTTP mandates for
    ``Last-Modified``.  Raises ValueError if the value cannot be parsed.
    """
    # Function-scope import keeps this helper self-contained.
    from calendar import timegm

    parsed = parsedate(http_datetime_string)
    if parsed is None:
        raise ValueError(
            "unparsable HTTP date: %r" % (http_datetime_string,))
    # parsedate() drops the zone; timegm() reads the tuple as UTC, whereas
    # time.mktime() would read it as local time and skew the result by
    # the host's UTC offset.
    return str(int(timegm(parsed)))