Diffstat (limited to 'pym/getbinpkg.py')
-rw-r--r--	pym/getbinpkg.py	541
1 file changed, 541 insertions, 0 deletions
diff --git a/pym/getbinpkg.py b/pym/getbinpkg.py
new file mode 100644
index 000000000..7145d3adb
--- /dev/null
+++ b/pym/getbinpkg.py
@@ -0,0 +1,541 @@
+# getbinpkg.py -- Portage binary-package helper functions
+# Copyright 2003-2004 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-src/portage/pym/getbinpkg.py,v 1.12.2.3 2005/01/16 02:35:33 carpaski Exp $
+cvs_id_string="$Id: getbinpkg.py,v 1.12.2.3 2005/01/16 02:35:33 carpaski Exp $"[5:-2]
+
+from output import *
+import htmllib,HTMLParser,string,formatter,sys,os,xpak,time,tempfile,cPickle,base64
+
+try:
+ import ftplib
+except SystemExit, e:
+ raise
+except Exception, e:
+ sys.stderr.write(red("!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")
+
+try:
+ import httplib
+except SystemExit, e:
+ raise
+except Exception, e:
+ sys.stderr.write(red("!!! CANNOT IMPORT HTTPLIB: ")+str(e)+"\n")
+
+def make_metadata_dict(data):
+ myid,myglob = data
+
+ mydict = {}
+ for x in xpak.getindex_mem(myid):
+ mydict[x] = xpak.getitem(data,x)
+
+ return mydict
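+
+# A minimal usage sketch: turn the (index,data) pair produced by
+# xpak.xsplit_mem() into a plain { key: value } dictionary (xpak_segment is
+# a hypothetical string holding a raw xpak segment):
+#
+#	mydict = make_metadata_dict(xpak.xsplit_mem(xpak_segment))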
+
+class ParseLinks(HTMLParser.HTMLParser):
+	"""Parser class that subclasses HTMLParser to grab all anchors from an
+	HTML page and provide prefix and suffix filtering."""
+ def __init__(self):
+ self.PL_anchors = []
+ HTMLParser.HTMLParser.__init__(self)
+
+ def get_anchors(self):
+ return self.PL_anchors
+
+ def get_anchors_by_prefix(self,prefix):
+ newlist = []
+ for x in self.PL_anchors:
+			if (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
+ if x not in newlist:
+ newlist.append(x[:])
+ return newlist
+
+ def get_anchors_by_suffix(self,suffix):
+ newlist = []
+ for x in self.PL_anchors:
+ if (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix):
+ if x not in newlist:
+ newlist.append(x[:])
+ return newlist
+
+ def handle_endtag(self,tag):
+ pass
+
+ def handle_starttag(self,tag,attrs):
+ if tag == "a":
+ for x in attrs:
+ if x[0] == 'href':
+ if x[1] not in self.PL_anchors:
+ self.PL_anchors.append(x[1])
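+
+# A minimal usage sketch (the HTML snippet is hypothetical):
+#
+#	parser = ParseLinks()
+#	parser.feed('<a href="foo-1.0.tbz2">x</a> <a href="metadata.idx">y</a>')
+#	parser.get_anchors()                  # ['foo-1.0.tbz2', 'metadata.idx']
+#	parser.get_anchors_by_suffix(".tbz2") # ['foo-1.0.tbz2']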
+
+
+def create_conn(baseurl,conn=None):
+	"""(baseurl,conn) --- Takes a protocol://site:port/address URL, and an
+	optional connection. If connection is already active, it is passed on.
+	baseurl is reduced to address and is returned in the tuple
+	(conn,protocol,address,http_params,http_headers)."""
+ parts = string.split(baseurl, "://", 1)
+ if len(parts) != 2:
+ raise ValueError, "Provided URL does not contain protocol identifier. '%s'" % baseurl
+ protocol,url_parts = parts
+ del parts
+ host,address = string.split(url_parts, "/", 1)
+ del url_parts
+ address = "/"+address
+
+ userpass_host = string.split(host, "@", 1)
+ if len(userpass_host) == 1:
+ host = userpass_host[0]
+ userpass = ["anonymous"]
+ else:
+ host = userpass_host[1]
+ userpass = string.split(userpass_host[0], ":")
+ del userpass_host
+
+ if len(userpass) > 2:
+ raise ValueError, "Unable to interpret username/password provided."
+ elif len(userpass) == 2:
+ username = userpass[0]
+ password = userpass[1]
+ elif len(userpass) == 1:
+ username = userpass[0]
+ password = None
+ del userpass
+
+ http_headers = {}
+ http_params = {}
+ if username and password:
+ http_headers = {
+ "Authorization": "Basic %s" %
+ string.replace(
+ base64.encodestring("%s:%s" % (username, password)),
+ "\012",
+ ""
+ ),
+ }
+
+ if not conn:
+ if protocol == "https":
+ conn = httplib.HTTPSConnection(host)
+ elif protocol == "http":
+ conn = httplib.HTTPConnection(host)
+ elif protocol == "ftp":
+ passive = 1
+ if(host[-1] == "*"):
+ passive = 0
+ host = host[:-1]
+ conn = ftplib.FTP(host)
+ if password:
+ conn.login(username,password)
+ else:
+ sys.stderr.write(yellow(" * No password provided for username")+" '"+str(username)+"'\n\n")
+ conn.login(username)
+ conn.set_pasv(passive)
+ conn.set_debuglevel(0)
+ else:
+ raise NotImplementedError, "%s is not a supported protocol." % protocol
+
+ return (conn,protocol,address, http_params, http_headers)
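+
+# Sketch of the return value (host and credentials are hypothetical):
+#
+#	conn,protocol,address,params,headers = create_conn(
+#		"http://user:pass@packages.example.org/All/")
+#	# protocol == "http", address == "/All/", and headers carries the
+#	# Basic Authorization entry built from "user:pass".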
+
+def make_ftp_request(conn, address, rest=None, dest=None):
+	"""(conn,address,rest) --- uses the conn object to request the data
+	from address, issuing a REST command first if a rest offset is passed."""
+ try:
+
+ if dest:
+ fstart_pos = dest.tell()
+
+ conn.voidcmd("TYPE I")
+ fsize = conn.size(address)
+
+ if (rest != None) and (rest < 0):
+ rest = fsize+int(rest)
+ if rest < 0:
+ rest = 0
+
+ if rest != None:
+ mysocket = conn.transfercmd("RETR "+str(address), rest)
+ else:
+ mysocket = conn.transfercmd("RETR "+str(address))
+
+ mydata = ""
+ while 1:
+ somedata = mysocket.recv(8192)
+ if somedata:
+ if dest:
+ dest.write(somedata)
+ else:
+ mydata = mydata + somedata
+ else:
+ break
+
+ if dest:
+			data_size = dest.tell() - fstart_pos # Bytes written during this transfer.
+ else:
+ data_size = len(mydata)
+
+ mysocket.close()
+ conn.voidresp()
+ conn.voidcmd("TYPE A")
+
+ return mydata,not (fsize==data_size),""
+
+ except ValueError, e:
+ return None,int(str(e)[:4]),str(e)
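+
+# Sketch: fetch only the last 3000 bytes of a remote file over an existing
+# FTP connection; a negative rest offset counts back from the end of file:
+#
+#	data,error,msg = make_ftp_request(conn, "/All/foo-1.0.tbz2", rest=-3000)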
+
+
+def make_http_request(conn, address, params={}, headers={}, dest=None):
+ """(conn,address,params,headers) --- uses the conn object to request
+ the data from address, performing Location forwarding and using the
+ optional params and headers."""
+
+ rc = 0
+ response = None
+ while (rc == 0) or (rc == 301) or (rc == 302):
+ try:
+ if (rc != 0):
+ conn,ignore,ignore,ignore,ignore = create_conn(address)
+ conn.request("GET", address, params, headers)
+ except SystemExit, e:
+ raise
+ except Exception, e:
+ return None,None,"Server request failed: "+str(e)
+ response = conn.getresponse()
+ rc = response.status
+
+		# 301 (permanent) and 302 (temporary) redirects supply the new
+		# address in the Location header; follow it.
+ if ((rc == 301) or (rc == 302)):
+ ignored_data = response.read()
+ del ignored_data
+ for x in string.split(str(response.msg), "\n"):
+ parts = string.split(x, ": ", 1)
+ if parts[0] == "Location":
+ if (rc == 301):
+ sys.stderr.write(red("Location has moved: ")+str(parts[1])+"\n")
+ if (rc == 302):
+ sys.stderr.write(red("Location has temporarily moved: ")+str(parts[1])+"\n")
+ address = parts[1]
+ break
+
+ if (rc != 200) and (rc != 206):
+ sys.stderr.write(str(response.msg)+"\n")
+ sys.stderr.write(response.read()+"\n")
+ sys.stderr.write("address: "+address+"\n")
+ return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")"
+
+ if dest:
+ dest.write(response.read())
+ return "",0,""
+
+ return response.read(),0,""
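+
+# Sketch: issue a request over a connection obtained from create_conn();
+# rc is 0 on success, or the HTTP status code when the server does not
+# answer with 200/206:
+#
+#	data,rc,msg = make_http_request(conn, address, params, headers)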
+
+
+def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
+ myarray = []
+
+ if not (prefix and suffix):
+ match_both = 0
+
+ for x in array:
+ add_p = 0
+ if prefix and (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
+ add_p = 1
+
+ if match_both:
+ if prefix and not add_p: # Require both, but don't have first one.
+ continue
+ else:
+ if add_p: # Only need one, and we have it.
+ myarray.append(x[:])
+ continue
+
+		if not allow_overlap: # Don't allow the prefix and suffix to overlap.
+ if len(x) >= (len(prefix)+len(suffix)):
+ y = x[len(prefix):]
+ else:
+ continue # Too short to match.
+ else:
+ y = x # Do whatever... We're overlapping.
+
+ if suffix and (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix):
+ myarray.append(x) # It matches
+ else:
+ continue # Doesn't match.
+
+ return myarray
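+
+# For example:
+#
+#	match_in_array(["metadata.idx","foo-1.0.tbz2"], suffix=".tbz2")
+#	# -> ['foo-1.0.tbz2']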
+
+
+
+def dir_get_list(baseurl,conn=None):
+ """(baseurl[,connection]) -- Takes a base url to connect to and read from.
+	URL should be in the form <proto>://<site>[:port]<path>
+ Connection is used for persistent connection instances."""
+
+ if not conn:
+ keepconnection = 0
+ else:
+ keepconnection = 1
+
+ conn,protocol,address,params,headers = create_conn(baseurl, conn)
+
+ listing = None
+ if protocol in ["http","https"]:
+ page,rc,msg = make_http_request(conn,address,params,headers)
+
+ if page:
+ parser = ParseLinks()
+ parser.feed(page)
+ del page
+ listing = parser.get_anchors()
+ else:
+ raise Exception, "Unable to get listing: %s %s" % (rc,msg)
+ elif protocol in ["ftp"]:
+ if address[-1] == '/':
+ olddir = conn.pwd()
+ conn.cwd(address)
+ listing = conn.nlst()
+ conn.cwd(olddir)
+ del olddir
+ else:
+ listing = conn.nlst(address)
+ else:
+ raise TypeError, "Unknown protocol. '%s'" % protocol
+
+ if not keepconnection:
+ conn.close()
+
+ return listing
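+
+# Sketch (the URL is hypothetical): list the contents of a remote PKGDIR.
+#
+#	filelist = dir_get_list("http://packages.example.org/All/")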
+
+def file_get_metadata(baseurl,conn=None, chunk_size=3000):
+ """(baseurl[,connection]) -- Takes a base url to connect to and read from.
+	URL should be in the form <proto>://<site>[:port]<path>
+ Connection is used for persistent connection instances."""
+
+ if not conn:
+ keepconnection = 0
+ else:
+ keepconnection = 1
+
+ conn,protocol,address,params,headers = create_conn(baseurl, conn)
+
+ if protocol in ["http","https"]:
+ headers["Range"] = "bytes=-"+str(chunk_size)
+ data,rc,msg = make_http_request(conn, address, params, headers)
+ elif protocol in ["ftp"]:
+ data,rc,msg = make_ftp_request(conn, address, -chunk_size)
+ else:
+ raise TypeError, "Unknown protocol. '%s'" % protocol
+
+ if data:
+ xpaksize = xpak.decodeint(data[-8:-4])
+ if (xpaksize+8) > chunk_size:
+ myid = file_get_metadata(baseurl, conn, (xpaksize+8))
+ if not keepconnection:
+ conn.close()
+ return myid
+ else:
+ xpak_data = data[len(data)-(xpaksize+8):-8]
+ del data
+
+ myid = xpak.xsplit_mem(xpak_data)
+ if not myid:
+ myid = None,None
+ del xpak_data
+ else:
+ myid = None,None
+
+ if not keepconnection:
+ conn.close()
+
+ return myid
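+
+# Sketch: read just the trailing xpak segment of a remote tbz2 instead of
+# downloading the whole package (the URL is hypothetical):
+#
+#	myid = file_get_metadata("http://packages.example.org/All/foo-1.0.tbz2")
+#	if myid[0]:
+#		mydict = make_metadata_dict(myid)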
+
+
+def file_get(baseurl,dest,conn=None,fcmd=None):
+ """(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from.
+	URL should be in the form <proto>://[user[:pass]@]<site>[:port]<path>"""
+
+ if not fcmd:
+ return file_get_lib(baseurl,dest,conn)
+
+ fcmd = string.replace(fcmd, "${DISTDIR}", dest)
+ fcmd = string.replace(fcmd, "${URI}", baseurl)
+ fcmd = string.replace(fcmd, "${FILE}", os.path.basename(baseurl))
+ mysplit = string.split(fcmd)
+ mycmd = mysplit[0]
+ myargs = [os.path.basename(mycmd)]+mysplit[1:]
+ mypid=os.fork()
+ if mypid == 0:
+ os.execv(mycmd,myargs)
+ sys.stderr.write("!!! Failed to spawn fetcher.\n")
+ sys.exit(1)
+ retval=os.waitpid(mypid,0)[1]
+ if (retval & 0xff) == 0:
+ retval = retval >> 8
+ else:
+		sys.stderr.write("Spawned process caught a signal.\n")
+ sys.exit(1)
+ if retval != 0:
+ sys.stderr.write("Fetcher exited with a failure condition.\n")
+ return 0
+ return 1
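+
+# Sketch: fetch through an external command in the style of FETCHCOMMAND;
+# the ${DISTDIR}, ${URI} and ${FILE} tokens are substituted before the
+# fork (the wget invocation is only an illustration):
+#
+#	file_get("http://packages.example.org/All/foo-1.0.tbz2",
+#		"/usr/portage/packages/All",
+#		fcmd="/usr/bin/wget -P ${DISTDIR} ${URI}")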
+
+def file_get_lib(baseurl,dest,conn=None):
+ """(baseurl[,connection]) -- Takes a base url to connect to and read from.
+	URL should be in the form <proto>://<site>[:port]<path>
+ Connection is used for persistent connection instances."""
+
+ if not conn:
+ keepconnection = 0
+ else:
+ keepconnection = 1
+
+ conn,protocol,address,params,headers = create_conn(baseurl, conn)
+
+	sys.stderr.write("Fetching '"+str(os.path.basename(address))+"'\n")
+ if protocol in ["http","https"]:
+ data,rc,msg = make_http_request(conn, address, params, headers, dest=dest)
+ elif protocol in ["ftp"]:
+ data,rc,msg = make_ftp_request(conn, address, dest=dest)
+ else:
+ raise TypeError, "Unknown protocol. '%s'" % protocol
+
+ if not keepconnection:
+ conn.close()
+
+ return rc
+
+
+def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None):
+	"""(baseurl,conn,chunk_size,verbose) -- Fetches the remote metadata.idx
+	index and the xpak metadata of any uncached tbz2 files, updates the
+	local cache pickle, and returns the metadata dictionary for baseurl."""
+ if not conn:
+ keepconnection = 0
+ else:
+ keepconnection = 1
+
+ if makepickle == None:
+ makepickle = "/var/cache/edb/metadata.idx.most_recent"
+
+ conn,protocol,address,params,headers = create_conn(baseurl, conn)
+
+ filedict = {}
+
+ try:
+ metadatafile = open("/var/cache/edb/remote_metadata.pickle")
+ metadata = cPickle.load(metadatafile)
+ sys.stderr.write("Loaded metadata pickle.\n")
+ metadatafile.close()
+ except SystemExit, e:
+ raise
+ except:
+ metadata = {}
+ if not metadata.has_key(baseurl):
+ metadata[baseurl]={}
+ if not metadata[baseurl].has_key("indexname"):
+ metadata[baseurl]["indexname"]=""
+ if not metadata[baseurl].has_key("timestamp"):
+ metadata[baseurl]["timestamp"]=0
+	if not metadata[baseurl].has_key("modified"):
+		metadata[baseurl]["modified"]=0
+ if not metadata[baseurl].has_key("data"):
+ metadata[baseurl]["data"]={}
+
+ filelist = dir_get_list(baseurl, conn)
+ tbz2list = match_in_array(filelist, suffix=".tbz2")
+ metalist = match_in_array(filelist, prefix="metadata.idx")
+ del filelist
+
+ # Determine if our metadata file is current.
+ metalist.sort()
+ metalist.reverse() # makes the order new-to-old.
+ havecache=0
+ for mfile in metalist:
+ if usingcache and \
+ ((metadata[baseurl]["indexname"] != mfile) or \
+ (metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
+ # Try to download new cache until we succeed on one.
+ data=""
+ for trynum in [1,2,3]:
+ mytempfile = tempfile.TemporaryFile()
+ try:
+ file_get(baseurl+"/"+mfile, mytempfile, conn)
+ if mytempfile.tell() > len(data):
+ mytempfile.seek(0)
+ data = mytempfile.read()
+ except ValueError, e:
+ sys.stderr.write("--- "+str(e)+"\n")
+ if trynum < 3:
+ sys.stderr.write("Retrying...\n")
+ mytempfile.close()
+ continue
+ if match_in_array([mfile],suffix=".gz"):
+ sys.stderr.write("gzip'd\n")
+ try:
+ import gzip
+ mytempfile.seek(0)
+ gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
+ data = gzindex.read()
+ except SystemExit, e:
+ raise
+ except Exception, e:
+ mytempfile.close()
+ sys.stderr.write("!!! Failed to use gzip: "+str(e)+"\n")
+ mytempfile.close()
+ try:
+ metadata[baseurl]["data"] = cPickle.loads(data)
+ del data
+ metadata[baseurl]["indexname"] = mfile
+ metadata[baseurl]["timestamp"] = int(time.time())
+				metadata[baseurl]["modified"] = 0 # Not modified right after download.
+ sys.stderr.write("Pickle loaded.\n")
+ break
+ except SystemExit, e:
+ raise
+ except Exception, e:
+ sys.stderr.write("!!! Failed to read data from index: "+str(mfile)+"\n")
+ sys.stderr.write("!!! "+str(e)+"\n")
+ try:
+ metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
+ cPickle.dump(metadata,metadatafile)
+ metadatafile.close()
+ except SystemExit, e:
+ raise
+ except Exception, e:
+ sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
+ sys.stderr.write("!!! "+str(e)+"\n")
+ break
+ # We may have metadata... now we run through the tbz2 list and check.
+ sys.stderr.write(yellow("cache miss: 'x'")+" --- "+green("cache hit: 'o'")+"\n")
+ for x in tbz2list:
+ x = os.path.basename(x)
+		if not metadata[baseurl]["data"].has_key(x):
+ sys.stderr.write(yellow("x"))
+ metadata[baseurl]["modified"] = 1
+ myid = file_get_metadata(baseurl+"/"+x, conn, chunk_size)
+
+ if myid[0]:
+ metadata[baseurl]["data"][x] = make_metadata_dict(myid)
+ elif verbose:
+ sys.stderr.write(red("!!! Failed to retrieve metadata on: ")+str(x)+"\n")
+ else:
+ sys.stderr.write(green("o"))
+ sys.stderr.write("\n")
+
+ try:
+ if metadata[baseurl].has_key("modified") and metadata[baseurl]["modified"]:
+ metadata[baseurl]["timestamp"] = int(time.time())
+ metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
+ cPickle.dump(metadata,metadatafile)
+ metadatafile.close()
+ if makepickle:
+ metadatafile = open(makepickle, "w")
+ cPickle.dump(metadata[baseurl]["data"],metadatafile)
+ metadatafile.close()
+ except SystemExit, e:
+ raise
+ except Exception, e:
+ sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
+ sys.stderr.write("!!! "+str(e)+"\n")
+
+ if not keepconnection:
+ conn.close()
+
+ return metadata[baseurl]["data"]
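+
+# Sketch: refresh the metadata cache for a remote PKGDIR and look up one
+# package (URL and package name are hypothetical):
+#
+#	metadata = dir_get_metadata("http://packages.example.org/All")
+#	metadata.get("foo-1.0.tbz2", {})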