diff options
author | Robert Buchholz <rbu@gentoo.org> | 2008-03-20 20:55:44 +0000 |
---|---|---|
committer | Robert Buchholz <rbu@gentoo.org> | 2008-03-20 20:55:44 +0000 |
commit | e6cc6776a13fab29b4000268d259176080569fb7 (patch) | |
tree | ccd46a8f83c5b2f77192365a039f79390945b10c /distfiles-indexer.py | |
download | distindex-e6cc6776a13fab29b4000268d259176080569fb7.tar.gz distindex-e6cc6776a13fab29b4000268d259176080569fb7.tar.bz2 distindex-e6cc6776a13fab29b4000268d259176080569fb7.zip |
Initial version
git-svn-id: https://overlays.gentoo.org/svn/dev/rbu/distindex@27 130f8837-a733-0410-98ec-ba4ccdad31c5
Diffstat (limited to 'distfiles-indexer.py')
-rwxr-xr-x | distfiles-indexer.py | 187 |
1 file changed, 187 insertions, 0 deletions
#!/usr/bin/env python2.4
# distfiles-indexer.py -- added as a new file (mode 100755, 187 lines) in
# distindex commit e6cc6776a13fab29b4000268d259176080569fb7 ("Initial version").
"""Write a plain-text index describing an archive and the files inside it.

The archive named with -f is first read as a tar file and, if that fails,
as a zip file.  The index is written to a freshly created file in the
directory given with -P and consists of "Key: value" lines: a block of
"File-dist-*" lines about the archive itself, followed by one numbered
"File-NNNNN-*" group (name, size, mtime, md5, sha1) per regular member.

The code deliberately stays within the syntax shared by Python 2.4 and
later interpreters (no "with" statement, no print statement).
"""

import tarfile
import zipfile
import os
import sys
import datetime
import tempfile
import time

try:
    # hashlib only exists from Python 2.5 on; older interpreters fall back
    # to the separate md5/sha modules below.
    import hashlib
    md5_cons = hashlib.md5
    sha1_cons = hashlib.sha1
except ImportError:
    import md5
    import sha
    md5_cons = md5.new
    sha1_cons = sha.new

# Number of bytes read per iteration while hashing a stream.
HASH_CHUNK_SIZE = 1024 * 1024


def hash_file(fileobj, *hashobjects):
    """Feed a stream to several hash objects without loading it whole.

    fileobj     -- object with a read(size) method returning byte data
    hashobjects -- any number of objects offering update() and hexdigest()

    Returns a tuple holding the hex digest of every hash object, in the
    order in which the hash objects were passed.
    """
    chunk = fileobj.read(HASH_CHUNK_SIZE)
    while chunk:
        for hashobject in hashobjects:
            hashobject.update(chunk)
        chunk = fileobj.read(HASH_CHUNK_SIZE)

    # A tuple (rather than a one-shot generator) can be unpacked, indexed
    # and re-read by the caller.
    return tuple([hashobject.hexdigest() for hashobject in hashobjects])


class ArchiveInfo:
    """Common part of the index writers: archive metadata and file handling.

    Subclasses provide write_content(), which appends the per-member lines
    to self.out_fd.
    """

    def __init__(self, filename, url, ebuild):
        """Record the archive path, its origin URL and the ebuild value.

        Raises OSError (via os.path.getsize / os.path.getmtime) when
        filename cannot be stat'ed.
        """
        self.filename = filename
        self.filesize = os.path.getsize(filename)  # this can throw an exception
        self.mtime = os.path.getmtime(filename)
        self.basename = os.path.basename(filename)
        self.ebuild = ebuild
        self.url = url

    def write_info(self, outfiledir):
        """Create the index file inside outfiledir and return its path.

        If anything fails after the file has been created, the partial
        index is deleted and the exception is re-raised.
        """
        outfilename = ""
        try:
            (fnum, outfilename) = self.make_outfile(outfiledir)
            # Wrap the descriptor mkstemp() already opened instead of
            # opening the path a second time, so no descriptor is leaked.
            self.out_fd = os.fdopen(fnum, "w")
            try:
                self.write_header()
                self.write_content()
            finally:
                self.out_fd.close()
            return outfilename
        except:
            # Cleanup-and-re-raise: a bare except is intended here so that
            # an interrupted run does not leave a partial index behind.
            if outfilename:
                os.remove(outfilename)
            raise

    def write_header(self):
        """Write the File-dist-* lines describing the archive itself."""
        self.out_fd.write("File-dist-name: %s\n" % (self.basename,))
        self.out_fd.write("File-dist-size: %s\n" % (self.filesize,))
        self.out_fd.write("File-dist-present: True\n")
        self.out_fd.write("File-dist-origin: %s\n" % (self.url,))
        # NOTE: self.ebuild is stored by __init__ but no File-dist-ebuild
        # line is emitted.
        self.out_fd.write("File-dist-mtime: %s\n" % (self.mtime,))

        # Binary mode: the digests must cover the exact bytes on disk.
        distfile = open(self.filename, "rb")
        try:
            (md5_hex, sha1_hex) = hash_file(distfile, md5_cons(), sha1_cons())
        finally:
            distfile.close()
        self.out_fd.write("File-dist-md5: %s\n" % (md5_hex,))
        self.out_fd.write("File-dist-sha1: %s\n" % (sha1_hex,))

    def write_content(self):
        """Write the per-member lines; implemented by the subclasses."""
        raise NotImplementedError("write_content must be provided by a subclass")

    def make_outfile(self, tdir):
        """Create a uniquely named file in tdir, prefixed with the basename.

        Returns the (descriptor, path) pair produced by tempfile.mkstemp();
        the caller owns the open descriptor.
        """
        return tempfile.mkstemp(prefix=self.basename, dir=tdir)


class TarInfo(ArchiveInfo):
    """Index writer for archives readable by the tarfile module."""

    def write_content(self):
        """Write one File-NNNNN-* group per regular file in the tar archive.

        Raises whatever tarfile.open() raises when the file is not a tar
        archive; main() relies on that to fall back to the zip reader.
        """
        tar = tarfile.open(self.filename, 'r')
        try:
            filecount = 0
            for file_info in tar:
                if not file_info.isfile():
                    continue
                file_stream = tar.extractfile(file_info)
                if file_stream is None:
                    sys.stdout.write("Filestream empty on %s\n" % (file_info.name,))
                    continue
                self.out_fd.write("File-%05d-name: %s\n" % (filecount, file_info.name))
                self.out_fd.write("File-%05d-size: %s\n" % (filecount, file_info.size))
                self.out_fd.write("File-%05d-mtime: %s\n" % (filecount, file_info.mtime))
                (md5_hex, sha1_hex) = hash_file(file_stream, md5_cons(), sha1_cons())
                self.out_fd.write("File-%05d-md5: %s\n" % (filecount, md5_hex))
                self.out_fd.write("File-%05d-sha1: %s\n" % (filecount, sha1_hex))
                # Only members that were actually written consume a number.
                filecount += 1
        finally:
            tar.close()


class ZipInfo(ArchiveInfo):
    """Index writer for archives readable by the zipfile module."""

    def write_content(self):
        """Write one File-NNNNN-* group per non-directory zip member.

        Each member is read into memory in one piece before it is hashed.
        """
        archive = zipfile.ZipFile(self.filename, 'r')
        try:
            filecount = 0
            for name in archive.namelist():
                file_info = archive.getinfo(name)
                if os.path.basename(file_info.filename) == "":
                    # this is a directory
                    continue
                file_content = archive.read(name)  # not stream based
                file_time = datetime.datetime(*file_info.date_time)
                # time.mktime() interprets the stored date as local time,
                # which is what the non-portable strftime("%s") yields on
                # glibc, but it works on every platform.
                file_mtime = int(time.mktime(file_time.timetuple()))
                self.out_fd.write("File-%05d-name: %s\n" % (filecount, file_info.filename))
                self.out_fd.write("File-%05d-size: %s\n" % (filecount, file_info.file_size))
                self.out_fd.write("File-%05d-mtime: %d\n" % (filecount, file_mtime))

                md5_hex = md5_cons(file_content).hexdigest()
                sha1_hex = sha1_cons(file_content).hexdigest()
                self.out_fd.write("File-%05d-md5: %s\n" % (filecount, md5_hex))
                self.out_fd.write("File-%05d-sha1: %s\n" % (filecount, sha1_hex))

                filecount += 1
        finally:
            archive.close()


def main():
    """Parse the command line and index the requested archive.

    Exits with status 2 on an unknown option and status 0 after -h.
    NOTE: a missing -f or an unreadable archive is reported on stdout
    only; the exit status stays 0 in those cases.
    """
    import getopt
    try:
        optlist, _extra_args = getopt.getopt(sys.argv[1:], 'f:P:hu:')
    except getopt.GetoptError:
        usage(sys.argv[0])
        sys.exit(2)

    infilename = None
    outdir = "."
    url = None
    for opt, arg in optlist:
        if opt == '-h':
            usage(sys.argv[0])
            sys.exit(0)
        if opt == '-f':
            infilename = arg
        if opt == '-P':
            outdir = arg
        if opt == '-u':
            url = arg

    if not infilename:
        sys.stdout.write("Please specify a filename.\n")
        return

    # Try the tar reader first, then the zip reader; the first one that
    # manages to write a complete index wins.
    for indexer in (TarInfo, ZipInfo):
        try:
            indexer(infilename, url, None).write_info(outdir)
        except (KeyboardInterrupt, SystemExit):
            # Never mistake an interrupt for "not this archive format".
            raise
        except Exception:
            continue
        return

    sys.stdout.write("%s  could not be opened\n" % (infilename,))


def usage(programname):
    """ Print usage information """
    sys.stdout.write("Usage: %s [-h] [-P <dir>] [-f <file>] [-u <url>]\n" % (programname,))
    sys.stdout.write('''
This script opens the file specified by -f, and writes the index to the directory specified by -P.

Parameters:
 -h         Display this help
 -f file    Path to the file to index, must be present
 -P path    Directory to create the index file in (default: .)
 -u url     URL to record inside the index

''')


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        sys.stdout.write('\n ! Exiting.\n')