summary refs log tree commit diff
diff options
context:
space:
mode:
author Robert Buchholz <rbu@gentoo.org> 2008-03-20 20:55:44 +0000
committer Robert Buchholz <rbu@gentoo.org> 2008-03-20 20:55:44 +0000
commit e6cc6776a13fab29b4000268d259176080569fb7 (patch)
tree ccd46a8f83c5b2f77192365a039f79390945b10c /distfiles-indexer.py
download distindex-e6cc6776a13fab29b4000268d259176080569fb7.tar.gz
distindex-e6cc6776a13fab29b4000268d259176080569fb7.tar.bz2
distindex-e6cc6776a13fab29b4000268d259176080569fb7.zip
Initial version
git-svn-id: https://overlays.gentoo.org/svn/dev/rbu/distindex@27 130f8837-a733-0410-98ec-ba4ccdad31c5
Diffstat (limited to 'distfiles-indexer.py')
-rwxr-xr-x distfiles-indexer.py 187
1 files changed, 187 insertions, 0 deletions
diff --git a/distfiles-indexer.py b/distfiles-indexer.py
new file mode 100755
index 0000000..0aeaa36
--- /dev/null
+++ b/distfiles-indexer.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python2.4
+
+import tarfile
+import zipfile
+import os
+import sys
+import datetime
+
+# Pick the hash constructors: hashlib when it can be imported, otherwise the
+# separate md5 and sha modules. Either way md5_cons() and sha1_cons() return
+# a new hash object.
+try:
+    # FIXME: this does not work
+    import hashlib
+    md5_cons = hashlib.md5
+    sha1_cons = hashlib.sha1
+except (ImportError, AttributeError):
+    # Only a missing module or missing constructor selects the fallback;
+    # any other error is no longer hidden.
+    import md5
+    import sha
+    md5_cons = md5.new
+    sha1_cons = sha.new
+
+
+def hash_file(fileobj, *hashobjects):
+    """ Feed a stream to several hash objects without loading it whole.
+
+    fileobj is read in chunks of 1024*1024 bytes until read() returns an
+    empty result; every chunk is passed to update() of each hash object.
+    Returns a generator yielding hexdigest() of each hash object, in the
+    order the objects were passed in.
+    """
+    chunk_size = 1024 * 1024
+    while True:
+        chunk = fileobj.read(chunk_size)
+        if not chunk:
+            break
+        for hasher in hashobjects:
+            hasher.update(chunk)
+
+    return (hasher.hexdigest() for hasher in hashobjects)
+
+
+class ArchiveInfo:
+    """ Base class that writes an index file describing one distfile.
+
+    The index starts with a header about the archive itself (name, size,
+    origin URL, mtime, MD5 and SHA1) followed by whatever write_content()
+    emits. write_content() is not defined in this class; subclasses have to
+    provide it.
+    """
+
+    def __init__(self, filename, url, ebuild):
+        """ Record the archive path and its metadata.
+
+        filename -- path of the archive to index
+        url      -- value written as File-dist-origin
+        ebuild   -- stored on the instance; not written to the index
+
+        os.path.getsize()/getmtime() raise if the file cannot be stat'ed.
+        """
+        self.filename = filename
+        self.filesize = os.path.getsize(filename) # this can throw an exception
+        self.mtime = os.path.getmtime(filename)
+        self.basename = os.path.basename(filename)
+        self.ebuild = ebuild
+        self.url = url
+
+    def write_info(self, outfiledir):
+        """ Create the index file inside outfiledir and return its path.
+
+        If anything fails after the temporary file was created, the partial
+        file is removed and the exception is re-raised.
+        """
+        outfilename = ""
+        try:
+            (fnum, outfilename) = self.make_outfile(outfiledir)
+            # Wrap the descriptor mkstemp() already opened. Opening the path
+            # a second time left that descriptor open for the process
+            # lifetime.
+            self.out_fd = os.fdopen(fnum, "w")
+            try:
+                self.write_header()
+                self.write_content()
+            finally:
+                # Close on success and on failure, before any removal below.
+                self.out_fd.close()
+            return outfilename
+        except:
+            # Bare except on purpose: clean up for every kind of exception,
+            # then re-raise it unchanged.
+            if outfilename:
+                os.remove(outfilename)
+            raise
+
+    def write_header(self):
+        """ Write the File-dist-* lines describing the archive itself. """
+        self.out_fd.write("File-dist-name: %s\n" % (self.basename))
+        self.out_fd.write("File-dist-size: %s\n" % (self.filesize))
+        self.out_fd.write("File-dist-present: True\n")
+        self.out_fd.write("File-dist-origin: %s\n" % (self.url))
+        #self.out_fd.write("File-dist-ebuild: %s\n" % (self.ebuild))
+        self.out_fd.write("File-dist-mtime: %s\n" %(self.mtime))
+
+        # Archives are binary data: hash the raw bytes, and make sure the
+        # handle is released even when reading fails.
+        distfile = open(self.filename, "rb")
+        try:
+            (md5_hex, sha1_hex) = hash_file(distfile, md5_cons(), sha1_cons())
+        finally:
+            distfile.close()
+        self.out_fd.write("File-dist-md5: %s\n" % (md5_hex))
+        self.out_fd.write("File-dist-sha1: %s\n" % (sha1_hex))
+
+    def make_outfile(self, tdir):
+        """ Create a uniquely named file in tdir, prefixed with the archive's
+        basename, and return the (descriptor, path) pair from mkstemp().
+        """
+        import tempfile
+        return tempfile.mkstemp(prefix=self.basename, dir=tdir)
+
+class TarInfo(ArchiveInfo):
+    """ Index writer for archives that the tarfile module can open. """
+
+    def write_content(self):
+        """ Write name, size, mtime, MD5 and SHA1 for each regular file.
+
+        Members that are not regular files are skipped. Entries are numbered
+        from 0 in archive order; the number only advances for entries that
+        were actually written. Raises whatever tarfile raises when the file
+        cannot be read as a tar archive.
+        """
+        tar = tarfile.open(self.filename, 'r')
+        try:
+            filecount = 0
+            for file_info in tar:
+                if not file_info.isfile():
+                    continue
+
+                file_stream = tar.extractfile(file_info)
+                if file_stream is None:
+                    print "Filestream empty on %s" % (file_info.name)
+                    continue
+
+                self.out_fd.write("File-%05d-name: %s\n" %(filecount, file_info.name))
+                self.out_fd.write("File-%05d-size: %s\n" %(filecount, file_info.size))
+                self.out_fd.write("File-%05d-mtime: %s\n" %(filecount, file_info.mtime))
+                (md5_hex, sha1_hex) = hash_file(file_stream, md5_cons(), sha1_cons())
+                self.out_fd.write("File-%05d-md5: %s\n" %(filecount, md5_hex))
+                self.out_fd.write("File-%05d-sha1: %s\n" %(filecount, sha1_hex))
+                filecount += 1
+        finally:
+            # Release the archive handle even when a member cannot be read.
+            tar.close()
+
+class ZipInfo(ArchiveInfo):
+    """ Index writer for zip archives. """
+
+    def write_content(self):
+        """ Write name, size, mtime, MD5 and SHA1 for each file in the zip.
+
+        Entries whose name has an empty basename are taken to be directories
+        and skipped. Entries are numbered from 0 in namelist() order. Raises
+        whatever zipfile raises when the file is not a readable zip archive.
+        """
+        import time
+
+        archive = zipfile.ZipFile(self.filename, 'r')
+        try:
+            filecount = 0
+            for name in archive.namelist():
+                file_info = archive.getinfo(name)
+                if os.path.basename(file_info.filename) == "":
+                    # this is a directory
+                    continue
+                file_content = archive.read(name) # Doesn't work stream based :-/
+                file_time = datetime.datetime(*file_info.date_time)
+                # Seconds since the epoch, interpreting the zip timestamp as
+                # local time. strftime("%s") is not a standard format code
+                # and is not available on every platform; mktime() is.
+                file_mtime = int(time.mktime(file_time.timetuple()))
+                self.out_fd.write("File-%05d-name: %s\n" %(filecount, file_info.filename))
+                self.out_fd.write("File-%05d-size: %s\n" %(filecount, file_info.file_size))
+                self.out_fd.write("File-%05d-mtime: %s\n" %(filecount, file_mtime))
+
+                md5_hex = md5_cons(file_content).hexdigest()
+                sha1_hex = sha1_cons(file_content).hexdigest()
+                self.out_fd.write("File-%05d-md5: %s\n" %(filecount, md5_hex))
+                self.out_fd.write("File-%05d-sha1: %s\n" %(filecount, sha1_hex))
+
+                filecount += 1
+        finally:
+            # Release the archive handle even when an entry cannot be read.
+            archive.close()
+
+
+
+
+def main():
+    """ Parse the command line and index the given file.
+
+    Options: -f <file> (required), -P <dir> (output directory, default "."),
+    -u <url>, -h (help). The file is tried as a tar archive first and as a
+    zip archive second.
+
+    Exits with status 2 on bad options or a missing -f, and with status 1
+    when the file could not be indexed as either archive type.
+    """
+    import getopt
+    try:
+        optlist, _extra_args = getopt.getopt(sys.argv[1:],
+            'f:P:hu:')
+    except getopt.GetoptError:
+        usage(sys.argv[0])
+        sys.exit(2)
+
+    infilename = None
+    outdir = "."
+    url = None
+    for opt, arg in optlist:
+        if opt == '-h':
+            usage(sys.argv[0])
+            sys.exit(0)
+        elif opt == '-f':
+            infilename = arg
+        elif opt == '-P':
+            outdir = arg
+        elif opt == '-u':
+            url = arg
+
+    if not infilename:
+        print "Please specify a filename."
+        sys.exit(2)
+
+    for info_class in (TarInfo, ZipInfo):
+        try:
+            info_class(infilename, url, None).write_info(outdir)
+            return
+        except (KeyboardInterrupt, SystemExit):
+            # Never treat an interrupt or an exit request as "not this
+            # archive type"; let it reach the caller.
+            raise
+        except Exception:
+            # Could not be indexed as this archive type; try the next one.
+            continue
+
+    print infilename, " could not be opened"
+    sys.exit(1)
+
+def usage(programname):
+    """ Show the command line synopsis followed by the option summary. """
+    synopsis = "Usage: %s [-h] [-P <dir>] [-f <file>] [-u <url>]" % (programname)
+    option_help = '''
+This script opens the file specified by -f, and writes the index to the directory specified by -P.
+
+Parameters:
+ -h Display this help
+ -f file Path to the file to index, must be present
+ -P path Directory to create the index file in (default: .)
+ -u url URL to record inside the index
+'''
+    print synopsis
+    print option_help
+
+
+# Script entry point: run main() only when this file is executed directly.
+# A KeyboardInterrupt that reaches this level prints a short exit notice.
+if __name__ == "__main__":
+ try:
+ main()
+ except KeyboardInterrupt:
+ print '\n ! Exiting.'
+