aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Brian Harring <ferringb@google.com> 2012-10-22 19:30:33 -0700
committer Brian Harring <ferringb@google.com> 2012-10-22 19:30:33 -0700
commit 7e0f9e20c7b1e356a11502d3be88983388348873 (patch)
tree 224bf7c5e8598965d5bf4eb2a9139e284aaa6fec /rewrite-git-blob.py
parent Drop the prune; it's unnecessary (diff)
downloadgit-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.gz
git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.bz2
git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.zip
Rework the blob rewriting of $Header.
Specifically, fix the replacement so that it actually matches and replaces; additionally, fix the oversight where the code wasn't rewriting the data statement with the new length.
Diffstat (limited to 'rewrite-git-blob.py')
-rwxr-xr-x rewrite-git-blob.py 75
1 files changed, 75 insertions, 0 deletions
diff --git a/rewrite-git-blob.py b/rewrite-git-blob.py
new file mode 100755
index 0000000..430e415
--- /dev/null
+++ b/rewrite-git-blob.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+import contextlib
+import collections
+import functools
+import itertools
+import mmap
+import multiprocessing
+import operator
+import os
+import re
+import subprocess
+import sys
+
@contextlib.contextmanager
def mmap_open(path):
    """Context manager yielding a read-only, shared memory map of *path*.

    The file is opened read-only and mapped in full (its ``st_size``).  The
    file descriptor is closed as soon as the mapping has been created; the
    mapping itself is closed when the ``with`` block exits, whether normally
    or via an exception.

    Args:
        path: filesystem path of the file to map.

    Yields:
        The ``mmap.mmap`` object covering the whole file.

    Raises:
        OSError: propagated from ``os.open``/``os.fstat`` if the file cannot
            be opened or inspected.  Errors raised by ``mmap.mmap`` itself
            (e.g. for a zero-length file) are propagated unchanged.
    """
    handle = fd = None
    try:
        fd = os.open(path, os.O_RDONLY)
        handle = mmap.mmap(fd, os.fstat(fd).st_size, mmap.MAP_SHARED,
                           mmap.PROT_READ)
        # The mapping keeps its own reference to the file, so our descriptor
        # can be released immediately.
        os.close(fd)
        fd = None
        yield handle
    finally:
        # Compare against None rather than relying on truthiness:
        #  * descriptor 0 is a valid descriptor but is falsy;
        #  * truth-testing an mmap calls len(), which raises if the caller
        #    already closed the mapping inside the ``with`` body.
        if fd is not None:
            os.close(fd)
        if handle is not None:
            handle.close()
+
def readline_iterate(handle):
    """Generate lines from *handle* by calling ``readline()`` repeatedly.

    Iteration ends the first time ``readline()`` returns a falsy value
    (an empty string at end of input).

    Args:
        handle: any object exposing a ``readline()`` method.

    Yields:
        Each non-empty value returned by ``handle.readline()``, in order.
    """
    while True:
        current = handle.readline()
        if not current:
            return
        yield current
+
# Text rewriters.  Each entry is a compiled pattern's ``sub`` method with the
# replacement already bound, so ``entry(text)`` returns the rewritten text.
# All patterns are multi-line and case-insensitive.
mangler = [
    # "(paludis 0.1...)" on a line of its own -> Package-Manager trailer.
    functools.partial(
        re.compile(r"^\(paludis (0.1.*)\)$", re.M | re.I).sub,
        r"Package-Manager: paludis-\1/"),
    # Special case not covered by the main portage mangler.
    # (The raw-string prefix must sit outside the quotes; with it inside, the
    # pattern required a literal "r" before the line start and never matched.)
    functools.partial(
        re.compile(r'^\(Portage (2\.1\.2[^\)]+)\)$', re.M | re.I).sub,
        r'Package-Manager: portage-\1'),
    # Strip trailing "(manifest recommit)" / "((un)signed manifest commit)"
    # markers, together with the spaces around them.
    functools.partial(
        re.compile(r' *\((?:manifest +recommit|(?:un)?signed +manifest +commit)\) *$', re.M | re.I).sub,
        r''),
]
+
def process_stream(source, output_dir, output):
    """Copy *source* to *output*, rewriting ``$Header`` paths in data payloads.

    The stream is read line by line.  A line whose first token is ``data`` is
    treated as ``data <size>``: the next ``<size>`` units are read as the
    payload, every occurrence of ``$Header: <output_dir>`` in it is replaced
    by ``$Header: /var/cvsroot``, and the ``data`` line is re-emitted with the
    payload's new length.  All other lines are written through unchanged.
    (Presumably this is a git fast-import blob stream produced by cvs2git --
    confirm against the caller.)

    Args:
        source: object with ``readline()`` and ``read(n)`` methods.
        output_dir: directory whose path appears after ``$Header: `` in the
            payloads.  It is made absolute and normalized before matching, so
            a trailing slash or a relative spelling does not defeat the
            replacement or swallow the path separator.
        output: object with a ``write()`` method receiving the result.

    Raises:
        AssertionError: if fewer than ``<size>`` units could be read.
        ValueError / IndexError: if a ``data`` line has no integer size.

    NOTE(review): the comparisons and formatting here use ``str``; under
    Python 3 an mmap-backed *source* yields ``bytes`` -- confirm the intended
    interpreter before porting.
    """
    # Build the search string from the normalized path (previously the
    # normalized value was computed and then discarded).
    root = os.path.normpath(os.path.abspath(output_dir))
    header = "$Header: %s" % root
    line = source.readline()
    while line:
        chunks = line.split()
        if chunks[0:1] == ['data']:
            # Payload of a "data <size>" statement.
            size = int(chunks[1])
            data = source.read(size)
            assert len(data) == size, (line, data)
            data = data.replace(header, "$Header: /var/cvsroot")
            # The replacement may change the payload length, so the data
            # statement is regenerated rather than echoed.
            line = 'data %i\n%s' % (len(data), data)
        output.write(line)
        line = source.readline()
+
def main(blob_file, output_dir, output):
    """Memory-map *blob_file* and stream its rewritten contents to *output*.

    Args:
        blob_file: path of the blob stream file to process.
        output_dir: directory path passed to ``process_stream`` for the
            ``$Header`` rewrite.
        output: writable object that receives the processed stream.

    Returns:
        None, which ``sys.exit`` treats as a successful exit status.
    """
    # allocate the pool now, before we start getting memory abusive; this is
    # used for thin-manifest conversion if active/enabled.  (Currently
    # disabled.)
    #clean_pool = multiprocessing.Pool()

    # Be careful here to just iterate over source; doing so allows this script
    # to do basic processing as it goes (specifically while it's being fed from
    # the mainline cvs2git parallelized repo creator).
    with mmap_open(blob_file) as data:
        # Honor the caller-supplied sink instead of hard-coding sys.stdout.
        process_stream(data, output_dir, output)
+
if __name__ == '__main__':
    # Usage: rewrite-git-blob.py <blob_file> <output_dir>; result goes to stdout.
    blob_path = sys.argv[1]
    target_dir = sys.argv[2]
    exit_status = main(blob_path, target_dir, sys.stdout)
    sys.exit(exit_status)