diff options
author | 2012-10-22 19:30:33 -0700 | |
---|---|---|
committer | 2012-10-22 19:30:33 -0700 | |
commit | 7e0f9e20c7b1e356a11502d3be88983388348873 (patch) | |
tree | 224bf7c5e8598965d5bf4eb2a9139e284aaa6fec /rewrite-git-blob.py | |
parent | Drop the prune; it's unnecessary (diff) | |
download | git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.gz git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.bz2 git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.zip |
Rework the blob rewriting of $Header.
Specifically, fix the replacement so it actually matches/replaces;
additionally, fix the oversight where the code wasn't rewriting the
data statement to the new length.
Diffstat (limited to 'rewrite-git-blob.py')
-rwxr-xr-x | rewrite-git-blob.py | 75 |
1 files changed, 75 insertions, 0 deletions
#!/usr/bin/python
"""Rewrite ``$Header`` paths inside the ``data`` statements of a blob stream.

Usage: rewrite-git-blob.py BLOB_FILE OUTPUT_DIR

BLOB_FILE is memory-mapped and copied to the output stream.  Every
``data <size>`` statement has occurrences of ``$Header: <OUTPUT_DIR>``
in its payload replaced by ``$Header: /var/cvsroot``, and the statement is
re-emitted with the payload's new length.

NOTE(review): the stream handling compares against ``str`` literals, which
matches what mmap returns on Python 2.  On Python 3 mmap yields ``bytes``
and this script would need porting -- confirm the target interpreter.
"""
import contextlib
import collections
import functools
import itertools
import mmap
import multiprocessing
import operator
import os
import re
import subprocess
import sys


@contextlib.contextmanager
def mmap_open(path):
    """Yield a read-only, shared memory map covering the whole file at *path*.

    The file descriptor is closed as soon as the mapping exists; the mapping
    itself is closed when the context exits, whether or not an error occurred.
    """
    handle = fd = None
    try:
        fd = os.open(path, os.O_RDONLY)
        handle = mmap.mmap(fd, os.fstat(fd).st_size, mmap.MAP_SHARED, mmap.PROT_READ)
        os.close(fd)
        fd = None
        yield handle
    finally:
        # Compare against None rather than relying on truthiness: descriptor 0
        # is a valid (but falsy) fd, and an mmap's truth value is its length.
        if fd is not None:
            os.close(fd)
        if handle is not None:
            handle.close()


def readline_iterate(handle):
    """Yield successive lines from *handle* until ``readline()`` returns an empty value."""
    line = handle.readline()
    while line:
        yield line
        line = handle.readline()


# Substitution callables; each takes a text and returns it with one kind of
# package-manager/manifest annotation rewritten or stripped.
mangler = []
# NOTE(review): the unescaped dots in "0.1.*" match any character, and the
# replacement ends in "/" unlike the portage one below -- confirm both are
# intended before changing them.
mangler.append(functools.partial(
    re.compile(r"^\(paludis (0.1.*)\)$", re.M|re.I).sub,
    r"Package-Manager: paludis-\1/"))
# Special case not covered by the main portage mangler.
# The pattern must be a raw string; with the "r" inside the quotes the regex
# began with a literal "r" before "^" and could never match.
mangler.append(functools.partial(
    re.compile(r'^\(Portage (2\.1\.2[^\)]+)\)$', re.M|re.I).sub,
    r'Package-Manager: portage-\1'))
mangler.append(functools.partial(
    re.compile(r' *\((?:manifest +recommit|(?:un)?signed +manifest +commit)\) *$', re.M|re.I).sub,
    r''))


def process_stream(source, output_dir, output):
    """Copy *source* to *output*, rewriting ``$Header`` paths in data payloads.

    source: object with ``readline()`` and ``read(size)`` returning ``str``.
    output_dir: directory whose normalised absolute path follows ``$Header: ``
        in the payloads; it is replaced by ``/var/cvsroot``.
    output: object with ``write()``.

    Lines other than ``data <size>`` statements are passed through unchanged.
    Raises AssertionError if a payload is shorter than its declared size.
    """
    # Use the normalised absolute path so a trailing slash or a relative
    # output_dir still matches (and does not swallow the path separator).
    header = "$Header: %s" % os.path.normpath(os.path.abspath(output_dir))
    line = source.readline()
    while line:
        chunks = line.split()
        # Slice instead of indexing so blank lines (no chunks) fall through.
        if chunks[0:1] == ['data']:
            # Rewrite the payload that follows the data statement...
            size = int(chunks[1])
            data = source.read(size)
            assert len(data) == size, (line, data)
            data = data.replace(header, "$Header: /var/cvsroot")
            # ...and re-emit the statement with the payload's new length.
            line = 'data %i\n%s' % (len(data), data)
        output.write(line)
        line = source.readline()


def main(blob_file, output_dir, output):
    """Rewrite the blob stream stored in *blob_file* onto *output*.

    Returns 0 so the result can be handed straight to ``sys.exit``.
    """
    # allocate the pool now, before we start getting memory abusive; this is
    # used for thin-manifest conversion if active/enabled.
    #clean_pool = multiprocessing.Pool()

    # Be careful here to just iterate over source; doing so allows this script
    # to do basic processing as it goes (specifically while it's being fed from
    # the mainline cvs2git parallelized repo creator).
    with mmap_open(blob_file) as data:
        # Honour the caller-supplied stream instead of hard-coding sys.stdout.
        process_stream(data, output_dir, output)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1], sys.argv[2], sys.stdout))