aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Harring <ferringb@gmail.com>2012-10-13 17:49:44 -0700
committerBrian Harring <ferringb@google.com>2012-10-16 13:27:24 -0700
commit2919cf0d03b37050c6624d97547653d1fffa033d (patch)
tree76003db257539a345620e1fec369e92a3a69be4b
downloadgit-conversion-tools-2919cf0d03b37050c6624d97547653d1fffa033d.tar.gz
git-conversion-tools-2919cf0d03b37050c6624d97547653d1fffa033d.tar.bz2
git-conversion-tools-2919cf0d03b37050c6624d97547653d1fffa033d.zip
import of content;
note rcsparse has had my old http://cvs2svn.tigris.org/nonav/issues/showattachment.cgi/64/rcparse_redundant_work.patch patch applied.
-rw-r--r--.gitignore2
-rw-r--r--config588
-rw-r--r--cvs2svn_lib/__init__.py18
-rw-r--r--cvs2svn_lib/apple_single_filter.py292
-rw-r--r--cvs2svn_lib/artifact.py59
-rw-r--r--cvs2svn_lib/artifact_manager.py256
-rw-r--r--cvs2svn_lib/bzr_run_options.py175
-rw-r--r--cvs2svn_lib/changeset.py269
-rw-r--r--cvs2svn_lib/changeset_database.py70
-rw-r--r--cvs2svn_lib/changeset_graph.py456
-rw-r--r--cvs2svn_lib/changeset_graph_link.py149
-rw-r--r--cvs2svn_lib/changeset_graph_node.py50
-rw-r--r--cvs2svn_lib/check_dependencies_pass.py144
-rw-r--r--cvs2svn_lib/checkout_internal.py778
-rw-r--r--cvs2svn_lib/collect_data.py1431
-rw-r--r--cvs2svn_lib/common.py409
-rw-r--r--cvs2svn_lib/config.py221
-rw-r--r--cvs2svn_lib/context.py93
-rw-r--r--cvs2svn_lib/cvs_file.py287
-rw-r--r--cvs2svn_lib/cvs_file_database.py75
-rw-r--r--cvs2svn_lib/cvs_file_items.py1075
-rw-r--r--cvs2svn_lib/cvs_item.py901
-rw-r--r--cvs2svn_lib/cvs_item_database.py248
-rw-r--r--cvs2svn_lib/cvs_revision_manager.py85
-rw-r--r--cvs2svn_lib/database.py322
-rw-r--r--cvs2svn_lib/dumpfile_delegate.py510
-rw-r--r--cvs2svn_lib/fill_source.py192
-rw-r--r--cvs2svn_lib/fulltext_revision_recorder.py127
-rw-r--r--cvs2svn_lib/git_output_option.py658
-rw-r--r--cvs2svn_lib/git_revision_recorder.py114
-rw-r--r--cvs2svn_lib/git_run_options.py274
-rw-r--r--cvs2svn_lib/key_generator.py45
-rw-r--r--cvs2svn_lib/log.py174
-rw-r--r--cvs2svn_lib/main.py117
-rw-r--r--cvs2svn_lib/man_writer.py197
-rw-r--r--cvs2svn_lib/metadata.py26
-rw-r--r--cvs2svn_lib/metadata_database.py102
-rw-r--r--cvs2svn_lib/openings_closings.py236
-rw-r--r--cvs2svn_lib/output_option.py85
-rw-r--r--cvs2svn_lib/pass_manager.py215
-rw-r--r--cvs2svn_lib/passes.py1837
-rw-r--r--cvs2svn_lib/persistence_manager.py106
-rw-r--r--cvs2svn_lib/process.py116
-rw-r--r--cvs2svn_lib/project.py219
-rw-r--r--cvs2svn_lib/property_setters.py385
-rw-r--r--cvs2svn_lib/rcs_revision_manager.py51
-rw-r--r--cvs2svn_lib/rcs_stream.py149
-rw-r--r--cvs2svn_lib/record_table.py399
-rw-r--r--cvs2svn_lib/repository_delegate.py98
-rw-r--r--cvs2svn_lib/repository_mirror.py897
-rw-r--r--cvs2svn_lib/revision_manager.py189
-rw-r--r--cvs2svn_lib/run_options.py1035
-rw-r--r--cvs2svn_lib/serializer.py146
-rw-r--r--cvs2svn_lib/stats_keeper.py189
-rw-r--r--cvs2svn_lib/stdout_delegate.py107
-rw-r--r--cvs2svn_lib/svn_commit.py381
-rw-r--r--cvs2svn_lib/svn_commit_creator.py217
-rw-r--r--cvs2svn_lib/svn_commit_item.py50
-rw-r--r--cvs2svn_lib/svn_output_option.py753
-rw-r--r--cvs2svn_lib/svn_repository_delegate.py121
-rw-r--r--cvs2svn_lib/svn_revision_range.py171
-rw-r--r--cvs2svn_lib/svn_run_options.py543
-rw-r--r--cvs2svn_lib/symbol.py246
-rw-r--r--cvs2svn_lib/symbol_database.py68
-rw-r--r--cvs2svn_lib/symbol_statistics.py521
-rw-r--r--cvs2svn_lib/symbol_strategy.py685
-rw-r--r--cvs2svn_lib/symbol_transform.py236
-rw-r--r--cvs2svn_lib/time_range.py44
-rw-r--r--cvs2svn_lib/version.py27
-rw-r--r--cvs2svn_rcsparse/__init__.py26
-rw-r--r--cvs2svn_rcsparse/common.py324
-rw-r--r--cvs2svn_rcsparse/debug.py122
-rw-r--r--cvs2svn_rcsparse/default.py172
-rw-r--r--cvs2svn_rcsparse/parse_rcs_file.py73
-rw-r--r--cvs2svn_rcsparse/rcparse_redundant_work.patch99
-rw-r--r--cvs2svn_rcsparse/run-tests.py73
-rw-r--r--cvs2svn_rcsparse/texttools.py348
77 files changed, 22748 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8b5efc7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+cvs-repo
+output
diff --git a/config b/config
new file mode 100644
index 0000000..94c17d7
--- /dev/null
+++ b/config
@@ -0,0 +1,588 @@
+# (Be in -*- mode: python; coding: utf-8 -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+# #####################
+# ## PLEASE READ ME! ##
+# #####################
+#
+# This is a template for an options file that can be used to configure
+# cvs2svn to convert to git rather than to Subversion. See
+# www/cvs2git.html and www/cvs2svn.html for general information, and
+# see the comments in this file for information about what options are
+# available and how they can be set.
+#
+# The program that is run to convert from CVS to git is called
+# cvs2git. Run it with the --options option, passing it this file
+# like this:
+#
+# cvs2git --options=cvs2git-example.options
+#
+# The output of cvs2git is a blob file and a dump file that can be
+# loaded into git using the "git fast-import" command. Please read
+# www/cvs2git.html for more information.
+#
+# Many options do not have defaults, so it is easier to copy this file
+# and modify what you need rather than creating a new options file
+# from scratch. This file is in Python syntax, but you don't need to
+# know Python to modify it. But if you *do* know Python, then you
+# will be happy to know that you can use arbitrary Python constructs to
+# do fancy configuration tricks.
+#
+# But please be aware of the following:
+#
+# * In many places, leading whitespace is significant in Python (it is
+# used instead of curly braces to group statements together).
+# Therefore, if you don't know what you are doing, it is best to
+# leave the whitespace as it is.
+#
+# * In normal strings, Python treats a backslash ("\") as an escape
+# character. Therefore, if you want to specify a string that
+# contains a backslash, you need either to escape the backslash with
+# another backslash ("\\"), or use a "raw string", as in one of the
+# following equivalent examples:
+#
+# ctx.sort_executable = 'c:\\windows\\system32\\sort.exe'
+# ctx.sort_executable = r'c:\windows\system32\sort.exe'
+#
+# See http://docs.python.org/tutorial/introduction.html#strings for
+# more information.
+#
+# Two identifiers will have been defined before this file is executed,
+# and can be used freely within this file:
+#
+# ctx -- a Ctx object (see cvs2svn_lib/context.py), which holds
+# many configuration options
+#
+# run_options -- an instance of the GitRunOptions class (see
+# cvs2svn_lib/git_run_options.py), which holds some variables
+# governing how cvs2git is run
+
+
+# Import some modules that are used in setting the options:
+import re
+
+from cvs2svn_lib import config
+from cvs2svn_lib import changeset_database
+from cvs2svn_lib.common import CVSTextDecoder
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.project import Project
+from cvs2svn_lib.git_revision_recorder import GitRevisionRecorder
+from cvs2svn_lib.git_output_option import GitRevisionMarkWriter
+from cvs2svn_lib.git_output_option import GitOutputOption
+from cvs2svn_lib.revision_manager import NullRevisionRecorder
+from cvs2svn_lib.revision_manager import NullRevisionExcluder
+from cvs2svn_lib.fulltext_revision_recorder \
+ import SimpleFulltextRevisionRecorderAdapter
+from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
+from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
+from cvs2svn_lib.checkout_internal import InternalRevisionRecorder
+from cvs2svn_lib.checkout_internal import InternalRevisionExcluder
+from cvs2svn_lib.checkout_internal import InternalRevisionReader
+from cvs2svn_lib.symbol_strategy import AllBranchRule
+from cvs2svn_lib.symbol_strategy import AllTagRule
+from cvs2svn_lib.symbol_strategy import BranchIfCommitsRule
+from cvs2svn_lib.symbol_strategy import ExcludeRegexpStrategyRule
+from cvs2svn_lib.symbol_strategy import ForceBranchRegexpStrategyRule
+from cvs2svn_lib.symbol_strategy import ForceTagRegexpStrategyRule
+from cvs2svn_lib.symbol_strategy import ExcludeTrivialImportBranchRule
+from cvs2svn_lib.symbol_strategy import ExcludeVendorBranchRule
+from cvs2svn_lib.symbol_strategy import HeuristicStrategyRule
+from cvs2svn_lib.symbol_strategy import UnambiguousUsageRule
+from cvs2svn_lib.symbol_strategy import HeuristicPreferredParentRule
+from cvs2svn_lib.symbol_strategy import SymbolHintsFileRule
+from cvs2svn_lib.symbol_transform import ReplaceSubstringsSymbolTransform
+from cvs2svn_lib.symbol_transform import RegexpSymbolTransform
+from cvs2svn_lib.symbol_transform import IgnoreSymbolTransform
+from cvs2svn_lib.symbol_transform import NormalizePathsSymbolTransform
+from cvs2svn_lib.property_setters import AutoPropsPropertySetter
+from cvs2svn_lib.property_setters import CVSBinaryFileDefaultMimeTypeSetter
+from cvs2svn_lib.property_setters import CVSBinaryFileEOLStyleSetter
+from cvs2svn_lib.property_setters import CVSRevisionNumberSetter
+from cvs2svn_lib.property_setters import DefaultEOLStyleSetter
+from cvs2svn_lib.property_setters import EOLStyleFromMimeTypeSetter
+from cvs2svn_lib.property_setters import ExecutablePropertySetter
+from cvs2svn_lib.property_setters import KeywordsPropertySetter
+from cvs2svn_lib.property_setters import MimeMapper
+from cvs2svn_lib.property_setters import SVNBinaryFileKeywordsPropertySetter
+
+# To choose the level of logging output, uncomment one of the
+# following lines:
+#Log().log_level = Log.WARN
+#Log().log_level = Log.QUIET
+#Log().log_level = Log.NORMAL
+#Log().log_level = Log.VERBOSE
+Log().log_level = Log.DEBUG
+
+
+# During CollectRevsPass, cvs2git records the contents of file
+# revisions into a "blob" file in git-fast-import format. This option
+# configures that process:
+ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter(
+ # The following option specifies how the revision contents of the RCS
+ # files should be read.
+ #
+ # RCSRevisionReader uses RCS's "co" program to extract the revision
+ # contents of the RCS files during CollectRevsPass. The constructor
+ # argument specifies how to invoke the "co" executable.
+ #
+ # CVSRevisionReader uses the "cvs" program to extract the revision
+ # contents out of the RCS files during OutputPass. This option is
+ # considerably slower than RCSRevisionReader because "cvs" is
+ # considerably slower than "co". However, it works in some situations
+ # where RCSRevisionReader fails; see the HTML documentation of the
+ # "--use-cvs" option for details. The constructor argument specifies
+# how to invoke the "cvs" executable.
+ #
+ # Uncomment one of the two following lines:
+ RCSRevisionReader(co_executable=r'co'),
+ #CVSRevisionReader(cvs_executable=r'cvs'),
+
+ # The file in which to write the git-fast-import stream that
+ # contains the file revision contents:
+ GitRevisionRecorder('cvs2svn-tmp/git-blob.dat'),
+ )
+
+# cvs2git does not need to keep track of what revisions will be
+# excluded, so leave this option unchanged:
+ctx.revision_excluder = NullRevisionExcluder()
+
+# cvs2git doesn't need a revision reader because OutputPass only
+# refers to blobs that were output during CollectRevsPass, so leave
+# this option set to None.
+ctx.revision_reader = None
+
+# Set the name (and optionally the path) of some other executables
+# required by cvs2svn:
+ctx.sort_executable = r'sort'
+
+# Change the following line to True if the conversion should only
+# include the trunk of the repository (i.e., all branches and tags
+# should be omitted from the conversion):
+ctx.trunk_only = False
+
+# How to convert CVS author names, log messages, and filenames to
+# Unicode. The first argument to CVSTextDecoder is a list of encoders
+# that are tried in order in 'strict' mode until one of them succeeds.
+# If none of those succeeds, then fallback_encoder (if it is
+# specified) is used in lossy 'replace' mode. Setting a fallback
+# encoder ensures that the encoder always succeeds, but it can cause
+# information loss.
+ctx.cvs_author_decoder = CVSTextDecoder(
+ [
+ #'latin1',
+ #'utf8',
+ 'ascii',
+ ],
+ fallback_encoding='latin1'
+ )
+ctx.cvs_log_decoder = CVSTextDecoder(
+ [
+ #'latin1',
+ #'utf8',
+ 'ascii',
+ ],
+ fallback_encoding='latin1'
+ )
+# You might want to be especially strict when converting filenames to
+# Unicode (e.g., maybe not specify a fallback_encoding).
+ctx.cvs_filename_decoder = CVSTextDecoder(
+ [
+ #'latin1',
+ #'utf8',
+ 'ascii',
+ ],
+ #fallback_encoding='ascii'
+ )
+
+# Template for the commit message to be used for initial project
+# commits.
+ctx.initial_project_commit_message = (
+ 'Standard project directories initialized by cvs2svn.'
+ )
+
+# Template for the commit message to be used for post commits, in
+# which modifications to a vendor branch are copied back to trunk.
+# This message can use '%(revnum)d' to include the SVN revision number
+# of the revision that included the change to the vendor branch
+# (admittedly rather pointless in a cvs2git conversion).
+ctx.post_commit_message = (
+ 'This commit was generated by cvs2svn to track changes on a CVS '
+ 'vendor branch.'
+ )
+
+# Template for the commit message to be used for commits in which
+# symbols are created. This message can use '%(symbol_type)s' to
+# include the type of the symbol ('branch' or 'tag') or
+# '%(symbol_name)s' to include the name of the symbol.
+ctx.symbol_commit_message = (
+ "This commit was manufactured by cvs2svn to create %(symbol_type)s "
+ "'%(symbol_name)s'."
+ )
+
+# Some CVS clients for MacOS store resource fork data into CVS along
+# with the file contents itself by wrapping it all up in a container
+# format called "AppleSingle". Subversion currently does not support
+# MacOS resource forks. Nevertheless, sometimes the resource fork
+# information is not necessary and can be discarded. Set the
+# following option to True if you would like cvs2svn to identify files
+# whose contents are encoded in AppleSingle format, and discard all
+# but the data fork for such files before committing them to
+# Subversion. (Please note that AppleSingle contents are identified
+# by the AppleSingle magic number as the first four bytes of the file.
+# This check is not failproof, so only set this option if you think
+# you need it.)
+ctx.decode_apple_single = False
+
+# This option can be set to the name of a filename to which are stored
+# statistics and conversion decisions about the CVS symbols.
+ctx.symbol_info_filename = None
+#ctx.symbol_info_filename = 'symbol-info.txt'
+
+# cvs2svn uses "symbol strategy rules" to help decide how to handle
+# CVS symbols. The rules in a project's symbol_strategy_rules are
+# applied in order, and each rule is allowed to modify the symbol.
+# The result (after each of the rules has been applied) is used for
+# the conversion.
+#
+# 1. A CVS symbol might be used as a tag in one file and as a branch
+# in another file. cvs2svn has to decide whether to convert such a
+# symbol as a tag or as a branch. cvs2svn uses a series of
+# heuristic rules to decide how to convert a symbol. The user can
+# override the default rules for specific symbols or symbols
+# matching regular expressions.
+#
+# 2. cvs2svn is also capable of excluding symbols from the conversion
+# (provided no other symbols depend on them).
+#
+# 3. CVS does not record unambiguously the line of development from
+# which a symbol sprouted. cvs2svn uses a heuristic to choose a
+# symbol's "preferred parents".
+#
+# The standard branch/tag/exclude StrategyRules do not change a symbol
+# that has already been processed by an earlier rule, so in effect the
+# first matching rule is the one that is used.
+
+global_symbol_strategy_rules = [
+ # It is possible to specify manually exactly how symbols should be
+ # converted and what line of development should be used as the
+ # preferred parent. To do so, create a file containing the symbol
+ # hints and enable the following option.
+ #
+ # The format of the hints file is described in the documentation
+ # for the --symbol-hints command-line option. The file output by
+ # the --write-symbol-info (i.e., ctx.symbol_info_filename) option
+ # is in the same format. The simplest way to use this option is
+ # to run the conversion through CollateSymbolsPass with
+ # --write-symbol-info option, copy the symbol info and edit it to
+ # create a hints file, then re-start the conversion at
+ # CollateSymbolsPass with this option enabled.
+ #SymbolHintsFileRule('symbol-hints.txt'),
+
+ # To force all symbols matching a regular expression to be
+ # converted as branches, add rules like the following:
+ #ForceBranchRegexpStrategyRule(r'branch.*'),
+
+ # To force all symbols matching a regular expression to be
+ # converted as tags, add rules like the following:
+ #ForceTagRegexpStrategyRule(r'tag.*'),
+
+ # To force all symbols matching a regular expression to be
+ # excluded from the conversion, add rules like the following:
+ #ExcludeRegexpStrategyRule(r'unknown-.*'),
+
+ # Sometimes people use "cvs import" to get their own source code
+ # into CVS. This practice creates a vendor branch 1.1.1 and
+ # imports the code onto the vendor branch as 1.1.1.1, then copies
+ # the same content to the trunk as version 1.1. Normally, such
+ # vendor branches are useless and they complicate the SVN history
+ # unnecessarily. The following rule excludes any branches that
+ # only existed as a vendor branch with a single import (leaving
+ # only the 1.1 revision). If you want to retain such branches,
+ # comment out the following line. (Please note that this rule
+ # does not exclude vendor *tags*, as they are not so easy to
+ # identify.)
+ ExcludeTrivialImportBranchRule(),
+
+ # To exclude all vendor branches (branches that had "cvs import"s
+# on them but no other kinds of commits), uncomment the following
+ # line:
+ #ExcludeVendorBranchRule(),
+
+ # Usually you want this rule, to convert unambiguous symbols
+ # (symbols that were only ever used as tags or only ever used as
+ # branches in CVS) the same way they were used in CVS:
+ UnambiguousUsageRule(),
+
+ # If there was ever a commit on a symbol, then it cannot be
+ # converted as a tag. This rule causes all such symbols to be
+ # converted as branches. If you would like to resolve such
+ # ambiguities manually, comment out the following line:
+ BranchIfCommitsRule(),
+
+ # Last in the list can be a catch-all rule that is used for
+ # symbols that were not matched by any of the more specific rules
+ # above. (Assuming that BranchIfCommitsRule() was included above,
+ # then the symbols that are still indeterminate at this point can
+ # sensibly be converted as branches or tags.) Include at most one
+ # of these lines. If none of these catch-all rules are included,
+ # then the presence of any ambiguous symbols (that haven't been
+ # disambiguated above) is an error:
+
+ # Convert ambiguous symbols based on whether they were used more
+ # often as branches or as tags:
+ HeuristicStrategyRule(),
+ # Convert all ambiguous symbols as branches:
+ #AllBranchRule(),
+ # Convert all ambiguous symbols as tags:
+ #AllTagRule(),
+
+ # The last rule is here to choose the preferred parent of branches
+ # and tags, that is, the line of development from which the symbol
+ # sprouts.
+ HeuristicPreferredParentRule(),
+ ]
+
+# Specify a username to be used for commits for which CVS doesn't
+# record the original author (for example, the creation of a branch).
+# This should be a simple (unix-style) username, but it can be
+# translated into a git-style name by the author_transforms map.
+ctx.username = 'cvs2svn'
+
+# ctx.svn_property_setters contains a list of rules used to set the
+# svn properties on files in the converted archive. For each file,
+# the rules are tried one by one. Any rule can add or suppress one or
+# more svn properties. Typically the rules will not overwrite
+# properties set by a previous rule (though they are free to do so).
+#
+# Obviously, SVN properties per se are not interesting for a cvs2git
+# conversion, but some of these properties have side-effects that do
+# affect the git output. FIXME: Document this in more detail.
+ctx.svn_property_setters.extend([
+ # To read auto-props rules from a file, uncomment the following line
+ # and specify a filename. The boolean argument specifies whether
+ # case should be ignored when matching filenames to the filename
+ # patterns found in the auto-props file:
+ #AutoPropsPropertySetter(
+ # r'/home/username/.subversion/config',
+ # ignore_case=True,
+ # ),
+
+ # To read mime types from a file, uncomment the following line and
+ # specify a filename:
+ #MimeMapper(r'/etc/mime.types'),
+
+ # Omit the svn:eol-style property from any files that are listed
+ # as binary (i.e., mode '-kb') in CVS:
+ CVSBinaryFileEOLStyleSetter(),
+
+ # If the file is binary and its svn:mime-type property is not yet
+ # set, set svn:mime-type to 'application/octet-stream'.
+ CVSBinaryFileDefaultMimeTypeSetter(),
+
+ # To try to determine the eol-style from the mime type, uncomment
+ # the following line:
+ #EOLStyleFromMimeTypeSetter(),
+
+ # Choose one of the following lines to set the default
+ # svn:eol-style if none of the above rules applied. The argument
+ # is the svn:eol-style that should be applied, or None if no
+ # svn:eol-style should be set (i.e., the file should be treated as
+ # binary).
+ #
+ # The default is to treat all files as binary unless one of the
+ # previous rules has determined otherwise, because this is the
+ # safest approach. However, if you have been diligent about
+ # marking binary files with -kb in CVS and/or you have used the
+ # above rules to definitely mark binary files as binary, then you
+ # might prefer to use 'native' as the default, as it is usually
+ # the most convenient setting for text files. Other possible
+ # options: 'CRLF', 'CR', 'LF'.
+ DefaultEOLStyleSetter(None),
+ #DefaultEOLStyleSetter('native'),
+
+ # Prevent svn:keywords from being set on files that have
+ # svn:eol-style unset.
+ SVNBinaryFileKeywordsPropertySetter(),
+
+ # If svn:keywords has not been set yet, set it based on the file's
+ # CVS mode:
+ KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE),
+
+ # Set the svn:executable flag on any files that are marked in CVS as
+ # being executable:
+ ExecutablePropertySetter(),
+
+ ])
+
+# The directory to use for temporary files:
+ctx.tmpdir = r'cvs2svn-tmp'
+
+# To skip the cleanup of temporary files, uncomment the following
+# option:
+#ctx.skip_cleanup = True
+
+
+# In CVS, it is perfectly possible to make a single commit that
+# affects more than one project or more than one branch of a single
+# project. Subversion also allows such commits. Therefore, by
+# default, when cvs2svn sees what looks like a cross-project or
+# cross-branch CVS commit, it converts it into a
+# cross-project/cross-branch Subversion commit.
+#
+# However, other tools and SCMs have trouble representing
+# cross-project or cross-branch commits. (For example, Trac's Revtree
+# plugin, http://www.trac-hacks.org/wiki/RevtreePlugin is confused by
+# such commits.) Therefore, we provide the following two options to
+# allow cross-project/cross-branch commits to be suppressed.
+
+# cvs2git only supports single-project conversions (multiple-project
+# conversions wouldn't really make sense for git anyway). So this
+# option must be set to False:
+ctx.cross_project_commits = False
+
+# git itself doesn't allow commits that affect more than one branch,
+# so this option must be set to False:
+ctx.cross_branch_commits = False
+
+# cvs2git does not yet handle translating .cvsignore files into
+# .gitignore files, so by default, the .cvsignore files are included
+# in the conversion output. If you would like to omit the .cvsignore
+# files from the output, set this option to False:
+ctx.keep_cvsignore = True
+
+# By default, it is a fatal error for a CVS ",v" file to appear both
+# inside and outside of an "Attic" subdirectory (this should never
+# happen, but frequently occurs due to botched repository
+# administration). If you would like to retain both versions of such
+# files, change the following option to True, and the attic version of
+# the file will be written to a subdirectory called "Attic" in the
+# output repository:
+ctx.retain_conflicting_attic_files = False
+
+# CVS uses unix login names as author names whereas git requires
+# author names to be of the form "foo <bar>". The default is to set
+# the git author to "cvsauthor <cvsauthor>". author_transforms can be
+# used to map cvsauthor names (e.g., "jrandom") to a true name and
+# email address (e.g., "J. Random <jrandom@example.com>" for the
+# example shown). All values should be either Unicode strings (i.e.,
+# with "u" as a prefix) or 8-bit strings in the utf-8 encoding.
+# Please substitute your own project's usernames here to use with the
+# author_transforms option of GitOutputOption below.
+author_transforms={
+ 'jrandom' : ('J. Random', 'jrandom@example.com'),
+ 'mhagger' : ('Michael Haggerty', 'mhagger@alum.mit.edu'),
+ 'brane' : (u'Branko Čibej', 'brane@xbc.nu'),
+ 'ringstrom' : ('Tobias Ringström', 'tobias@ringstrom.mine.nu'),
+ 'dionisos' : (u'Erik Hülsmann', 'e.huelsmann@gmx.net'),
+
+ # This one will be used for commits for which CVS doesn't record
+ # the original author, as explained above.
+ 'cvs2svn' : ('cvs2svn', 'admin@example.com'),
+ }
+
+# This is the main option that causes cvs2svn to output to a
+# "fastimport"-format dumpfile rather than to Subversion:
+ctx.output_option = GitOutputOption(
+ # The file in which to write the git-fast-import stream that
+ # contains the changesets and branch/tag information:
+ 'cvs2svn-tmp/git-dump.dat',
+
+ # The blobs will be written via the revision recorder, so in
+ # OutputPass we only have to emit references to the blob marks:
+ GitRevisionMarkWriter(),
+
+ # This option can be set to an integer to limit the number of
+ # revisions that are merged with the main parent in any commit.
+ # For git output, this can be set to None (unlimited), though due
+ # to the limitations of other tools you might want to set it to a
+ # smaller number (e.g., 16). For Mercurial output, this should be
+ # set to 1.
+ max_merges=None,
+ #max_merges=1,
+
+ # Optional map from CVS author names to git author names:
+ author_transforms=author_transforms,
+ )
+
+# Change this option to True to turn on profiling of cvs2svn (for
+# debugging purposes):
+run_options.profiling = False
+
+
+# Should CVSItem -> Changeset database files be memory mapped? In
+# some tests, using memory mapping speeded up the overall conversion
+# by about 5%. But this option can cause the conversion to fail with
+# an out of memory error if the conversion computer runs out of
+# virtual address space (e.g., when running a very large conversion on
+# a 32-bit operating system). Therefore it is disabled by default.
+# Uncomment the following line to allow these database files to be
+# memory mapped.
+changeset_database.use_mmap_for_cvs_item_to_changeset_table = True
+
+# Now set the project to be converted to git. cvs2git only supports
+# single-project conversions, so this method must only be called
+# once:
+run_options.set_project(
+ # The filesystem path to the part of the CVS repository (*not* a
+ # CVS working copy) that should be converted. This may be a
+ # subdirectory (i.e., a module) within a larger CVS repository.
+ r'cvs-repo',
+
+ # A list of symbol transformations that can be used to rename
+ # symbols in this project.
+ symbol_transforms=[
+ # Use IgnoreSymbolTransforms like the following to completely
+ # ignore symbols matching a regular expression when parsing
+ # the CVS repository, for example to avoid warnings about
+ # branches with two names and to choose the preferred name.
+ # It is *not* recommended to use this instead of
+ # ExcludeRegexpStrategyRule; though more efficient,
+ # IgnoreSymbolTransforms are less flexible and don't exclude
+ # branches correctly. The argument is a Python-style regular
+ # expression that has to match the *whole* CVS symbol name:
+ #IgnoreSymbolTransform(r'nightly-build-tag-.*')
+
+ # RegexpSymbolTransforms transform symbols textually using a
+ # regular expression. The first argument is a Python regular
+ # expression pattern and the second is a replacement pattern.
+ # The pattern is matched against each symbol name. If it
+ # matches the whole symbol name, then the symbol name is
+ # replaced with the corresponding replacement text. The
+ # replacement can include substitution patterns (e.g., r'\1'
+ # or r'\g<name>'). Typically you will want to use raw strings
+ # (strings with a preceding 'r', like shown in the examples)
+ # for the regexp and its replacement to avoid backslash
+ # substitution within those strings.
+ #RegexpSymbolTransform(r'release-(\d+)_(\d+)',
+ # r'release-\1.\2'),
+ #RegexpSymbolTransform(r'release-(\d+)_(\d+)_(\d+)',
+ # r'release-\1.\2.\3'),
+
+ # Simple 1:1 character replacements can also be done. The
+ # following transform, which converts backslashes into forward
+ # slashes, should usually be included:
+ ReplaceSubstringsSymbolTransform('\\','/'),
+
+ # This last rule eliminates leading, trailing, and repeated
+ # slashes within the output symbol names:
+ NormalizePathsSymbolTransform(),
+ ],
+
+ # See the definition of global_symbol_strategy_rules above for a
+ # description of this option:
+ symbol_strategy_rules=global_symbol_strategy_rules,
+ )
+
diff --git a/cvs2svn_lib/__init__.py b/cvs2svn_lib/__init__.py
new file mode 100644
index 0000000..838d4c6
--- /dev/null
+++ b/cvs2svn_lib/__init__.py
@@ -0,0 +1,18 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This package contains modules that support cvs2svn."""
+
diff --git a/cvs2svn_lib/apple_single_filter.py b/cvs2svn_lib/apple_single_filter.py
new file mode 100644
index 0000000..95fa9cb
--- /dev/null
+++ b/cvs2svn_lib/apple_single_filter.py
@@ -0,0 +1,292 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""A stream filter for extracting the data fork from AppleSingle data.
+
+Some Macintosh CVS clients store resource fork data along with the
+contents of the file (called the data fork) by encoding both in an
+'AppleSingle' data stream before storing them to CVS. This file
+contains a stream filter for extracting the data fork from such data
+streams. (Any other forks are discarded.)
+
+See the following for some random information about this format and
+how it is used by Macintosh CVS clients:
+
+ http://users.phg-online.de/tk/netatalk/doc/Apple/v1/
+ http://rfc.net/rfc1740.html
+ http://ximbiot.com/cvs/cvshome/cyclic/cvs/dev-mac.html
+ http://www.maccvs.org/faq.html#resfiles
+ http://www.heilancoo.net/MacCVSClient/MacCVSClientDoc/storage-formats.html
+
+"""
+
+
+import struct
+from cStringIO import StringIO
+
+
+class AppleSingleFormatError(IOError):
+ """The stream was not in correct AppleSingle format."""
+
+ pass
+
+
+class AppleSingleIncorrectMagicError(AppleSingleFormatError):
+ """The file didn't start with the correct magic number."""
+
+ def __init__(self, data_read, eof):
+ AppleSingleFormatError.__init__(self)
+ self.data_read = data_read
+ self.eof = eof
+
+
+class AppleSingleEOFError(AppleSingleFormatError):
+ """EOF was reached where AppleSingle doesn't allow it."""
+
+ pass
+
+
+class AppleSingleFilter(object):
+ """A stream that reads the data fork from an AppleSingle stream.
+
+ If the constructor discovers that the file is not a legitimate
+ AppleSingle stream, then it raises an AppleSingleFormatError. In
+ the special case that the magic number is incorrect, it raises
+ AppleSingleIncorrectMagicError with data_read set to the data that
+ have been read so far from the input stream. (This allows the
+ caller the option to fall back to treating the input stream as a
+ normal binary data stream.)"""
+
+ # The header is:
+ #
+ # Magic number 4 bytes
+ # Version number 4 bytes
+ # File system or filler 16 bytes
+ # Number of entries 2 bytes
+ magic_struct = '>i'
+ magic_len = struct.calcsize(magic_struct)
+
+ # The part of the header after the magic number:
+ rest_of_header_struct = '>i16sH'
+ rest_of_header_len = struct.calcsize(rest_of_header_struct)
+
+ # Each entry is:
+ #
+ # Entry ID 4 bytes
+ # Offset 4 bytes
+ # Length 4 bytes
+ entry_struct = '>iii'
+ entry_len = struct.calcsize(entry_struct)
+
+ apple_single_magic = 0x00051600
+ apple_single_version_1 = 0x00010000
+ apple_single_version_2 = 0x00020000
+ apple_single_filler = '\0' * 16
+
+ apple_single_data_fork_entry_id = 1
+
+ def __init__(self, stream):
+ self.stream = stream
+
+ # Check for the AppleSingle magic number:
+ s = self._read_exactly(self.magic_len)
+ if len(s) < self.magic_len:
+ raise AppleSingleIncorrectMagicError(s, True)
+
+ (magic,) = struct.unpack(self.magic_struct, s)
+ if magic != self.apple_single_magic:
+ raise AppleSingleIncorrectMagicError(s, False)
+
+ # Read the rest of the header:
+ s = self._read_exactly(self.rest_of_header_len)
+ if len(s) < self.rest_of_header_len:
+ raise AppleSingleEOFError('AppleSingle header incomplete')
+
+ (version, filler, num_entries) = \
+ struct.unpack(self.rest_of_header_struct, s)
+
+ if version == self.apple_single_version_1:
+ self._prepare_apple_single_v1_file(num_entries)
+ elif version == self.apple_single_version_2:
+ if filler != self.apple_single_filler:
+ raise AppleSingleFormatError('Incorrect filler')
+ self._prepare_apple_single_v2_file(num_entries)
+ else:
+ raise AppleSingleFormatError('Unknown AppleSingle version')
+
+ def _read_exactly(self, size):
+ """Read and return exactly SIZE characters from the stream.
+
+ This method is to deal with the fact that stream.read(size) is
+ allowed to return less than size characters. If EOF is reached
+ before SIZE characters have been read, return the characters that
+ have been read so far."""
+
+ retval = []
+ length_remaining = size
+ while length_remaining > 0:
+ s = self.stream.read(length_remaining)
+ if not s:
+ break
+ retval.append(s)
+ length_remaining -= len(s)
+
+ return ''.join(retval)
+
+ def _prepare_apple_single_file(self, num_entries):
+ entries = self._read_exactly(num_entries * self.entry_len)
+ if len(entries) < num_entries * self.entry_len:
+ raise AppleSingleEOFError('Incomplete entries list')
+
+ for i in range(num_entries):
+ entry = entries[i * self.entry_len : (i + 1) * self.entry_len]
+ (entry_id, offset, length) = struct.unpack(self.entry_struct, entry)
+ if entry_id == self.apple_single_data_fork_entry_id:
+ break
+ else:
+ raise AppleSingleFormatError('No data fork found')
+
+ # The data fork is located at [offset : offset + length]. Read up
+ # to the start of the data:
+ n = offset - self.magic_len - self.rest_of_header_len - len(entries)
+ if n < 0:
+ raise AppleSingleFormatError('Invalid offset to AppleSingle data fork')
+
+ max_chunk_size = 65536
+ while n > 0:
+ s = self.stream.read(min(n, max_chunk_size))
+ if not s:
+ raise AppleSingleEOFError(
+ 'Offset to AppleSingle data fork past end of file'
+ )
+ n -= len(s)
+
+ self.length_remaining = length
+
+ def _prepare_apple_single_v1_file(self, num_entries):
+ self._prepare_apple_single_file(num_entries)
+
+ def _prepare_apple_single_v2_file(self, num_entries):
+ self._prepare_apple_single_file(num_entries)
+
+ def read(self, size=-1):
+ if size == 0 or self.length_remaining == 0:
+ return ''
+ elif size < 0:
+ s = self._read_exactly(self.length_remaining)
+ if len(s) < self.length_remaining:
+ raise AppleSingleEOFError('AppleSingle data fork truncated')
+ self.length_remaining = 0
+ return s
+ else:
+ # The length of this read is allowed to be shorter than the
+ # requested size:
+ s = self.stream.read(min(size, self.length_remaining))
+ if not s:
+ raise AppleSingleEOFError()
+ self.length_remaining -= len(s)
+ return s
+
+ def close(self):
+ self.stream.close()
+ self.stream = None
+
+
+class CompoundStream(object):
+ """A stream that reads from a series of streams, one after the other."""
+
+ def __init__(self, *streams):
+ self.streams = list(streams)
+ self.stream_index = 0
+
+ def read(self, size=-1):
+ if size < 0:
+ retval = []
+ while self.stream_index < len(self.streams):
+ retval.append(self.streams[self.stream_index].read())
+ self.stream_index += 1
+ return ''.join(retval)
+ else:
+ while self.stream_index < len(self.streams):
+ s = self.streams[self.stream_index].read(size)
+ if s:
+ # This may not be the full size requested, but that is OK:
+ return s
+ else:
+ # That stream was empty; proceed to the next stream:
+ self.stream_index += 1
+
+ # No streams are left:
+ return ''
+
+ def close(self):
+ for stream in self.streams:
+ stream.close()
+ self.streams = None
+
+
+def get_maybe_apple_single_stream(stream):
+ """Treat STREAM as AppleSingle if possible; otherwise treat it literally.
+
+ If STREAM is in AppleSingle format, then return a stream that will
+ output the data fork of the original stream. Otherwise, return a
+ stream that will output the original file contents literally.
+
+ Be careful not to read from STREAM after it has already hit EOF."""
+
+ try:
+ return AppleSingleFilter(stream)
+ except AppleSingleIncorrectMagicError, e:
+ # This is OK; the file is not AppleSingle, so we read it normally:
+ string_io = StringIO(e.data_read)
+ if e.eof:
+ # The original stream already reached EOF, so the part already
+ # read contains the complete file contents:
+ return string_io
+ else:
+ # The stream needs to output the part already read followed by
+ # whatever hasn't been read of the original stream:
+ return CompoundStream(string_io, stream)
+
+
+if __name__ == '__main__':
+ # For fun and testing, allow use of this file as a pipe if it is
+ # invoked as a script. Specifically, if stdin is in AppleSingle
+ # format, then output only its data fork; otherwise, output it
+ # unchanged.
+ #
+ # This might not work on systems where sys.stdin is opened in text
+ # mode.
+ #
+ # Remember to set PYTHONPATH to point to the main cvs2svn directory.
+
+ import sys
+
+ #CHUNK_SIZE = -1
+ CHUNK_SIZE = 100
+
+ f = get_maybe_apple_single_stream(sys.stdin)
+
+ if CHUNK_SIZE < 0:
+ sys.stdout.write(f.read())
+ else:
+ while True:
+ s = f.read(CHUNK_SIZE)
+ if not s:
+ break
+ sys.stdout.write(s)
+
+
diff --git a/cvs2svn_lib/artifact.py b/cvs2svn_lib/artifact.py
new file mode 100644
index 0000000..99d6945
--- /dev/null
+++ b/cvs2svn_lib/artifact.py
@@ -0,0 +1,59 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module defines Artifact types to be used with an ArtifactManager."""
+
+
+import os
+
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.log import Log
+
+
+class Artifact(object):
+ """An object that is created, used across passes, then cleaned up."""
+
+ def __init__(self):
+ # The set of passes that need this artifact. This field is
+ # maintained by ArtifactManager.
+ self._passes_needed = set()
+
+ def cleanup(self):
+ """This artifact is no longer needed; clean it up."""
+
+ pass
+
+
+class TempFile(Artifact):
+ """A temporary file that can be used across cvs2svn passes."""
+
+ def __init__(self, basename):
+ Artifact.__init__(self)
+ self.basename = basename
+
+ def _get_filename(self):
+ return Ctx().get_temp_filename(self.basename)
+
+ filename = property(_get_filename)
+
+ def cleanup(self):
+ Log().verbose("Deleting", self.filename)
+ os.unlink(self.filename)
+
+ def __str__(self):
+ return 'Temporary file %r' % (self.filename,)
+
+
diff --git a/cvs2svn_lib/artifact_manager.py b/cvs2svn_lib/artifact_manager.py
new file mode 100644
index 0000000..08f0ec7
--- /dev/null
+++ b/cvs2svn_lib/artifact_manager.py
@@ -0,0 +1,256 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module manages the artifacts produced by conversion passes."""
+
+
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.artifact import TempFile
+
+
+class ArtifactNotActiveError(Exception):
+ """An artifact was requested when no passes that have registered
+ that they need it are active."""
+
+ def __init__(self, artifact_name):
+ Exception.__init__(
+ self, 'Artifact %s is not currently active' % artifact_name)
+
+
+class ArtifactManager:
+ """Manage artifacts that are created by one pass but needed by others.
+
+ This class is responsible for cleaning up artifacts once they are no
+ longer needed. The trick is that cvs2svn can be run pass by pass,
+ so not all passes might be executed during a specific program run.
+
+ To use this class:
+
+ - Call artifact_manager.set_artifact(name, artifact) once for each
+ known artifact.
+
+ - Call artifact_manager.creates(which_pass, artifact) to indicate
+ that WHICH_PASS is the pass that creates ARTIFACT.
+
+ - Call artifact_manager.uses(which_pass, artifact) to indicate that
+ WHICH_PASS needs to use ARTIFACT.
+
+ There are also helper methods register_temp_file(),
+ register_artifact_needed(), and register_temp_file_needed() which
+ combine some useful operations.
+
+ Then, in pass order:
+
+ - Call pass_skipped() for any passes that were already executed
+ during a previous cvs2svn run.
+
+ - Call pass_started() when a pass is about to start execution.
+
+ - If a pass that has been started will be continued during the next
+ program run, then call pass_continued().
+
+ - If a pass that has been started finishes execution, call
+ pass_done(), to allow any artifacts that won't be needed anymore
+ to be cleaned up.
+
+ - Call pass_deferred() for any passes that have been deferred to a
+ future cvs2svn run.
+
+ Finally:
+
+ - Call check_clean() to verify that all artifacts have been
+ accounted for."""
+
+ def __init__(self):
+ # A map { artifact_name : artifact } of known artifacts.
+ self._artifacts = { }
+
+ # A map { pass : set_of_artifacts }, where set_of_artifacts is a
+ # set of artifacts needed by the pass.
+ self._pass_needs = { }
+
+ # A set of passes that are currently being executed.
+ self._active_passes = set()
+
+ def set_artifact(self, name, artifact):
+ """Add ARTIFACT to the list of artifacts that we manage.
+
+ Store it under NAME."""
+
+ assert name not in self._artifacts
+ self._artifacts[name] = artifact
+
+ def get_artifact(self, name):
+ """Return the artifact with the specified name.
+
+ If the artifact does not currently exist, raise a KeyError. If it
+ is not registered as being needed by one of the active passes,
+ raise an ArtifactNotActiveError."""
+
+ artifact = self._artifacts[name]
+ for active_pass in self._active_passes:
+ if artifact in self._pass_needs[active_pass]:
+ # OK
+ return artifact
+ else:
+ raise ArtifactNotActiveError(name)
+
+ def creates(self, which_pass, artifact):
+ """Register that WHICH_PASS creates ARTIFACT.
+
+ ARTIFACT must already have been registered."""
+
+ # An artifact is automatically "needed" in the pass in which it is
+ # created:
+ self.uses(which_pass, artifact)
+
+ def uses(self, which_pass, artifact):
+ """Register that WHICH_PASS uses ARTIFACT.
+
+ ARTIFACT must already have been registered."""
+
+ artifact._passes_needed.add(which_pass)
+ if which_pass in self._pass_needs:
+ self._pass_needs[which_pass].add(artifact)
+ else:
+ self._pass_needs[which_pass] = set([artifact])
+
+ def register_temp_file(self, basename, which_pass):
+ """Register a temporary file with base name BASENAME as an artifact.
+
+ Return the filename of the temporary file."""
+
+ artifact = TempFile(basename)
+ self.set_artifact(basename, artifact)
+ self.creates(which_pass, artifact)
+
+ def get_temp_file(self, basename):
+ """Return the filename of the temporary file with the specified BASENAME.
+
+ If the temporary file is not an existing, registered TempFile,
+ raise a KeyError."""
+
+ return self.get_artifact(basename).filename
+
+ def register_artifact_needed(self, artifact_name, which_pass):
+ """Register that WHICH_PASS uses the artifact named ARTIFACT_NAME.
+
+ An artifact with this name must already have been registered."""
+
+ artifact = self._artifacts[artifact_name]
+ artifact._passes_needed.add(which_pass)
+ if which_pass in self._pass_needs:
+ self._pass_needs[which_pass].add(artifact)
+ else:
+ self._pass_needs[which_pass] = set([artifact,])
+
+ def register_temp_file_needed(self, basename, which_pass):
+ """Register that a temporary file is needed by WHICH_PASS.
+
+ Register that the temporary file with base name BASENAME is needed
+ by WHICH_PASS."""
+
+ self.register_artifact_needed(basename, which_pass)
+
+ def _unregister_artifacts(self, which_pass):
+ """Unregister any artifacts that were needed for WHICH_PASS.
+
+ Return a list of artifacts that are no longer needed at all."""
+
+ try:
+ artifacts = list(self._pass_needs[which_pass])
+ except KeyError:
+ # No artifacts were needed for that pass:
+ return []
+
+ del self._pass_needs[which_pass]
+
+ unneeded_artifacts = []
+ for artifact in artifacts:
+ artifact._passes_needed.remove(which_pass)
+ if not artifact._passes_needed:
+ unneeded_artifacts.append(artifact)
+
+ return unneeded_artifacts
+
+ def pass_skipped(self, which_pass):
+ """WHICH_PASS was executed during a previous cvs2svn run.
+
+ Its artifacts were created then, and any artifacts that would
+ normally be cleaned up after this pass have already been cleaned
+ up."""
+
+ self._unregister_artifacts(which_pass)
+
+ def pass_started(self, which_pass):
+ """WHICH_PASS is starting."""
+
+ self._active_passes.add(which_pass)
+
+ def pass_continued(self, which_pass):
+ """WHICH_PASS will be continued during the next program run.
+
+ WHICH_PASS, which has already been started, will be continued
+ during the next program run. Unregister any artifacts that would
+ be cleaned up at the end of WHICH_PASS without actually cleaning
+ them up."""
+
+ self._active_passes.remove(which_pass)
+ self._unregister_artifacts(which_pass)
+
+ def pass_done(self, which_pass, skip_cleanup):
+ """WHICH_PASS is done.
+
+ Clean up all artifacts that are no longer needed. If SKIP_CLEANUP
+ is True, then just do the bookkeeping without actually calling
+ artifact.cleanup()."""
+
+ self._active_passes.remove(which_pass)
+ artifacts = self._unregister_artifacts(which_pass)
+ if not skip_cleanup:
+ for artifact in artifacts:
+ artifact.cleanup()
+
+ def pass_deferred(self, which_pass):
+ """WHICH_PASS is being deferred until a future cvs2svn run.
+
+ Unregister any artifacts that would be cleaned up during
+ WHICH_PASS."""
+
+ self._unregister_artifacts(which_pass)
+
+ def check_clean(self):
+ """All passes have been processed.
+
+ Output a warning message if all artifacts have not been accounted
+ for. (This is mainly a consistency check, that no artifacts were
+ registered under nonexistent passes.)"""
+
+ unclean_artifacts = [
+ str(artifact)
+ for artifact in self._artifacts.values()
+ if artifact._passes_needed]
+
+ if unclean_artifacts:
+ Log().warn(
+ 'INTERNAL: The following artifacts were not cleaned up:\n %s\n'
+ % ('\n '.join(unclean_artifacts)))
+
+
+# The default ArtifactManager instance:
+artifact_manager = ArtifactManager()
+
+
diff --git a/cvs2svn_lib/bzr_run_options.py b/cvs2svn_lib/bzr_run_options.py
new file mode 100644
index 0000000..5332dff
--- /dev/null
+++ b/cvs2svn_lib/bzr_run_options.py
@@ -0,0 +1,175 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module manages cvs2bzr run options."""
+
+
+import sys
+import datetime
+import codecs
+
+from cvs2svn_lib.version import VERSION
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.run_options import not_both
+from cvs2svn_lib.run_options import RunOptions
+from cvs2svn_lib.run_options import ContextOption
+from cvs2svn_lib.run_options import IncompatibleOption
+from cvs2svn_lib.run_options import authors
+from cvs2svn_lib.man_writer import ManWriter
+from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
+from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
+from cvs2svn_lib.git_run_options import GitRunOptions
+from cvs2svn_lib.git_output_option import GitRevisionInlineWriter
+from cvs2svn_lib.git_output_option import GitOutputOption
+from cvs2svn_lib.revision_manager import NullRevisionRecorder
+from cvs2svn_lib.revision_manager import NullRevisionExcluder
+
+
+short_desc = 'convert a cvs repository into a Bazaar repository'
+
+synopsis = """\
+.B cvs2bzr
+[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR
+.br
+.B cvs2bzr
+[\\fIOPTION\\fR]... \\fI--options=PATH\\fR
+"""
+
+description="""\
+Convert a CVS repository into a Bazaar repository, including history.
+
+"""
+long_desc = """\
+Create a new Bazaar repository based on the version history stored in a
+CVS repository. Each CVS commit will be mirrored in the Bazaar
+repository, including such information as date of commit and id of the
+committer.
+.P
+The output of this program is a "fast-import dumpfile", which
+can be loaded into a Bazaar repository using the Bazaar FastImport
+Plugin, available from https://launchpad.net/bzr-fastimport.
+
+.P
+\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS
+repository that you want to convert. This path doesn't have to be the
+top level directory of a CVS repository; it can point at a project
+within a repository, in which case only that project will be
+converted. This path or one of its parent directories has to contain
+a subdirectory called CVSROOT (though the CVSROOT directory can be
+empty).
+.P
+It is not possible directly to convert a CVS repository to which you
+only have remote access, but the FAQ describes tools that may be used
+to create a local copy of a remote CVS repository.
+"""
+
+files = """\
+A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by
+\\fB--tmpdir\\fR) is used as scratch space for temporary data files.
+"""
+
+see_also = [
+ ('cvs', '1'),
+ ('bzr', '1'),
+ ]
+
+
+class BzrRunOptions(GitRunOptions):
+
+ def get_description(self):
+ return description
+
+ def _get_output_options_group(self):
+ group = RunOptions._get_output_options_group(self)
+
+ group.add_option(IncompatibleOption(
+ '--dumpfile', type='string',
+ action='store',
+ help='path to which the data should be written',
+ man_help=(
+ 'Write the blobs and revision data to \\fIpath\\fR.'
+ ),
+ metavar='PATH',
+ ))
+ group.add_option(ContextOption(
+ '--dry-run',
+ action='store_true',
+ help=(
+ 'do not create any output; just print what would happen.'
+ ),
+ man_help=(
+ 'Do not create any output; just print what would happen.'
+ ),
+ ))
+
+ return group
+
+ def callback_manpage(self, option, opt_str, value, parser):
+ f = codecs.getwriter('utf_8')(sys.stdout)
+ ManWriter(
+ parser,
+ section='1',
+ date=datetime.date.today(),
+ source='Version %s' % (VERSION,),
+ manual='User Commands',
+ short_desc=short_desc,
+ synopsis=synopsis,
+ long_desc=long_desc,
+ files=files,
+ authors=authors,
+ see_also=see_also,
+ ).write_manpage(f)
+ sys.exit(0)
+
+ def process_io_options(self):
+ """Process input/output options.
+
+ Process options related to extracting data from the CVS repository
+ and writing to a Bazaar-friendly fast-import file."""
+
+ ctx = Ctx()
+ options = self.options
+
+ not_both(options.use_rcs, '--use-rcs',
+ options.use_cvs, '--use-cvs')
+
+ if options.use_rcs:
+ revision_reader = RCSRevisionReader(
+ co_executable=options.co_executable
+ )
+ else:
+ # --use-cvs is the default:
+ revision_reader = CVSRevisionReader(
+ cvs_executable=options.cvs_executable
+ )
+
+ if not ctx.dry_run and not options.dumpfile:
+ raise FatalError("must pass '--dry-run' or '--dumpfile' option.")
+
+ ctx.revision_recorder = NullRevisionRecorder()
+ ctx.revision_excluder = NullRevisionExcluder()
+ ctx.revision_reader = None
+
+ ctx.output_option = GitOutputOption(
+ options.dumpfile,
+ GitRevisionInlineWriter(revision_reader),
+ max_merges=None,
+ # Optional map from CVS author names to bzr author names:
+ author_transforms={}, # FIXME
+ )
+
+
diff --git a/cvs2svn_lib/changeset.py b/cvs2svn_lib/changeset.py
new file mode 100644
index 0000000..1022e0a
--- /dev/null
+++ b/cvs2svn_lib/changeset.py
@@ -0,0 +1,269 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Manage change sets."""
+
+
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+from cvs2svn_lib.time_range import TimeRange
+from cvs2svn_lib.changeset_graph_node import ChangesetGraphNode
+
+
+class Changeset(object):
+ """A set of cvs_items that might potentially form a single change set."""
+
+ def __init__(self, id, cvs_item_ids):
+ self.id = id
+ self.cvs_item_ids = list(cvs_item_ids)
+
+ def iter_cvs_items(self):
+ """Yield the CVSItems within this Changeset."""
+
+ for (id, cvs_item) in Ctx()._cvs_items_db.get_many(self.cvs_item_ids):
+ assert cvs_item is not None
+ yield cvs_item
+
+ def get_projects_opened(self):
+ """Return the set of projects that might be opened by this changeset."""
+
+ raise NotImplementedError()
+
+ def create_graph_node(self, cvs_item_to_changeset_id):
+ """Return a ChangesetGraphNode for this Changeset."""
+
+ raise NotImplementedError()
+
+ def create_split_changeset(self, id, cvs_item_ids):
+ """Return a Changeset with the specified contents.
+
+ This method is only implemented for changesets that can be split.
+ The type of the new changeset should be the same as that of SELF,
+ and any other information from SELF should also be copied to the
+ new changeset."""
+
+ raise NotImplementedError()
+
+ def __getstate__(self):
+ return (self.id, self.cvs_item_ids,)
+
+ def __setstate__(self, state):
+ (self.id, self.cvs_item_ids,) = state
+
+ def __cmp__(self, other):
+ raise NotImplementedError()
+
+ def __str__(self):
+ raise NotImplementedError()
+
+ def __repr__(self):
+ return '%s [%s]' % (
+ self, ', '.join(['%x' % id for id in self.cvs_item_ids]),)
+
+
+class RevisionChangeset(Changeset):
+ """A Changeset consisting of CVSRevisions."""
+
+ _sort_order = 3
+
+ def create_graph_node(self, cvs_item_to_changeset_id):
+ time_range = TimeRange()
+ pred_ids = set()
+ succ_ids = set()
+
+ for cvs_item in self.iter_cvs_items():
+ time_range.add(cvs_item.timestamp)
+
+ for pred_id in cvs_item.get_pred_ids():
+ changeset_id = cvs_item_to_changeset_id.get(pred_id)
+ if changeset_id is not None:
+ pred_ids.add(changeset_id)
+
+ for succ_id in cvs_item.get_succ_ids():
+ changeset_id = cvs_item_to_changeset_id.get(succ_id)
+ if changeset_id is not None:
+ succ_ids.add(changeset_id)
+
+ return ChangesetGraphNode(self, time_range, pred_ids, succ_ids)
+
+ def create_split_changeset(self, id, cvs_item_ids):
+ return RevisionChangeset(id, cvs_item_ids)
+
+ def __cmp__(self, other):
+ return cmp(self._sort_order, other._sort_order) \
+ or cmp(self.id, other.id)
+
+ def __str__(self):
+ return 'RevisionChangeset<%x>' % (self.id,)
+
+
+class OrderedChangeset(Changeset):
+ """A Changeset of CVSRevisions whose preliminary order is known.
+
+ The first changeset ordering involves only RevisionChangesets, and
+ results in a full ordering of RevisionChangesets (i.e., a linear
+ chain of dependencies with the order consistent with the
+ dependencies). These OrderedChangesets form the skeleton for the
+ full topological sort that includes SymbolChangesets as well."""
+
+ _sort_order = 2
+
+ def __init__(self, id, cvs_item_ids, ordinal, prev_id, next_id):
+ Changeset.__init__(self, id, cvs_item_ids)
+
+ # The order of this changeset among all OrderedChangesets:
+ self.ordinal = ordinal
+
+ # The changeset id of the previous OrderedChangeset, or None if
+ # this is the first OrderedChangeset:
+ self.prev_id = prev_id
+
+ # The changeset id of the next OrderedChangeset, or None if this
+ # is the last OrderedChangeset:
+ self.next_id = next_id
+
+ def get_projects_opened(self):
+ retval = set()
+ for cvs_item in self.iter_cvs_items():
+ retval.add(cvs_item.cvs_file.project)
+ return retval
+
+ def create_graph_node(self, cvs_item_to_changeset_id):
+ time_range = TimeRange()
+
+ pred_ids = set()
+ succ_ids = set()
+
+ if self.prev_id is not None:
+ pred_ids.add(self.prev_id)
+
+ if self.next_id is not None:
+ succ_ids.add(self.next_id)
+
+ for cvs_item in self.iter_cvs_items():
+ time_range.add(cvs_item.timestamp)
+
+ for pred_id in cvs_item.get_symbol_pred_ids():
+ changeset_id = cvs_item_to_changeset_id.get(pred_id)
+ if changeset_id is not None:
+ pred_ids.add(changeset_id)
+
+ for succ_id in cvs_item.get_symbol_succ_ids():
+ changeset_id = cvs_item_to_changeset_id.get(succ_id)
+ if changeset_id is not None:
+ succ_ids.add(changeset_id)
+
+ return ChangesetGraphNode(self, time_range, pred_ids, succ_ids)
+
+ def __getstate__(self):
+ return (
+ Changeset.__getstate__(self),
+ self.ordinal, self.prev_id, self.next_id,)
+
+ def __setstate__(self, state):
+ (changeset_state, self.ordinal, self.prev_id, self.next_id,) = state
+ Changeset.__setstate__(self, changeset_state)
+
+ def __cmp__(self, other):
+ return cmp(self._sort_order, other._sort_order) \
+ or cmp(self.id, other.id)
+
+ def __str__(self):
+ return 'OrderedChangeset<%x(%d)>' % (self.id, self.ordinal,)
+
+
+class SymbolChangeset(Changeset):
+ """A Changeset consisting of CVSSymbols."""
+
+ def __init__(self, id, symbol, cvs_item_ids):
+ Changeset.__init__(self, id, cvs_item_ids)
+ self.symbol = symbol
+
+ def get_projects_opened(self):
+ # A SymbolChangeset can never open a project.
+ return set()
+
+ def create_graph_node(self, cvs_item_to_changeset_id):
+ pred_ids = set()
+ succ_ids = set()
+
+ for cvs_item in self.iter_cvs_items():
+ for pred_id in cvs_item.get_pred_ids():
+ changeset_id = cvs_item_to_changeset_id.get(pred_id)
+ if changeset_id is not None:
+ pred_ids.add(changeset_id)
+
+ for succ_id in cvs_item.get_succ_ids():
+ changeset_id = cvs_item_to_changeset_id.get(succ_id)
+ if changeset_id is not None:
+ succ_ids.add(changeset_id)
+
+ return ChangesetGraphNode(self, TimeRange(), pred_ids, succ_ids)
+
+ def __cmp__(self, other):
+ return cmp(self._sort_order, other._sort_order) \
+ or cmp(self.symbol, other.symbol) \
+ or cmp(self.id, other.id)
+
+ def __getstate__(self):
+ return (Changeset.__getstate__(self), self.symbol.id,)
+
+ def __setstate__(self, state):
+ (changeset_state, symbol_id) = state
+ Changeset.__setstate__(self, changeset_state)
+ self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
+
+
+class BranchChangeset(SymbolChangeset):
+ """A Changeset consisting of CVSBranches."""
+
+ _sort_order = 1
+
+ def create_split_changeset(self, id, cvs_item_ids):
+ return BranchChangeset(id, self.symbol, cvs_item_ids)
+
+ def __str__(self):
+ return 'BranchChangeset<%x>("%s")' % (self.id, self.symbol,)
+
+
+class TagChangeset(SymbolChangeset):
+ """A Changeset consisting of CVSTags."""
+
+ _sort_order = 0
+
+ def create_split_changeset(self, id, cvs_item_ids):
+ return TagChangeset(id, self.symbol, cvs_item_ids)
+
+ def __str__(self):
+ return 'TagChangeset<%x>("%s")' % (self.id, self.symbol,)
+
+
+def create_symbol_changeset(id, symbol, cvs_item_ids):
+ """Factory function for SymbolChangesets.
+
+ Return a BranchChangeset or TagChangeset, depending on the type of
+ SYMBOL. SYMBOL must be a Branch or Tag."""
+
+ if isinstance(symbol, Branch):
+ return BranchChangeset(id, symbol, cvs_item_ids)
+ if isinstance(symbol, Tag):
+ return TagChangeset(id, symbol, cvs_item_ids)
+ else:
+ raise InternalError('Unknown symbol type %s' % (symbol,))
+
+
diff --git a/cvs2svn_lib/changeset_database.py b/cvs2svn_lib/changeset_database.py
new file mode 100644
index 0000000..82ca904
--- /dev/null
+++ b/cvs2svn_lib/changeset_database.py
@@ -0,0 +1,70 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes to store changesets."""
+
+
+from cvs2svn_lib.changeset import Changeset
+from cvs2svn_lib.changeset import RevisionChangeset
+from cvs2svn_lib.changeset import OrderedChangeset
+from cvs2svn_lib.changeset import SymbolChangeset
+from cvs2svn_lib.changeset import BranchChangeset
+from cvs2svn_lib.changeset import TagChangeset
+from cvs2svn_lib.record_table import UnsignedIntegerPacker
+from cvs2svn_lib.record_table import MmapRecordTable
+from cvs2svn_lib.record_table import RecordTable
+from cvs2svn_lib.database import IndexedStore
+from cvs2svn_lib.serializer import PrimedPickleSerializer
+
+
+# Should the CVSItemToChangesetTable database files be memory mapped?
+# This speeds up the conversion but can cause the computer's virtual
+# address space to be exhausted. This option can be changed
+# externally, affecting any CVSItemToChangesetTables opened subsequent
+# to the change:
+use_mmap_for_cvs_item_to_changeset_table = False
+
+
+def CVSItemToChangesetTable(filename, mode):
+ if use_mmap_for_cvs_item_to_changeset_table:
+ return MmapRecordTable(filename, mode, UnsignedIntegerPacker())
+ else:
+ return RecordTable(filename, mode, UnsignedIntegerPacker())
+
+
+class ChangesetDatabase(IndexedStore):
+ def __init__(self, filename, index_filename, mode):
+ primer = (
+ Changeset,
+ RevisionChangeset,
+ OrderedChangeset,
+ SymbolChangeset,
+ BranchChangeset,
+ TagChangeset,
+ )
+ IndexedStore.__init__(
+ self, filename, index_filename, mode, PrimedPickleSerializer(primer))
+
+ def store(self, changeset):
+ self.add(changeset)
+
+ def keys(self):
+ return list(self.iterkeys())
+
+ def close(self):
+ IndexedStore.close(self)
+
+
diff --git a/cvs2svn_lib/changeset_graph.py b/cvs2svn_lib/changeset_graph.py
new file mode 100644
index 0000000..64ebf2c
--- /dev/null
+++ b/cvs2svn_lib/changeset_graph.py
@@ -0,0 +1,456 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""The changeset dependency graph."""
+
+
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.changeset import RevisionChangeset
+from cvs2svn_lib.changeset import OrderedChangeset
+from cvs2svn_lib.changeset import BranchChangeset
+from cvs2svn_lib.changeset import TagChangeset
+
+
+class CycleInGraphException(Exception):
+ def __init__(self, cycle):
+ Exception.__init__(
+ self,
+ 'Cycle found in graph: %s'
+ % ' -> '.join(map(str, cycle + [cycle[0]])))
+
+
+class NoPredNodeInGraphException(Exception):
+ def __init__(self, node):
+ Exception.__init__(self, 'Node %s has no predecessors' % (node,))
+
+
+class _NoPredNodes:
+  """Manage changesets that are to be processed.
+
+  Output the changesets in order by time and changeset type.
+
+  The implementation of this class is crude: as changesets are added,
+  they are appended to a list.  When one is needed, the list is sorted
+  in reverse order and then the last changeset in the list is
+  returned.  To reduce the number of sorts that are needed, the class
+  keeps track of whether the list is currently sorted.
+
+  All this repeated sorting is wasteful and unnecessary.  We should
+  instead use a heap to output the changeset order, which would
+  require O(lg N) work per add()/get() rather than O(1) and O(N lg N)
+  as in the current implementation [1].  But: (1) the lame interface
+  of heapq doesn't allow an arbitrary compare function, so we would
+  have to store extra information in the array elements; (2) in
+  practice, the number of items in the list at any time is only a tiny
+  fraction of the total number of changesets; and (3) testing showed
+  that the heapq implementation is no faster than this one (perhaps
+  because of the increased memory usage).
+
+  [1] According to Objects/listsort.txt in the Python source code, the
+  Python list-sorting code is heavily optimized for arrays that have
+  runs of already-sorted elements, so the current cost of get() is
+  probably closer to O(N) than O(N lg N)."""
+
+  def __init__(self, changeset_db):
+    self.changeset_db = changeset_db
+    # A list [(node, changeset,)] of nodes with no predecessors:
+    self._nodes = []
+    # True iff self._nodes is currently sorted (in reverse order):
+    self._sorted = True
+
+  def __len__(self):
+    """Return the number of nodes currently awaiting processing."""
+
+    return len(self._nodes)
+
+  @staticmethod
+  def _compare((node_1, changeset_1), (node_2, changeset_2)):
+    """Define a (reverse) ordering on self._nodes."""
+
+    # NOTE: Python 2 tuple-parameter unpacking.  The comparison is
+    # reversed (arguments 2 vs 1) so that the smallest entry sorts to
+    # the *end* of the list, where it can be popped cheaply by get():
+    return cmp(node_2.time_range, node_1.time_range) \
+           or cmp(changeset_2, changeset_1)
+
+  def add(self, node):
+    """Record NODE (and its changeset) for later retrieval via get()."""
+
+    self._nodes.append( (node, self.changeset_db[node.id],) )
+    self._sorted = False
+
+  def get(self):
+    """Return (node, changeset,) of the smallest node.
+
+    'Smallest' is defined by self._compare()."""
+
+    if not self._sorted:
+      self._nodes.sort(self._compare)
+      self._sorted = True
+    return self._nodes.pop()
+
+
+class ChangesetGraph(object):
+  """A graph of changesets and their dependencies."""
+
+  def __init__(self, changeset_db, cvs_item_to_changeset_id):
+    # Persistent store of Changeset objects, indexed by changeset id:
+    self._changeset_db = changeset_db
+    # Table mapping CVSItem id -> id of the changeset containing it:
+    self._cvs_item_to_changeset_id = cvs_item_to_changeset_id
+    # A map { id : ChangesetGraphNode }
+    self.nodes = {}
+
+  def close(self):
+    """Close the underlying databases and drop our references to them."""
+
+    self._cvs_item_to_changeset_id.close()
+    self._cvs_item_to_changeset_id = None
+    self._changeset_db.close()
+    self._changeset_db = None
+
+  def add_changeset(self, changeset):
+    """Add CHANGESET to this graph.
+
+    Determine and record any dependencies to changesets that are
+    already in the graph.  This method does not affect the databases."""
+
+    node = changeset.create_graph_node(self._cvs_item_to_changeset_id)
+
+    # Now tie the node into our graph.  If a changeset referenced by
+    # node is already in our graph, then add the backwards connection
+    # from the other node to the new one.  If not, then delete the
+    # changeset from node.
+
+    for pred_id in list(node.pred_ids):
+      pred_node = self.nodes.get(pred_id)
+      if pred_node is not None:
+        pred_node.succ_ids.add(node.id)
+      else:
+        node.pred_ids.remove(pred_id)
+
+    for succ_id in list(node.succ_ids):
+      succ_node = self.nodes.get(succ_id)
+      if succ_node is not None:
+        succ_node.pred_ids.add(node.id)
+      else:
+        node.succ_ids.remove(succ_id)
+
+    self.nodes[node.id] = node
+
+  def store_changeset(self, changeset):
+    """Record CHANGESET in the databases (the graph is not affected)."""
+
+    for cvs_item_id in changeset.cvs_item_ids:
+      self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id
+    self._changeset_db.store(changeset)
+
+  def add_new_changeset(self, changeset):
+    """Add the new CHANGESET to the graph and also to the databases."""
+
+    if Log().is_on(Log.DEBUG):
+      Log().debug('Adding changeset %r' % (changeset,))
+
+    self.add_changeset(changeset)
+    self.store_changeset(changeset)
+
+  def delete_changeset(self, changeset):
+    """Remove CHANGESET from the graph and also from the databases.
+
+    In fact, we don't remove CHANGESET from
+    self._cvs_item_to_changeset_id, because in practice the CVSItems
+    in CHANGESET are always added again as part of a new CHANGESET,
+    which will cause the old values to be overwritten."""
+
+    if Log().is_on(Log.DEBUG):
+      Log().debug('Removing changeset %r' % (changeset,))
+
+    del self[changeset.id]
+    del self._changeset_db[changeset.id]
+
+  def __nonzero__(self):
+    """Instances are considered True iff they contain any nodes."""
+
+    return bool(self.nodes)
+
+  def __contains__(self, id):
+    """Return True if the specified ID is contained in this graph."""
+
+    return id in self.nodes
+
+  def __getitem__(self, id):
+    """Return the node with the specified ID (KeyError if absent)."""
+
+    return self.nodes[id]
+
+  def get(self, id):
+    """Return the node with the specified ID, or None if it is absent."""
+
+    return self.nodes.get(id)
+
+  def __delitem__(self, id):
+    """Remove the node corresponding to ID.
+
+    Also remove references to it from other nodes.  This method does
+    not change pred_ids or succ_ids of the node being deleted, nor
+    does it affect the databases."""
+
+    node = self[id]
+
+    for succ_id in node.succ_ids:
+      succ = self[succ_id]
+      succ.pred_ids.remove(node.id)
+
+    for pred_id in node.pred_ids:
+      pred = self[pred_id]
+      pred.succ_ids.remove(node.id)
+
+    del self.nodes[node.id]
+
+  def keys(self):
+    """Return a list of the ids of all nodes in the graph."""
+
+    return self.nodes.keys()
+
+  def __iter__(self):
+    """Iterate over the nodes in this graph in arbitrary order."""
+
+    return self.nodes.itervalues()
+
+  def _get_path(self, reachable_changesets, starting_node_id, ending_node_id):
+    """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.
+
+    Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
+    REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
+    changeset that depends on the changeset with ENDING_NODE_ID.  (See
+    the comment in search_for_path() for a description of the format
+    of REACHABLE_CHANGESETS.)
+
+    Return a list of changesets, where the 0th one has ENDING_NODE_ID
+    and the last one has STARTING_NODE_ID.  If there is no such path
+    described in REACHABLE_CHANGESETS, return None."""
+
+    if ending_node_id not in reachable_changesets:
+      return None
+
+    path = [self._changeset_db[ending_node_id]]
+    id = reachable_changesets[ending_node_id][1]
+    while id != starting_node_id:
+      path.append(self._changeset_db[id])
+      id = reachable_changesets[id][1]
+    path.append(self._changeset_db[starting_node_id])
+    return path
+
+  def search_for_path(self, starting_node_id, stop_set):
+    """Search for paths to prerequisites of STARTING_NODE_ID.
+
+    Try to find the shortest dependency path that causes the changeset
+    with STARTING_NODE_ID to depend (directly or indirectly) on one of
+    the changesets whose ids are contained in STOP_SET.
+
+    We consider direct and indirect dependencies in the sense that the
+    changeset can be reached by following a chain of predecessor nodes.
+
+    When one of the changeset_ids in STOP_SET is found, terminate the
+    search and return the path from that changeset_id to
+    STARTING_NODE_ID.  If no path is found to a node in STOP_SET,
+    return None."""
+
+    # A map {node_id : (steps, next_node_id)} where NODE_ID can be
+    # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
+    # is the id of the previous node in the path.  STARTING_NODE_ID is
+    # only included as a key if there is a loop leading back to it.
+    reachable_changesets = {}
+
+    # A list of (node_id, steps) that still have to be investigated,
+    # and STEPS is the number of steps to get to NODE_ID.
+    open_nodes = [(starting_node_id, 0)]
+    # A breadth-first search:
+    while open_nodes:
+      (id, steps) = open_nodes.pop(0)
+      steps += 1
+      node = self[id]
+      for pred_id in node.pred_ids:
+        # Since the search is breadth-first, we only have to set steps
+        # that don't already exist.
+        if pred_id not in reachable_changesets:
+          reachable_changesets[pred_id] = (steps, id)
+          open_nodes.append((pred_id, steps))
+
+          # See if we can stop now:
+          if pred_id in stop_set:
+            return self._get_path(
+                reachable_changesets, starting_node_id, pred_id
+                )
+
+    return None
+
+  def consume_nopred_nodes(self):
+    """Remove and yield changesets in dependency order.
+
+    Each iteration, this generator yields a (changeset, time_range)
+    tuple for the oldest changeset in the graph that doesn't have any
+    predecessor nodes (i.e., it is ready to be committed).  This is
+    continued until there are no more nodes without predecessors
+    (either because the graph has been emptied, or because of cycles
+    in the graph).
+
+    Among the changesets that are ready to be processed, the earliest
+    one (according to the sorting of the TimeRange class) is yielded
+    each time.  (This is the order in which the changesets should be
+    committed.)
+
+    The graph should not be otherwise altered while this generator is
+    running."""
+
+    # Find a list of (node,changeset,) where the node has no
+    # predecessors:
+    nopred_nodes = _NoPredNodes(self._changeset_db)
+    for node in self.nodes.itervalues():
+      if not node.pred_ids:
+        nopred_nodes.add(node)
+
+    while nopred_nodes:
+      (node, changeset,) = nopred_nodes.get()
+      del self[node.id]
+      # See if any successors are now ready for extraction:
+      for succ_id in node.succ_ids:
+        succ = self[succ_id]
+        if not succ.pred_ids:
+          nopred_nodes.add(succ)
+      yield (changeset, node.time_range)
+
+  def find_cycle(self, starting_node_id):
+    """Find a cycle in the dependency graph and return it.
+
+    Use STARTING_NODE_ID as the place to start looking.  This routine
+    must only be called after all nopred_nodes have been removed.
+    Return the list of changesets that are involved in the cycle
+    (ordered such that cycle[n-1] is a predecessor of cycle[n] and
+    cycle[-1] is a predecessor of cycle[0])."""
+
+    # Since there are no nopred nodes in the graph, all nodes in the
+    # graph must either be involved in a cycle or depend (directly or
+    # indirectly) on nodes that are in a cycle.
+
+    # Pick an arbitrary node:
+    node = self[starting_node_id]
+
+    seen_nodes = [node]
+
+    # Follow it backwards until a node is seen a second time; then we
+    # have our cycle.
+    while True:
+      # Pick an arbitrary predecessor of node.  It must exist, because
+      # there are no nopred nodes:
+      try:
+        node_id = node.pred_ids.__iter__().next()
+      except StopIteration:
+        raise NoPredNodeInGraphException(node)
+      node = self[node_id]
+      try:
+        i = seen_nodes.index(node)
+      except ValueError:
+        seen_nodes.append(node)
+      else:
+        seen_nodes = seen_nodes[i:]
+        seen_nodes.reverse()
+        return [self._changeset_db[node.id] for node in seen_nodes]
+
+  def consume_graph(self, cycle_breaker=None):
+    """Remove and yield changesets from this graph in dependency order.
+
+    Each iteration, this generator yields a (changeset, time_range)
+    tuple for the oldest changeset in the graph that doesn't have any
+    predecessor nodes.  If CYCLE_BREAKER is specified, then call
+    CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
+    is the list of changesets that are involved in the cycle (ordered
+    such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
+    a predecessor of cycle[0]).  CYCLE_BREAKER should break the cycle
+    in place then return.
+
+    If a cycle is found and CYCLE_BREAKER was not specified, raise
+    CycleInGraphException."""
+
+    while True:
+      for (changeset, time_range) in self.consume_nopred_nodes():
+        yield (changeset, time_range)
+
+      # If there are any nodes left in the graph, then there must be
+      # at least one cycle.  Find a cycle and process it.
+
+      # This might raise StopIteration, but that indicates that the
+      # graph has been fully consumed, so we just let the exception
+      # escape.
+      start_node_id = self.nodes.iterkeys().next()
+
+      cycle = self.find_cycle(start_node_id)
+
+      if cycle_breaker is not None:
+        cycle_breaker(cycle)
+      else:
+        raise CycleInGraphException(cycle)
+
+  def __repr__(self):
+    """For convenience only.  The format is subject to change at any time."""
+
+    if self.nodes:
+      return 'ChangesetGraph:\n%s' \
+             % ''.join(['  %r\n' % node for node in self])
+    else:
+      return 'ChangesetGraph:\n  EMPTY\n'
+
+  # Fill colors used for nodes in the DOT output, keyed by the class
+  # of the changeset that the node represents:
+  node_colors = {
+      RevisionChangeset : 'lightgreen',
+      OrderedChangeset : 'cyan',
+      BranchChangeset : 'orange',
+      TagChangeset : 'yellow',
+      }
+
+  def output_coarse_dot(self, f):
+    """Output the graph in DOT format to file-like object f.
+
+    Such a file can be rendered into a visual representation of the
+    graph using tools like graphviz.  Include only changesets in the
+    graph, and the dependencies between changesets."""
+
+    f.write('digraph G {\n')
+    for node in self:
+      f.write(
+          '  C%x [style=filled, fillcolor=%s];\n' % (
+              node.id,
+              self.node_colors[self._changeset_db[node.id].__class__],
+              )
+          )
+    f.write('\n')
+
+    for node in self:
+      for succ_id in node.succ_ids:
+        f.write('  C%x -> C%x\n' % (node.id, succ_id,))
+      f.write('\n')
+
+    f.write('}\n')
+
+  def output_fine_dot(self, f):
+    """Output the graph in DOT format to file-like object f.
+
+    Such a file can be rendered into a visual representation of the
+    graph using tools like graphviz.  Include all CVSItems and the
+    CVSItem-CVSItem dependencies in the graph.  Group the CVSItems
+    into clusters by changeset."""
+
+    f.write('digraph G {\n')
+    for node in self:
+      f.write('  subgraph cluster_%x {\n' % (node.id,))
+      f.write('    label = "C%x";\n' % (node.id,))
+      changeset = self._changeset_db[node.id]
+      for item_id in changeset.cvs_item_ids:
+        f.write('    I%x;\n' % (item_id,))
+      f.write('    style=filled;\n')
+      f.write(
+          '    fillcolor=%s;\n'
+          % (self.node_colors[self._changeset_db[node.id].__class__],))
+      f.write('  }\n\n')
+
+    for node in self:
+      changeset = self._changeset_db[node.id]
+      for cvs_item in changeset.iter_cvs_items():
+        for succ_id in cvs_item.get_succ_ids():
+          f.write('  I%x -> I%x;\n' % (cvs_item.id, succ_id,))
+
+      f.write('\n')
+
+    f.write('}\n')
+
+
diff --git a/cvs2svn_lib/changeset_graph_link.py b/cvs2svn_lib/changeset_graph_link.py
new file mode 100644
index 0000000..9d0cc9d
--- /dev/null
+++ b/cvs2svn_lib/changeset_graph_link.py
@@ -0,0 +1,149 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Keep track of counts of different types of changeset links."""
+
+
+
+# Constants classifying how a CVSItem is linked to its changeset's
+# neighbors; LINK_PRED and LINK_SUCC are bit flags that combine into
+# LINK_PASSTHRU:
+
+# A cvs_item doesn't depend on any cvs_items in either pred or succ:
+LINK_NONE = 0
+
+# A cvs_item depends on one or more cvs_items in pred but none in succ:
+LINK_PRED = 1
+
+# A cvs_item depends on one or more cvs_items in succ but none in pred:
+LINK_SUCC = 2
+
+# A cvs_item depends on one or more cvs_items in both pred and succ:
+LINK_PASSTHRU = LINK_PRED | LINK_SUCC
+
+
+class ChangesetGraphLink(object):
+  """A changeset and its two neighbors within a cycle of the graph."""
+
+  def __init__(self, pred, changeset, succ):
+    """Represent a link in a loop in a changeset graph.
+
+    This is the link that goes from PRED -> CHANGESET -> SUCC.
+
+    We are mainly concerned with how many CVSItems have LINK_PRED,
+    LINK_SUCC, and LINK_PASSTHRU type links to the neighboring
+    commitsets.  If necessary, this class can also break up CHANGESET
+    into multiple changesets."""
+
+    self.pred = pred
+    self.pred_ids = set(pred.cvs_item_ids)
+
+    self.changeset = changeset
+
+    self.succ_ids = set(succ.cvs_item_ids)
+    self.succ = succ
+
+    # A count of each type of link for cvs_items in changeset
+    # (indexed by LINK_* constants):
+    link_counts = [0] * 4
+
+    for cvs_item in list(changeset.iter_cvs_items()):
+      link_counts[self.get_link_type(cvs_item)] += 1
+
+    [self.pred_links, self.succ_links, self.passthru_links] = link_counts[1:]
+
+  def get_link_type(self, cvs_item):
+    """Return the type of links from CVS_ITEM to self.PRED and self.SUCC.
+
+    The return value is one of LINK_NONE, LINK_PRED, LINK_SUCC, or
+    LINK_PASSTHRU."""
+
+    retval = LINK_NONE
+
+    if cvs_item.get_pred_ids() & self.pred_ids:
+      retval |= LINK_PRED
+    if cvs_item.get_succ_ids() & self.succ_ids:
+      retval |= LINK_SUCC
+
+    return retval
+
+  def get_links_to_move(self):
+    """Return the number of items that would be moved to split changeset."""
+
+    # If both counts are nonzero, the smaller side would be moved; if
+    # either is zero, min() is 0 (falsy) so the other count is used:
+    return min(self.pred_links, self.succ_links) \
+           or max(self.pred_links, self.succ_links)
+
+  def is_breakable(self):
+    """Return True iff breaking the changeset will do any good."""
+
+    return self.pred_links != 0 or self.succ_links != 0
+
+  def __cmp__(self, other):
+    """Compare SELF with OTHER in terms of which would be better to break.
+
+    The one that is better to break is considered the lesser."""
+
+    # Breakable links sort first, then fewer passthru links, then
+    # fewer items to move:
+    return (
+        - cmp(int(self.is_breakable()), int(other.is_breakable()))
+        or cmp(self.passthru_links, other.passthru_links)
+        or cmp(self.get_links_to_move(), other.get_links_to_move())
+        )
+
+  def break_changeset(self, changeset_key_generator):
+    """Break up self.changeset and return the fragments.
+
+    Break it up in such a way that the link is weakened as efficiently
+    as possible."""
+
+    if not self.is_breakable():
+      raise ValueError('Changeset is not breakable: %r' % self.changeset)
+
+    pred_items = []
+    succ_items = []
+
+    # For each link type, should such CVSItems be moved to the
+    # changeset containing the predecessor items or the one containing
+    # the successor items?
+    destination = {
+        LINK_PRED : pred_items,
+        LINK_SUCC : succ_items,
+        }
+
+    if self.pred_links == 0:
+      destination[LINK_NONE] = pred_items
+      destination[LINK_PASSTHRU] = pred_items
+    elif self.succ_links == 0:
+      destination[LINK_NONE] = succ_items
+      destination[LINK_PASSTHRU] = succ_items
+    elif self.pred_links < self.succ_links:
+      destination[LINK_NONE] = succ_items
+      destination[LINK_PASSTHRU] = succ_items
+    else:
+      destination[LINK_NONE] = pred_items
+      destination[LINK_PASSTHRU] = pred_items
+
+    for cvs_item in self.changeset.iter_cvs_items():
+      link_type = self.get_link_type(cvs_item)
+      destination[link_type].append(cvs_item.id)
+
+    # Create new changesets of the same type as the old one:
+    return [
+        self.changeset.create_split_changeset(
+            changeset_key_generator.gen_id(), pred_items),
+        self.changeset.create_split_changeset(
+            changeset_key_generator.gen_id(), succ_items),
+        ]
+
+  def __str__(self):
+    return 'Link<%x>(%d, %d, %d)' % (
+        self.changeset.id,
+        self.pred_links, self.succ_links, self.passthru_links)
+
+
diff --git a/cvs2svn_lib/changeset_graph_node.py b/cvs2svn_lib/changeset_graph_node.py
new file mode 100644
index 0000000..cbbebd7
--- /dev/null
+++ b/cvs2svn_lib/changeset_graph_node.py
@@ -0,0 +1,50 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""A node in the changeset dependency graph."""
+
+
+class ChangesetGraphNode(object):
+ """A node in the changeset dependency graph."""
+
+ __slots__ = ['id', 'time_range', 'pred_ids', 'succ_ids']
+
+ def __init__(self, changeset, time_range, pred_ids, succ_ids):
+ # The id of the ChangesetGraphNode is the same as the id of the
+ # changeset.
+ self.id = changeset.id
+
+ # The range of times of CVSItems within this Changeset.
+ self.time_range = time_range
+
+ # The set of changeset ids of changesets that are direct
+ # predecessors of this one.
+ self.pred_ids = pred_ids
+
+ # The set of changeset ids of changesets that are direct
+ # successors of this one.
+ self.succ_ids = succ_ids
+
+ def __repr__(self):
+ """For convenience only. The format is subject to change at any time."""
+
+ return '%x; pred=[%s]; succ=[%s]' % (
+ self.id,
+ ','.join(['%x' % id for id in self.pred_ids]),
+ ','.join(['%x' % id for id in self.succ_ids]),
+ )
+
+
diff --git a/cvs2svn_lib/check_dependencies_pass.py b/cvs2svn_lib/check_dependencies_pass.py
new file mode 100644
index 0000000..172c264
--- /dev/null
+++ b/cvs2svn_lib/check_dependencies_pass.py
@@ -0,0 +1,144 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module defines some passes that can be used for debugging cv2svn."""
+
+
+from cvs2svn_lib import config
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.common import FatalException
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.pass_manager import Pass
+from cvs2svn_lib.project import read_projects
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.cvs_file_database import CVSFileDatabase
+from cvs2svn_lib.symbol_database import SymbolDatabase
+from cvs2svn_lib.cvs_item_database import OldCVSItemStore
+from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
+
+
+class CheckDependenciesPass(Pass):
+  """Check that the dependencies are self-consistent."""
+
+  def __init__(self):
+    Pass.__init__(self)
+
+  def register_artifacts(self):
+    """Declare the temporary files that this pass needs to read."""
+
+    self._register_temp_file_needed(config.PROJECTS)
+    self._register_temp_file_needed(config.SYMBOL_DB)
+    self._register_temp_file_needed(config.CVS_FILES_DB)
+
+  def iter_cvs_items(self):
+    """Iterate over all CVSItems to be checked (abstract hook)."""
+
+    raise NotImplementedError()
+
+  def get_cvs_item(self, item_id):
+    """Return the CVSItem with id ITEM_ID (abstract hook)."""
+
+    raise NotImplementedError()
+
+  def run(self, run_options, stats_keeper):
+    """Check that every pred/succ link between CVSItems is symmetric.
+
+    Collect all asymmetric links and raise FatalException listing them
+    if any were found."""
+
+    Ctx()._projects = read_projects(
+        artifact_manager.get_temp_file(config.PROJECTS)
+        )
+    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
+    self.symbol_db = SymbolDatabase()
+    Ctx()._symbol_db = self.symbol_db
+
+    Log().quiet("Checking dependency consistency...")
+
+    fatal_errors = []
+    for cvs_item in self.iter_cvs_items():
+      # Check that the pred_ids and succ_ids are mutually consistent:
+      for pred_id in cvs_item.get_pred_ids():
+        pred = self.get_cvs_item(pred_id)
+        if not cvs_item.id in pred.get_succ_ids():
+          fatal_errors.append(
+              '%s lists pred=%s, but not vice versa.' % (cvs_item, pred,))
+
+      for succ_id in cvs_item.get_succ_ids():
+        succ = self.get_cvs_item(succ_id)
+        if not cvs_item.id in succ.get_pred_ids():
+          fatal_errors.append(
+              '%s lists succ=%s, but not vice versa.' % (cvs_item, succ,))
+
+    if fatal_errors:
+      raise FatalException(
+          'Dependencies inconsistent:\n'
+          '%s\n'
+          'Exited due to fatal error(s).'
+          % ('\n'.join(fatal_errors),)
+          )
+
+    self.symbol_db.close()
+    self.symbol_db = None
+    Ctx()._cvs_file_db.close()
+    Log().quiet("Done")
+
+
+class CheckItemStoreDependenciesPass(CheckDependenciesPass):
+  """Check dependency consistency of CVSItems in an OldCVSItemStore."""
+
+  def __init__(self, cvs_items_store_file):
+    CheckDependenciesPass.__init__(self)
+    # Name of the temp file holding the item store to be checked:
+    self.cvs_items_store_file = cvs_items_store_file
+
+  def register_artifacts(self):
+    CheckDependenciesPass.register_artifacts(self)
+    self._register_temp_file_needed(self.cvs_items_store_file)
+
+  def iter_cvs_items(self):
+    """Yield every CVSItem, one CVS file at a time.
+
+    self.current_cvs_file_items tracks the file whose items are
+    currently being yielded; get_cvs_item() resolves lookups against
+    it."""
+
+    cvs_item_store = OldCVSItemStore(
+        artifact_manager.get_temp_file(self.cvs_items_store_file))
+
+    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
+      self.current_cvs_file_items = cvs_file_items
+      for cvs_item in cvs_file_items.values():
+        yield cvs_item
+
+    del self.current_cvs_file_items
+
+    cvs_item_store.close()
+
+  def get_cvs_item(self, item_id):
+    # Lookups are resolved within the CVS file currently being iterated:
+    return self.current_cvs_file_items[item_id]
+
+
+class CheckIndexedItemStoreDependenciesPass(CheckDependenciesPass):
+  """Check dependency consistency of CVSItems in an IndexedCVSItemStore."""
+
+  def __init__(self, cvs_items_store_file, cvs_items_store_index_file):
+    CheckDependenciesPass.__init__(self)
+    # Names of the temp files holding the indexed store and its index:
+    self.cvs_items_store_file = cvs_items_store_file
+    self.cvs_items_store_index_file = cvs_items_store_index_file
+
+  def register_artifacts(self):
+    CheckDependenciesPass.register_artifacts(self)
+    self._register_temp_file_needed(self.cvs_items_store_file)
+    self._register_temp_file_needed(self.cvs_items_store_index_file)
+
+  def iter_cvs_items(self):
+    """Iterate over all CVSItems in the indexed store."""
+
+    return self.cvs_item_store.itervalues()
+
+  def get_cvs_item(self, item_id):
+    """Return the CVSItem with id ITEM_ID from the indexed store."""
+
+    return self.cvs_item_store[item_id]
+
+  def run(self, run_options, stats_keeper):
+    # Open the indexed store, run the generic check, then clean up:
+    self.cvs_item_store = IndexedCVSItemStore(
+        artifact_manager.get_temp_file(self.cvs_items_store_file),
+        artifact_manager.get_temp_file(self.cvs_items_store_index_file),
+        DB_OPEN_READ)
+
+    CheckDependenciesPass.run(self, run_options, stats_keeper)
+
+    self.cvs_item_store.close()
+    self.cvs_item_store = None
+
+
diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py
new file mode 100644
index 0000000..fe28e0c
--- /dev/null
+++ b/cvs2svn_lib/checkout_internal.py
@@ -0,0 +1,778 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes that implement the --use-internal-co option.
+
+The idea is to patch up the revisions' contents incrementally, thus
+avoiding the huge number of process spawns and the O(n^2) overhead of
+using 'co' and 'cvs'.
+
+InternalRevisionRecorder saves the RCS deltas and RCS revision trees
+to databases. Notably, deltas from the trunk need to be reversed, as
+CVS stores them so they apply from HEAD backwards.
+
+InternalRevisionExcluder copies the revision trees to a new database,
+omitting excluded branches.
+
+InternalRevisionReader produces the revisions' contents on demand. To
+generate the text for a typical revision, we need the revision's delta
+text plus the fulltext of the previous revision. Therefore, we
+maintain a checkout database containing a copy of the fulltext of any
+revision for which subsequent revisions still need to be retrieved.
+It is crucial to remove text from this database as soon as it is no
+longer needed, to prevent it from growing enormous.
+
+There are two reasons that the text from a revision can be needed: (1)
+because the revision itself still needs to be output to a dumpfile;
+(2) because another revision needs it as the base of its delta. We
+maintain a reference count for each revision, which includes *both*
+possibilities. The first time a revision's text is needed, it is
+generated by applying the revision's deltatext to the previous
+revision's fulltext, and the resulting fulltext is stored in the
+checkout database. Each time a revision's fulltext is retrieved, its
+reference count is decremented. When the reference count goes to
+zero, then the fulltext is deleted from the checkout database.
+
+The administrative data for managing this consists of one TextRecord
+entry for each revision. Each TextRecord has an id, which is the same
+id as used for the corresponding CVSRevision instance. It also
+maintains a count of the times it is expected to be retrieved.
+TextRecords come in several varieties:
+
+FullTextRecord -- Used for revisions whose fulltext is contained
+ directly in the RCS file, and therefore available during
+ CollectRevsPass (i.e., typically revision 1.1 of each file).
+
+DeltaTextRecord -- Used for revisions that are defined via a delta
+ relative to some other TextRecord. These records record the id of
+ the TextRecord that holds the base text against which the delta is
+ defined. When the text for a DeltaTextRecord is retrieved, the
+ DeltaTextRecord instance is deleted and a CheckedOutTextRecord
+ instance is created to take its place.
+
+CheckedOutTextRecord -- Used during OutputPass for a revision that
+ started out as a DeltaTextRecord, but has already been retrieved
+ (and therefore its fulltext is stored in the checkout database).
+
+While a file is being processed during CollectRevsPass, the fulltext
+and deltas are stored to the delta database, and TextRecord instances
+are created to keep track of things. The reference counts are all
+initialized to zero.
+
+After CollectRevsPass has done any preliminary tree mangling, its
+_FileDataCollector.parse_completed() method calls
+RevisionRecorder.finish_file(), passing it the CVSFileItems instance
+that describes the revisions in the file. At this point the reference
+counts for the file's TextRecords are updated: each record referred to
+by a delta has its refcount incremented, and each record that
+corresponds to a non-delete CVSRevision is incremented. After that,
+any records with refcount==0 are removed. When one record is removed,
+that can cause another record's reference count to go to zero and be
+removed too, recursively. When a TextRecord is deleted at this stage,
+its deltatext is also deleted from the delta database.
+
+In FilterSymbolsPass, the exact same procedure (described in the
+previous paragraph) is repeated, but this time using the CVSFileItems
+after it has been updated for excluded symbols, symbol
+preferred-parent grafting, etc."""
+
+
+import cStringIO
+import re
+import time
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.cvs_item import CVSRevisionModification
+from cvs2svn_lib.database import Database
+from cvs2svn_lib.database import IndexedDatabase
+from cvs2svn_lib.rcs_stream import RCSStream
+from cvs2svn_lib.rcs_stream import MalformedDeltaException
+from cvs2svn_lib.revision_manager import RevisionRecorder
+from cvs2svn_lib.revision_manager import RevisionExcluder
+from cvs2svn_lib.revision_manager import RevisionReader
+from cvs2svn_lib.serializer import MarshalSerializer
+from cvs2svn_lib.serializer import CompressingSerializer
+from cvs2svn_lib.serializer import PrimedPickleSerializer
+
+
class TextRecord(object):
  """Reference-counting bookkeeping for the text of one CVSRevision."""

  __slots__ = ['id', 'refcount']

  def __init__(self, id):
    # The cvs_rev_id of the revision whose text this record tracks.
    self.id = id

    # How many times this revision's text still has to be retrieved.
    self.refcount = 0

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def increment_dependency_refcounts(self, text_record_db):
    """Bump the refcounts of records this one depends on (none by default)."""

    pass

  def decrement_refcount(self, text_record_db):
    """Note one fewer pending checkout of our text.

    When the count reaches zero, ask TEXT_RECORD_DB to discard us."""

    self.refcount -= 1
    if not self.refcount:
      text_record_db.discard(self.id)

  def checkout(self, text_record_db):
    """Return the text for this revision and update the bookkeeping.

    Decrement our reference count and update the databases according
    to whether any future checkouts remain.  Subclass responsibility."""

    raise NotImplementedError()

  def free(self, text_record_db):
    """Release all resources; we will never be checked out again.

    Also decrement the refcounts of any TextRecords we depend on.
    Subclass responsibility."""

    raise NotImplementedError()
+
+
class FullTextRecord(TextRecord):
  """A TextRecord whose fulltext is stored directly in the delta database."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    # Our fulltext is available verbatim from the delta database:
    retval = text_record_db.delta_db[self.id]
    self.decrement_refcount(text_record_db)
    return retval

  def free(self, text_record_db):
    # Drop our fulltext from the delta database.
    del text_record_db.delta_db[self.id]

  def __str__(self):
    return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
+
+
class DeltaTextRecord(TextRecord):
  """A TextRecord defined as a delta relative to another TextRecord."""

  __slots__ = ['pred_id']

  def __init__(self, id, pred_id):
    TextRecord.__init__(self, id)

    # The cvs_rev_id of the revision that this delta is relative to.
    self.pred_id = pred_id

  def __getstate__(self):
    return (self.id, self.refcount, self.pred_id,)

  def __setstate__(self, state):
    (self.id, self.refcount, self.pred_id,) = state

  def increment_dependency_refcounts(self, text_record_db):
    # Our base text is needed (once) to reconstruct our own text:
    text_record_db[self.pred_id].refcount += 1

  def checkout(self, text_record_db):
    # Reconstruct our fulltext by patching the base text with our delta:
    stream = RCSStream(
        text_record_db[self.pred_id].checkout(text_record_db))
    stream.apply_diff(text_record_db.delta_db[self.id])
    text = stream.get_text()
    del stream
    self.refcount -= 1
    if not self.refcount:
      # This text will never be needed again; just delete ourselves
      # without ever having stored the fulltext to the checkout
      # database:
      del text_record_db[self.id]
    else:
      # Cache the fulltext and substitute a CheckedOutTextRecord for
      # ourselves, so later checkouts read from the cache:
      text_record_db.checkout_db['%x' % self.id] = text
      replacement = CheckedOutTextRecord(self.id)
      replacement.refcount = self.refcount
      text_record_db.replace(replacement)
    return text

  def free(self, text_record_db):
    del text_record_db.delta_db[self.id]
    text_record_db[self.pred_id].decrement_refcount(text_record_db)

  def __str__(self):
    return 'DeltaTextRecord(%x -> %x, %d)' \
           % (self.pred_id, self.id, self.refcount,)
+
+
class CheckedOutTextRecord(TextRecord):
  """A TextRecord whose fulltext is cached in the checkout database."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    # Read the previously-stored fulltext back out of the cache:
    retval = text_record_db.checkout_db['%x' % self.id]
    self.decrement_refcount(text_record_db)
    return retval

  def free(self, text_record_db):
    # Evict our cached fulltext.
    del text_record_db.checkout_db['%x' % self.id]

  def __str__(self):
    return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
+
+
class NullDatabase(object):
  """A stand-in database on which deletion requests silently do nothing.

  Use this wherever TextRecordDatabase expects a database but nothing
  may actually be deleted."""

  def __delitem__(self, id):
    # Deliberately ignore the request.
    pass
+
+
class TextRecordDatabase:
  """Holds the TextRecord instances that are currently live.

  During CollectRevsPass and FilterSymbolsPass, files are processed
  one by one and a new TextRecordDatabase instance is used for each
  file.  During OutputPass, a single TextRecordDatabase instance is
  used for the duration of OutputPass; individual records are added
  and removed when they are active."""

  def __init__(self, delta_db, checkout_db):
    # A map { cvs_rev_id -> TextRecord }.
    self.text_records = {}

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext/deltatext strings as values.  Its __getitem__() method
    # is used to retrieve deltas when they are needed, and its
    # __delitem__() method is used to delete deltas when they can be
    # freed.  The modifiability of the delta database varies from pass
    # to pass, so the object stored here varies as well:
    #
    # CollectRevsPass: a fully-functional IndexedDatabase.  This
    #     allows deltas that will not be needed to be deleted.
    #
    # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
    #     modified during this pass, and we have no need to retrieve
    #     deltas, so we just use a dummy object here.
    #
    # OutputPass: a disabled IndexedDatabase.  During this pass we
    #     need to retrieve deltas, but we are not allowed to modify the
    #     delta database.  So we use an IndexedDatabase whose __del__()
    #     method has been disabled to do nothing.
    self.delta_db = delta_db

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext strings as values.  This database is only set during
    # OutputPass.
    self.checkout_db = checkout_db

    # If this is set to a list, then the list holds the ids of
    # text_records that have to be deleted; when discard() is called,
    # it adds the requested id to the list but does not delete it.  If
    # this member is set to None, then text_records are deleted
    # immediately when discard() is called.
    self.deferred_deletes = None

  def __getstate__(self):
    return (self.text_records.values(),)

  def __setstate__(self, state):
    (text_records,) = state
    self.text_records = {}
    for text_record in text_records:
      self.add(text_record)
    self.delta_db = NullDatabase()
    self.checkout_db = NullDatabase()
    self.deferred_deletes = None

  def add(self, text_record):
    """Add TEXT_RECORD to our database.

    There must not already be a record with the same id."""

    # has_key() is deprecated (and gone in Python 3); use 'in' instead:
    assert text_record.id not in self.text_records

    self.text_records[text_record.id] = text_record

  def __getitem__(self, id):
    return self.text_records[id]

  def __delitem__(self, id):
    """Free the record with the specified ID."""

    del self.text_records[id]

  def replace(self, text_record):
    """Store TEXT_RECORD in place of the existing record with the same id.

    Do not do anything with the old record."""

    assert text_record.id in self.text_records
    self.text_records[text_record.id] = text_record

  def discard(self, *ids):
    """The text records with IDS are no longer needed; discard them.

    This involves calling their free() methods and also removing them
    from SELF.

    If SELF.deferred_deletes is not None, then the ids to be deleted
    are added to the list instead of deleted immediately.  This
    mechanism is to prevent a stack overflow from the avalanche of
    deletes that can result from deleting a long chain of revisions."""

    if self.deferred_deletes is None:
      # This is an outer-level delete.
      self.deferred_deletes = list(ids)
      while self.deferred_deletes:
        id = self.deferred_deletes.pop()
        text_record = self[id]
        if text_record.refcount != 0:
          raise InternalError(
              'TextRecordDatabase.discard(%s) called with refcount = %d'
              % (text_record, text_record.refcount,)
              )
        # This call might cause other text_record ids to be added to
        # self.deferred_deletes:
        text_record.free(self)
        del self[id]
      self.deferred_deletes = None
    else:
      self.deferred_deletes.extend(ids)

  def itervalues(self):
    return self.text_records.itervalues()

  def recompute_refcounts(self, cvs_file_items):
    """Recompute the refcounts of the contained TextRecords.

    Use CVS_FILE_ITEMS to determine which records will be needed by
    cvs2svn."""

    # First clear all of the refcounts:
    for text_record in self.itervalues():
      text_record.refcount = 0

    # Now increment the reference count of records that are needed as
    # the source of another record's deltas:
    for text_record in self.itervalues():
      text_record.increment_dependency_refcounts(self.text_records)

    # Now increment the reference count of records that will be needed
    # by cvs2svn:
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if isinstance(cvs_rev, CVSRevisionModification):
          self[cvs_rev.id].refcount += 1

  def free_unused(self):
    """Free any TextRecords whose reference counts are zero."""

    # The deletion of some of these text records might cause others to
    # be unused, in which case they will be deleted automatically.
    # But since the initially-unused records are not referred to by
    # any others, we don't have to be afraid that they will be deleted
    # before we get to them.  But it *is* crucial that we create the
    # whole unused list before starting the loop.

    unused = [
        text_record.id
        for text_record in self.itervalues()
        if text_record.refcount == 0
        ]

    self.discard(*unused)

  def log_leftovers(self):
    """If any TextRecords still exist, log them."""

    if self.text_records:
      Log().warn(
          "%s: internal problem: leftover revisions in the checkout cache:"
          % warning_prefix)
      for text_record in self.itervalues():
        Log().warn('  %s' % (text_record,))

  def __repr__(self):
    """Debugging output of the current contents of the TextRecordDatabase."""

    retval = ['TextRecordDatabase:']
    for text_record in self.itervalues():
      retval.append('  %s' % (text_record,))
    return '\n'.join(retval)
+
+
class InternalRevisionRecorder(RevisionRecorder):
  """A RevisionRecorder that reconstructs the fulltext internally."""

  def __init__(self, compress):
    # COMPRESS is a boolean: if set, the delta serializer is wrapped
    # in a CompressingSerializer in start().
    RevisionRecorder.__init__(self)
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the delta and tree databases as artifacts of WHICH_PASS."""

    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    """Create the (empty) delta and tree databases."""

    ser = MarshalSerializer()
    if self._compress:
      ser = CompressingSerializer(ser)
    self._rcs_deltas = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, ser)
    # Prime the pickler with the TextRecord subclasses that will be
    # written to the trees database:
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer))

  def start_file(self, cvs_file_items):
    """Prepare to receive the revision texts of one file."""

    self._cvs_file_items = cvs_file_items

    # A map from cvs_rev_id to TextRecord instance.  During this pass
    # nothing is checked out, so a NullDatabase serves as the checkout
    # database:
    self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

  def record_text(self, cvs_rev, log, text):
    """Record TEXT (fulltext or delta) for CVS_REV.

    Store the (possibly inverted) delta or fulltext to the delta
    database via _writeout(), creating the corresponding TextRecord.
    Always return None (this recorder does not use revision tokens)."""

    if isinstance(cvs_rev.lod, Trunk):
      # On trunk, revisions are encountered in reverse order (1.<N>
      # ... 1.1) and deltas are inverted.  The first text that we see
      # is the fulltext for the HEAD revision.  After that, the text
      # corresponding to revision 1.N is the delta (1.<N+1> ->
      # 1.<N>)).  We have to invert the deltas here so that we can
      # read the revisions out in dependency order; that is, for
      # revision 1.1 we want the fulltext, and for revision 1.<N> we
      # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
      # compute the delta for a revision until we see its logical
      # parent.  When we finally see revision 1.1 (which is recognized
      # because it doesn't have a parent), we can record the diff (1.1
      # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

      if cvs_rev.next_id is None:
        # This is HEAD, as fulltext.  Initialize the RCSStream so
        # that we can compute deltas backwards in time.
        self._stream = RCSStream(text)
      else:
        # Any other trunk revision is a backward delta.  Apply the
        # delta to the RCSStream to mutate it to the contents of this
        # revision, and also to get the reverse delta, which we store
        # as the forward delta of our child revision.
        try:
          text = self._stream.invert_diff(text)
        except MalformedDeltaException, (msg):
          Log().error('Malformed RCS delta in %s, revision %s: %s'
                      % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
                         msg))
          raise RuntimeError
        text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
        self._writeout(text_record, text)

      if cvs_rev.prev_id is None:
        # This is revision 1.1.  Write its fulltext:
        text_record = FullTextRecord(cvs_rev.id)
        self._writeout(text_record, self._stream.get_text())

        # There will be no more trunk revisions delivered, so free the
        # RCSStream.
        del self._stream

    else:
      # On branches, revisions are encountered in logical order
      # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
      # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
      # <BRANCH>.<N>).  That's what we need, so just store it.

      # FIXME: It would be nice to avoid writing out branch deltas
      # when --trunk-only.  (They will be deleted when finish_file()
      # is called, but if the delta db is in an IndexedDatabase the
      # deletions won't actually recover any disk space.)
      text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id)
      self._writeout(text_record, text)

    return None

  def _writeout(self, text_record, text):
    # Record the TextRecord and store its text to the delta database
    # under the same id.
    self.text_record_db.add(text_record)
    self._rcs_deltas[text_record.id] = text

  def finish_file(self, cvs_file_items):
    """Finish processing of the current file.

    Compute the initial text record refcounts, discard any records
    that are unneeded, and store the text records for the file to the
    _rcs_trees database."""

    # Delete our copy of the preliminary CVSFileItems:
    del self._cvs_file_items

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    del self.text_record_db

  def finish(self):
    """Close the delta and tree databases."""

    self._rcs_deltas.close()
    self._rcs_trees.close()
+
+
class InternalRevisionExcluder(RevisionExcluder):
  """The RevisionExcluder used by InternalRevisionReader."""

  def register_artifacts(self, which_pass):
    for needed in (
        config.RCS_TREES_STORE,
        config.RCS_TREES_INDEX_TABLE,
        ):
      artifact_manager.register_temp_file_needed(needed, which_pass)
    for created in (
        config.RCS_TREES_FILTERED_STORE,
        config.RCS_TREES_FILTERED_INDEX_TABLE,
        ):
      artifact_manager.register_temp_file(created, which_pass)

  def start(self):
    # Read the trees written by CollectRevsPass...
    self._tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_READ)
    # ...and write the filtered trees to a new database:
    self._new_tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
        DB_OPEN_NEW,
        PrimedPickleSerializer((FullTextRecord, DeltaTextRecord)))

  def process_file(self, cvs_file_items):
    # Recompute which records are still needed after symbol exclusion
    # and store the pruned tree to the filtered database:
    records = self._tree_db[cvs_file_items.cvs_file.id]
    records.recompute_refcounts(cvs_file_items)
    records.free_unused()
    self._new_tree_db[cvs_file_items.cvs_file.id] = records

  def finish(self):
    self._tree_db.close()
    self._new_tree_db.close()
+
+
+class _KeywordExpander:
+ """A class whose instances provide substitutions for CVS keywords.
+
+ This class is used via its __call__() method, which should be called
+ with a match object representing a match for a CVS keyword string.
+ The method returns the replacement for the matched text.
+
+ The __call__() method works by calling the method with the same name
+ as that of the CVS keyword (converted to lower case).
+
+ Instances of this class can be passed as the REPL argument to
+ re.sub()."""
+
+ date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs
+ date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12
+
+ date_fmt = date_fmt_new
+
+ @classmethod
+ def use_old_date_format(klass):
+ """Class method to ensure exact compatibility with CVS 1.11
+ output. Use this if you want to verify your conversion and you're
+ using CVS 1.11."""
+ klass.date_fmt = klass.date_fmt_old
+
+ def __init__(self, cvs_rev):
+ self.cvs_rev = cvs_rev
+
+ def __call__(self, match):
+ return '$%s: %s $' % \
+ (match.group(1), getattr(self, match.group(1).lower())(),)
+
+ def author(self):
+ return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
+
+ def date(self):
+ return time.strftime(self.date_fmt,
+ time.gmtime(self.cvs_rev.timestamp))
+
+ def header(self):
+ return '%s %s %s %s Exp' % \
+ (self.source(), self.cvs_rev.rev, self.date(), self.author())
+
+ def id(self):
+ return '%s %s %s %s Exp' % \
+ (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
+
+ def locker(self):
+ # Handle kvl like kv, as a converted repo is supposed to have no
+ # locks.
+ return ''
+
+ def log(self):
+ # Would need some special handling.
+ return 'not supported by cvs2svn'
+
+ def name(self):
+ # Cannot work, as just creating a new symbol does not check out
+ # the revision again.
+ return 'not supported by cvs2svn'
+
+ def rcsfile(self):
+ return self.cvs_rev.cvs_file.basename + ",v"
+
+ def revision(self):
+ return self.cvs_rev.rev
+
+ def source(self):
+ project = self.cvs_rev.cvs_file.project
+ return project.cvs_repository_root + '/' + project.cvs_module + \
+ self.cvs_rev.cvs_file.cvs_path + ",v"
+
+ def state(self):
+ # We check out only live revisions.
+ return 'Exp'
+
+
class InternalRevisionReader(RevisionReader):
  """A RevisionReader that reads the contents from an own delta store."""

  # The CVS keywords that are recognized for (un)expansion:
  _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
  # Matches only an *expanded* keyword, e.g. '$Revision: 1.1 $':
  _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
  # Matches a keyword whether expanded or not, e.g. '$Revision$':
  _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')

  def __init__(self, compress):
    # COMPRESS is a boolean: if set, the checkout database serializer
    # is wrapped in a CompressingSerializer in start().
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the checkout database and request the delta/tree files."""

    artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
    artifact_manager.register_temp_file_needed(
        config.RCS_DELTAS_STORE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_FILTERED_STORE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
        )

  def start(self):
    """Open the databases and initialize the checkout bookkeeping."""

    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_READ)
    # Disable deletions from the delta database (TextRecordDatabase
    # deletes deltas when freeing records, but the database is opened
    # read-only here).  NOTE(review): this assignment shadows the
    # special method on the *instance*, which presumably works because
    # IndexedDatabase is an old-style class -- confirm.
    self._delta_db.__delitem__ = lambda id: None
    self._tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
        DB_OPEN_READ)
    ser = MarshalSerializer()
    if self._compress:
      ser = CompressingSerializer(ser)
    self._co_db = Database(
        artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
        ser)

    # The set of CVSFile instances whose TextRecords have already been
    # read:
    self._loaded_files = set()

    # A map { CVSFILE : _FileTree } for files that currently have live
    # revisions:
    self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)

  def _get_text_record(self, cvs_rev):
    """Return the TextRecord instance for CVS_REV.

    If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
    do so now."""

    if cvs_rev.cvs_file not in self._loaded_files:
      # Lazily merge the file's records into the shared database:
      for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
        self._text_record_db.add(text_record)
      self._loaded_files.add(cvs_rev.cvs_file)

    return self._text_record_db[cvs_rev.id]

  def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
    """Check out the text for revision C_REV from the repository.

    Return the text wrapped in a readable file object.  If
    SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
    _un_expanded prior to returning the file content.  Note that $Log$
    never actually generates a log (which makes test 'requires_cvs()'
    fail).

    Revisions may be requested in any order, but if they are not
    requested in dependency order the checkout database will become
    very large.  Revisions may be skipped.  Each revision may be
    requested only once."""

    try:
      text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
    except MalformedDeltaException, (msg):
      raise FatalError('Malformed RCS delta in %s, revision %s: %s'
                       % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
    # Keyword handling is skipped for binary ('b') and "old" ('o')
    # keyword-handling modes:
    if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
      if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
        # Collapse keywords back to their unexpanded '$Keyword$' form:
        text = self._kw_re.sub(r'$\1$', text)
      else:
        # Expand keywords using the revision's metadata:
        text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)

    return cStringIO.StringIO(text)

  def finish(self):
    """Log any leftover records, then release and close the databases."""

    self._text_record_db.log_leftovers()

    del self._text_record_db
    self._delta_db.close()
    self._tree_db.close()
    self._co_db.close()
+
diff --git a/cvs2svn_lib/collect_data.py b/cvs2svn_lib/collect_data.py
new file mode 100644
index 0000000..160d7b9
--- /dev/null
+++ b/cvs2svn_lib/collect_data.py
@@ -0,0 +1,1431 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Data collection classes.
+
+This module contains the code used to collect data from the CVS
+repository. It parses *,v files, recording all useful information
+except for the actual file contents (though even the file contents
+might be recorded by the RevisionRecorder if one is configured).
+
+As a *,v file is parsed, the information pertaining to the file is
+accumulated in memory, mostly in _RevisionData, _BranchData, and
+_TagData objects. When parsing is complete, a final pass is made over
+the data to create some final dependency links, collect statistics,
+etc., then the _*Data objects are converted into CVSItem objects
+(CVSRevision, CVSBranch, and CVSTag respectively) and the CVSItems are
+dumped into databases.
+
+During the data collection, persistent unique ids are allocated to
+many types of objects: CVSFile, Symbol, and CVSItems. CVSItems are a
+special case. CVSItem ids are unique across all CVSItem types, and
+the ids are carried over from the corresponding data collection
+objects:
+
+ _RevisionData -> CVSRevision
+
+ _BranchData -> CVSBranch
+
+ _TagData -> CVSTag
+
+In a later pass it is possible to convert tags <-> branches. But even
+if this occurs, the new branch or tag uses the same id as the old tag
+or branch.
+
+"""
+
+
+import os
+import stat
+import re
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.common import error_prefix
+from cvs2svn_lib.common import IllegalSVNPathError
+from cvs2svn_lib.common import verify_svn_filename_legal
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.project import FileInAndOutOfAtticException
+from cvs2svn_lib.cvs_file import CVSPath
+from cvs2svn_lib.cvs_file import CVSDirectory
+from cvs2svn_lib.cvs_file import CVSFile
+from cvs2svn_lib.symbol import Symbol
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.cvs_item import CVSRevision
+from cvs2svn_lib.cvs_item import CVSBranch
+from cvs2svn_lib.cvs_item import CVSTag
+from cvs2svn_lib.cvs_item import cvs_revision_type_map
+from cvs2svn_lib.cvs_file_items import VendorBranchError
+from cvs2svn_lib.cvs_file_items import CVSFileItems
+from cvs2svn_lib.key_generator import KeyGenerator
+from cvs2svn_lib.cvs_item_database import NewCVSItemStore
+from cvs2svn_lib.symbol_statistics import SymbolStatisticsCollector
+from cvs2svn_lib.metadata_database import MetadataDatabase
+from cvs2svn_lib.metadata_database import MetadataLogger
+
+import cvs2svn_rcsparse
+
+
# A regular expression defining "valid" revision numbers (used to
# check that symbol definitions are reasonable).
_valid_revision_re = re.compile(r'''
    ^
    (?:\d+\.)+       # Digit groups with trailing dots
    \d+              # And the last digit group.
    $
    ''', re.VERBOSE)

# Matches a branch revision number as it appears in a symbol
# definition; group 1 + group 2 form the canonical branch number with
# the extra '0.' (inserted by CVS but not RCS) removed.
_branch_revision_re = re.compile(r'''
    ^
    ((?:\d+\.\d+\.)+) # A nonzero even number of digit groups w/trailing dot
    (?:0\.)?          # CVS sticks an extra 0 here; RCS does not
    (\d+)             # And the last digit group
    $
    ''', re.VERBOSE)
+
+
def rev_tuple(rev):
  """Convert the revision number string REV into a tuple of ints.

  For example, '1.2.3.4' becomes (1, 2, 3, 4)."""

  return tuple(map(int, rev.split('.')))
+
+
def is_trunk_revision(rev):
  """Return True iff REV is a trunk revision.

  REV is a revision number corresponding to a specific revision (i.e.,
  not a whole branch).  Trunk revisions have exactly two components
  (e.g., '1.5')."""

  return len(rev.split('.')) == 2
+
+
def is_branch_revision_number(rev):
  """Return True iff REV is a branch revision number.

  REV is a CVS revision number in canonical form (i.e., with zeros
  removed).  A whole branch has an odd number of components (e.g.,
  '1.7.2'); a single revision has an even number (e.g., '1.7.2.1')."""

  return len(rev.split('.')) % 2 == 1
+
+
def is_same_line_of_development(rev1, rev2):
  """Return True iff REV1 and REV2 lie on the same line of development.

  That is: both on trunk, or both on the same branch.  If either
  argument is None, return False."""

  if rev1 is None or rev2 is None:
    return False
  # Trunk revisions have exactly one dot:
  if rev1.count('.') == 1 and rev2.count('.') == 1:
    return True
  # Otherwise the two are on the same branch iff everything before the
  # last component matches:
  return rev1.rsplit('.', 1)[0] == rev2.rsplit('.', 1)[0]
+
+
class _RevisionData:
  """Per-revision bookkeeping used while a single RCS file is parsed.

  The operation (add vs. change vs. delete) of a revision can only be
  decided in set_revision_info(), once the state of its predecessor is
  known.  Keeping one of these records per revision makes the
  predecessor's state available at that point; without it, an add
  could not be distinguished from a change."""

  def __init__(self, cvs_rev_id, rev, timestamp, author, state):
    # The unique CVSItem id allocated to this revision:
    self.cvs_rev_id = cvs_rev_id

    # The revision number (e.g., '1.3' or '1.3.2.1'):
    self.rev = rev

    # The commit time as an integer timestamp (the original value is
    # retained separately in case the timestamp is adjusted later):
    self.timestamp = timestamp
    self.original_timestamp = timestamp

    # The author and the RCS state (e.g., 'Exp' or 'dead'):
    self.author = author
    self.state = state

    # The _BranchData of the branch that this revision starts, if it
    # is the first revision on a branch; otherwise None.
    self.parent_branch_data = None

    # The revision number of the predecessor along the same line of
    # development (for the first revision on a branch, the revision
    # the branch sprouted from), or None for the root of the file's
    # revision tree.  This is recorded explicitly because 'cvsadmin
    # -o' can leave gaps that make it impossible to compute the parent
    # arithmetically.
    self.parent = None

    # The revision number of the primary (same-line) successor, or
    # None if there is none.
    self.child = None

    # _BranchData instances for the branches sprouting from this
    # revision, in ascending creation order.  They cannot be filled in
    # here (not all branches are known yet), so they are populated by
    # _FileDataCollector._resolve_branch_dependencies() and ordered by
    # _FileDataCollector._sort_branches().
    self.branches_data = []

    # The revision numbers of the first commit on each branch
    # sprouting from here that actually received commits.  Kept
    # explicitly so that a revision-only topological sort still sees
    # the dependency that otherwise only exists via branches_data.
    self.branches_revs_data = []

    # _TagData instances for the tags attached to this revision.
    self.tags_data = []

    # Opaque token that RevisionRecorder.record_text() may return; it
    # is later handed to RevisionReader to obtain the text again.
    self.revision_recorder_token = None

  def get_first_on_branch_id(self):
    """Return the id of the branch this revision starts, else None."""

    if self.parent_branch_data is None:
      return None
    return self.parent_branch_data.id
+
+
class _SymbolData:
  """Per-file collection area for information about one symbol.

  SYMBOL is an instance of Symbol.  At this stage it is not yet
  differentiated into a Branch or a Tag (regardless of whether self is
  a _BranchData or a _TagData), because the same name can appear as a
  branch in one file and a tag in another."""

  def __init__(self, id, symbol):
    """Record the per-file id and the Symbol instance for SYMBOL."""

    # The unique id used for this symbol within this particular file.
    # The CVSItem later derived from this instance reuses the same id.
    self.id = id

    # The (undifferentiated) Symbol instance.
    self.symbol = symbol
+
+
class _BranchData(_SymbolData):
  """Per-file collection area for information about one branch."""

  def __init__(self, id, symbol, branch_number):
    _SymbolData.__init__(self, id, symbol)

    # The canonical branch number (odd number of components), e.g.
    # '1.5.2':
    self.branch_number = branch_number

    # The revision that this branch sprouts from (e.g., '1.5'):
    # everything before the branch number's final component.
    last_dot = self.branch_number.rindex(".")
    self.parent = self.branch_number[:last_dot]

    # The revision number of the first commit on this branch (e.g.,
    # '1.5.2.1'), or None if nothing was ever committed on it.
    self.child = None
+
+
class _TagData(_SymbolData):
  """Per-file collection area for information about one tag."""

  def __init__(self, id, symbol, rev):
    _SymbolData.__init__(self, id, symbol)

    # The revision number that the tag is attached to (e.g., '1.5.2.3'):
    self.rev = rev
+
+
class _SymbolDataCollector(object):
  """Collect information about symbols in a single CVSFile."""

  def __init__(self, fdc, cvs_file):
    self.fdc = fdc
    self.cvs_file = cvs_file

    # Convenience references back to the per-project collector and the
    # overall CollectData instance:
    self.pdc = self.fdc.pdc
    self.collect_data = self.fdc.collect_data

    # A list [(name, revision), ...] of symbols defined in the header
    # of the file.  The name has already been transformed using the
    # symbol transform rules.  If the symbol transform rules indicate
    # that the symbol should be ignored, then it is never added to
    # this list.  This list is processed then deleted in
    # process_symbols().
    self._symbol_defs = []

    # A set containing the transformed names of symbols in this file
    # (used to detect duplicates during processing of unlabeled
    # branches):
    self._defined_symbols = set()

    # Map { branch_number : _BranchData }, where branch_number has an
    # odd number of digits.
    self.branches_data = { }

    # Map { revision : [ tag_data ] }, where revision has an even
    # number of digits, and the value is a list of _TagData objects
    # for tags that apply to that revision.
    self.tags_data = { }

  def _add_branch(self, name, branch_number):
    """Record that BRANCH_NUMBER is the branch number for branch NAME,
    and derive and record the revision from which NAME sprouts.
    BRANCH_NUMBER is an RCS branch number with an odd number of
    components, for example '1.7.2' (never '1.7.0.2').  Return the
    _BranchData instance (which is usually newly-created)."""

    branch_data = self.branches_data.get(branch_number)

    if branch_data is not None:
      # The same branch number already carries another name; keep the
      # first name and ignore this one.
      Log().warn(
          "%s: in '%s':\n"
          "   branch '%s' already has name '%s',\n"
          "   cannot also have name '%s', ignoring the latter\n"
          % (warning_prefix,
             self.cvs_file.filename, branch_number,
             branch_data.symbol.name, name)
          )
      return branch_data

    symbol = self.pdc.get_symbol(name)
    branch_data = _BranchData(
        self.collect_data.item_key_generator.gen_id(), symbol, branch_number
        )
    self.branches_data[branch_number] = branch_data
    return branch_data

  def _construct_distinct_name(self, name, original_name):
    """Construct a distinct symbol name from NAME.

    If NAME is distinct, return it.  If it is already used in this
    file (as determined from its presence in self._defined_symbols),
    construct and return a new name that is not already used."""

    if name not in self._defined_symbols:
      return name
    else:
      index = 1
      while True:
        dup_name = '%s-DUPLICATE-%d' % (name, index,)
        if dup_name not in self._defined_symbols:
          self.collect_data.record_fatal_error(
              "Symbol name '%s' is already used in '%s'.\n"
              "The unlabeled branch '%s' must be renamed using "
              "--symbol-transform."
              % (name, self.cvs_file.filename, original_name,)
              )
          return dup_name
        # Bug fix: the counter was never advanced before, so if
        # '<name>-DUPLICATE-1' happened to be a defined symbol this
        # loop would spin forever.  Try the next suffix instead:
        index += 1

  def _add_unlabeled_branch(self, branch_number):
    """Register an unlabeled branch for BRANCH_NUMBER.

    Generate a name for it ('unlabeled-<branch_number>', subject to
    symbol transforms and de-duplication) and return the resulting
    _BranchData."""

    original_name = "unlabeled-" + branch_number
    name = self.transform_symbol(original_name, branch_number)
    if name is None:
      self.collect_data.record_fatal_error(
          "The unlabeled branch '%s' in '%s' contains commits.\n"
          "It may not be ignored via a symbol transform.  (Use --exclude "
          "instead.)"
          % (original_name, self.cvs_file.filename,)
          )
      # Retain the original name to allow the conversion to continue:
      name = original_name

    distinct_name = self._construct_distinct_name(name, original_name)
    self._defined_symbols.add(distinct_name)
    return self._add_branch(distinct_name, branch_number)

  def _add_tag(self, name, revision):
    """Record that tag NAME refers to the specified REVISION."""

    symbol = self.pdc.get_symbol(name)
    tag_data = _TagData(
        self.collect_data.item_key_generator.gen_id(), symbol, revision
        )
    self.tags_data.setdefault(revision, []).append(tag_data)
    return tag_data

  def transform_symbol(self, name, revision):
    """Transform a symbol according to the project's symbol transforms.

    Transform the symbol with the original name NAME and canonicalized
    revision number REVISION.  Return the new symbol name or None if
    the symbol should be ignored entirely.

    Log the results of the symbol transform if necessary."""

    old_name = name
    # Apply any user-defined symbol transforms to the symbol name:
    name = self.cvs_file.project.transform_symbol(
        self.cvs_file, name, revision
        )

    if name is None:
      # Ignore symbol:
      self.pdc.log_symbol_transform(old_name, None)
      Log().verbose(
          "   symbol '%s'=%s ignored in %s"
          % (old_name, revision, self.cvs_file.filename,)
          )
    else:
      if name != old_name:
        self.pdc.log_symbol_transform(old_name, name)
        Log().verbose(
            "   symbol '%s'=%s transformed to '%s' in %s"
            % (old_name, revision, name, self.cvs_file.filename,)
            )

    return name

  def define_symbol(self, name, revision):
    """Record a symbol definition for later processing."""

    # Canonicalize the revision number (strip CVS's magic '0.'):
    revision = _branch_revision_re.sub(r'\1\2', revision)

    # Apply any user-defined symbol transforms to the symbol name:
    name = self.transform_symbol(name, revision)

    if name is not None:
      # Verify that the revision number is valid:
      if _valid_revision_re.match(revision):
        # The revision number is valid; record it for later processing:
        self._symbol_defs.append( (name, revision) )
      else:
        Log().warn(
            'In %r:\n'
            '    branch %r references invalid revision %s\n'
            '    and will be ignored.'
            % (self.cvs_file.filename, name, revision,)
            )

  def _eliminate_trivial_duplicate_defs(self, symbol_defs):
    """Iterate through SYMBOL_DEFS, removing identical duplicate definitions.

    Duplicate definitions of symbol names have been seen in the wild,
    and they can also happen when --symbol-transform is used.  If a
    symbol is defined to the same revision number repeatedly, then
    ignore all but the last definition."""

    # Make a copy, since we have to iterate through the definitions
    # twice:
    symbol_defs = list(symbol_defs)

    # A map { (name, revision) : [index,...] } of the indexes where
    # symbol definitions name=revision were found:
    known_definitions = {}
    for (i, symbol_def) in enumerate(symbol_defs):
      known_definitions.setdefault(symbol_def, []).append(i)

    # A set of the indexes of entries that have to be removed from
    # symbol_defs:
    dup_indexes = set()
    for ((name, revision), indexes) in known_definitions.iteritems():
      if len(indexes) > 1:
        Log().verbose(
            "in %r:\n"
            "   symbol %s:%s defined multiple times; ignoring duplicates\n"
            % (self.cvs_file.filename, name, revision,)
            )
        dup_indexes.update(indexes[:-1])

    for (i, symbol_def) in enumerate(symbol_defs):
      if i not in dup_indexes:
        yield symbol_def

  def _process_duplicate_defs(self, symbol_defs):
    """Iterate through SYMBOL_DEFS, processing duplicate names.

    Duplicate definitions of symbol names have been seen in the wild,
    and they can also happen when --symbol-transform is used.  If a
    symbol is defined multiple times, then it is a fatal error.  This
    method should be called after _eliminate_trivial_duplicate_defs()."""

    # Make a copy, since we have to access multiple times:
    symbol_defs = list(symbol_defs)

    # A map {name : [index,...]} mapping the names of symbols to a
    # list of their definitions' indexes in symbol_defs:
    known_symbols = {}
    for (i, (name, revision)) in enumerate(symbol_defs):
      known_symbols.setdefault(name, []).append(i)

    known_symbols = known_symbols.items()
    known_symbols.sort()
    dup_indexes = set()
    for (name, indexes) in known_symbols:
      if len(indexes) > 1:
        # This symbol was defined multiple times.
        self.collect_data.record_fatal_error(
            "Multiple definitions of the symbol '%s' in '%s': %s" % (
                name, self.cvs_file.filename,
                ' '.join([symbol_defs[i][1] for i in indexes]),
                )
            )
        # Ignore all but the last definition for now, to allow the
        # conversion to proceed:
        dup_indexes.update(indexes[:-1])

    for (i, symbol_def) in enumerate(symbol_defs):
      if i not in dup_indexes:
        yield symbol_def

  def _process_symbol(self, name, revision):
    """Process a symbol called NAME, which is associated with REVISION.

    REVISION is a canonical revision number with zeros removed, for
    example: '1.7', '1.7.2', or '1.1.1' or '1.1.1.1'.  NAME is a
    transformed branch or tag name."""

    # Add symbol to our records:
    if is_branch_revision_number(revision):
      self._add_branch(name, revision)
    else:
      self._add_tag(name, revision)

  def process_symbols(self):
    """Process the symbol definitions from SELF._symbol_defs."""

    symbol_defs = self._symbol_defs
    del self._symbol_defs

    symbol_defs = self._eliminate_trivial_duplicate_defs(symbol_defs)
    symbol_defs = self._process_duplicate_defs(symbol_defs)

    for (name, revision) in symbol_defs:
      self._defined_symbols.add(name)
      self._process_symbol(name, revision)

  @staticmethod
  def rev_to_branch_number(revision):
    """Return the branch_number of the branch on which REVISION lies.

    REVISION is a branch revision number with an even number of
    components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
    The return value is the branch number (for example, '1.7.2').
    Return None iff REVISION is a trunk revision such as '1.2'."""

    if is_trunk_revision(revision):
      return None
    return revision[:revision.rindex(".")]

  def rev_to_branch_data(self, revision):
    """Return the branch_data of the branch on which REVISION lies.

    REVISION must be a branch revision number with an even number of
    components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
    Raise KeyError iff REVISION is unknown."""

    assert not is_trunk_revision(revision)

    return self.branches_data[self.rev_to_branch_number(revision)]

  def rev_to_lod(self, revision):
    """Return the line of development on which REVISION lies.

    REVISION must be a revision number with an even number of
    components.  Raise KeyError iff REVISION is unknown."""

    if is_trunk_revision(revision):
      return self.pdc.trunk
    else:
      return self.rev_to_branch_data(revision).symbol
+
+
class _FileDataCollector(cvs2svn_rcsparse.Sink):
  """Class responsible for collecting RCS data for a particular file.

  Any collected data that need to be remembered are stored into the
  referenced CollectData instance."""

  def __init__(self, pdc, cvs_file):
    """Create an object that is prepared to receive data for CVS_FILE.

    CVS_FILE is a CVSFile instance.  PDC is the _ProjectDataCollector
    through which the overall CollectData instance (used to store the
    information collected about the file) is reached."""

    self.pdc = pdc
    self.cvs_file = cvs_file

    self.collect_data = self.pdc.collect_data
    self.project = self.cvs_file.project

    # A place to store information about the symbols in this file:
    self.sdc = _SymbolDataCollector(self, self.cvs_file)

    # { revision : _RevisionData instance }
    self._rev_data = { }

    # Lists [ (parent, child) ] of revision number pairs indicating
    # that revision child depends on revision parent along the main
    # line of development.
    self._primary_dependencies = []

    # If set, this is an RCS branch number -- rcsparse calls this the
    # "principal branch", but CVS and RCS refer to it as the "default
    # branch", so that's what we call it, even though the rcsparse API
    # setter method is still 'set_principal_branch'.
    self.default_branch = None

    # True iff revision 1.1 of the file appears to have been imported
    # (as opposed to added normally).
    self._file_imported = False

  def _get_rev_id(self, revision):
    """Return the cvs_rev_id allocated to REVISION, or None if REVISION
    is None.  Raise KeyError if REVISION is unknown."""

    if revision is None:
      return None
    return self._rev_data[revision].cvs_rev_id

  def set_principal_branch(self, branch):
    """This is a callback method declared in Sink."""

    if branch.find('.') == -1:
      # This just sets the default branch to trunk.  Normally this
      # shouldn't occur, but it has been seen in at least one CVS
      # repository.  Just ignore it.
      pass
    else:
      self.default_branch = branch

  def set_expansion(self, mode):
    """This is a callback method declared in Sink.

    MODE is the RCS keyword-expansion mode for the file (e.g., 'b',
    'kv', 'o')."""

    self.cvs_file.mode = mode

  def define_tag(self, name, revision):
    """Remember the symbol name and revision, but don't process them yet.

    This is a callback method declared in Sink."""

    self.sdc.define_symbol(name, revision)

  def admin_completed(self):
    """This is a callback method declared in Sink."""

    self.sdc.process_symbols()

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    """This is a callback method declared in Sink."""

    for branch in branches:
      try:
        branch_data = self.sdc.rev_to_branch_data(branch)
      except KeyError:
        # Normally we learn about the branches from the branch names
        # and numbers parsed from the symbolic name header.  But this
        # must have been an unlabeled branch that slipped through the
        # net.  Generate a name for it and create a _BranchData record
        # for it now.
        branch_data = self.sdc._add_unlabeled_branch(
            self.sdc.rev_to_branch_number(branch))

      assert branch_data.child is None
      branch_data.child = branch

    if revision in self._rev_data:
      # This revision has already been seen.
      Log().error('File %r contains duplicate definitions of revision %s.'
                  % (self.cvs_file.filename, revision,))
      raise RuntimeError

    # Record basic information about the revision:
    rev_data = _RevisionData(
        self.collect_data.item_key_generator.gen_id(),
        revision, int(timestamp), author, state)
    self._rev_data[revision] = rev_data

    # When on trunk, the RCS 'next' revision number points to what
    # humans might consider to be the 'previous' revision number.  For
    # example, 1.3's RCS 'next' is 1.2.
    #
    # However, on a branch, the RCS 'next' revision number really does
    # point to what humans would consider to be the 'next' revision
    # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
    #
    # In other words, in RCS, 'next' always means "where to find the next
    # deltatext that you need this revision to retrieve."
    #
    # That said, we don't *want* RCS's behavior here, so we determine
    # whether we're on trunk or a branch and set the dependencies
    # accordingly.
    if next:
      if is_trunk_revision(revision):
        self._primary_dependencies.append( (next, revision,) )
      else:
        self._primary_dependencies.append( (revision, next,) )

  def _resolve_primary_dependencies(self):
    """Resolve the dependencies listed in self._primary_dependencies."""

    for (parent, child,) in self._primary_dependencies:
      parent_data = self._rev_data[parent]
      assert parent_data.child is None
      parent_data.child = child

      child_data = self._rev_data[child]
      assert child_data.parent is None
      child_data.parent = parent

  def _resolve_branch_dependencies(self):
    """Resolve dependencies involving branches."""

    for branch_data in self.sdc.branches_data.values():
      # The branch_data's parent has the branch as a child regardless
      # of whether the branch had any subsequent commits:
      try:
        parent_data = self._rev_data[branch_data.parent]
      except KeyError:
        Log().warn(
            'In %r:\n'
            '    branch %r references non-existing revision %s\n'
            '    and will be ignored.'
            % (self.cvs_file.filename, branch_data.symbol.name,
               branch_data.parent,))
        del self.sdc.branches_data[branch_data.branch_number]
      else:
        parent_data.branches_data.append(branch_data)

        # If the branch has a child (i.e., something was committed on
        # the branch), then we store a reference to the branch_data
        # there, define the child's parent to be the branch's parent,
        # and list the child in the branch parent's branches_revs_data:
        if branch_data.child is not None:
          child_data = self._rev_data[branch_data.child]
          assert child_data.parent_branch_data is None
          child_data.parent_branch_data = branch_data
          assert child_data.parent is None
          child_data.parent = branch_data.parent
          parent_data.branches_revs_data.append(branch_data.child)

  def _sort_branches(self):
    """Sort the branches sprouting from each revision in creation order.

    Creation order is taken to be the reverse of the order that they
    are listed in the symbols part of the RCS file.  (If a branch is
    created then deleted, a later branch can be assigned the recycled
    branch number; therefore branch numbers are not an indication of
    creation order.)"""

    for rev_data in self._rev_data.values():
      # Ids are allocated in parse order, so sorting by descending id
      # yields the reverse of the order the symbols were listed:
      rev_data.branches_data.sort(lambda a, b: - cmp(a.id, b.id))

  def _resolve_tag_dependencies(self):
    """Resolve dependencies involving tags."""

    for (rev, tag_data_list) in self.sdc.tags_data.items():
      try:
        parent_data = self._rev_data[rev]
      except KeyError:
        Log().warn(
            'In %r:\n'
            '    the following tag(s) reference non-existing revision %s\n'
            '    and will be ignored:\n'
            '    %s' % (
                self.cvs_file.filename, rev,
                ', '.join([repr(tag_data.symbol.name)
                           for tag_data in tag_data_list]),))
        del self.sdc.tags_data[rev]
      else:
        for tag_data in tag_data_list:
          assert tag_data.rev == rev
          # The tag_data's rev has the tag as a child:
          parent_data.tags_data.append(tag_data)

  def _determine_operation(self, rev_data):
    """Return the CVSRevision subtype (add/change/delete) for REV_DATA.

    The type is looked up in cvs_revision_type_map, keyed by whether
    this revision and its predecessor are alive (state != 'dead')."""

    prev_rev_data = self._rev_data.get(rev_data.parent)
    return cvs_revision_type_map[(
        rev_data.state != 'dead',
        prev_rev_data is not None and prev_rev_data.state != 'dead',
        )]

  def _get_cvs_revision(self, rev_data):
    """Create and return a CVSRevision for REV_DATA."""

    branch_ids = [
        branch_data.id
        for branch_data in rev_data.branches_data
        ]

    branch_commit_ids = [
        self._get_rev_id(rev)
        for rev in rev_data.branches_revs_data
        ]

    tag_ids = [
        tag_data.id
        for tag_data in rev_data.tags_data
        ]

    revision_type = self._determine_operation(rev_data)

    # The positional arguments follow the CVSRevision constructor
    # signature (see cvs_item.py); some fields (e.g., metadata_id) are
    # filled in later, in set_revision_info():
    return revision_type(
        self._get_rev_id(rev_data.rev), self.cvs_file,
        rev_data.timestamp, None,
        self._get_rev_id(rev_data.parent),
        self._get_rev_id(rev_data.child),
        rev_data.rev,
        True,
        self.sdc.rev_to_lod(rev_data.rev),
        rev_data.get_first_on_branch_id(),
        False, None, None,
        tag_ids, branch_ids, branch_commit_ids,
        rev_data.revision_recorder_token)

  def _get_cvs_revisions(self):
    """Generate the CVSRevisions present in this file."""

    for rev_data in self._rev_data.itervalues():
      yield self._get_cvs_revision(rev_data)

  def _get_cvs_branches(self):
    """Generate the CVSBranches present in this file."""

    for branch_data in self.sdc.branches_data.values():
      yield CVSBranch(
          branch_data.id, self.cvs_file, branch_data.symbol,
          branch_data.branch_number,
          self.sdc.rev_to_lod(branch_data.parent),
          self._get_rev_id(branch_data.parent),
          self._get_rev_id(branch_data.child),
          None,
          )

  def _get_cvs_tags(self):
    """Generate the CVSTags present in this file."""

    for tags_data in self.sdc.tags_data.values():
      for tag_data in tags_data:
        yield CVSTag(
            tag_data.id, self.cvs_file, tag_data.symbol,
            self.sdc.rev_to_lod(tag_data.rev),
            self._get_rev_id(tag_data.rev),
            None,
            )

  def tree_completed(self):
    """The revision tree has been parsed.

    Analyze it for consistency and connect some loose ends.

    This is a callback method declared in Sink."""

    self._resolve_primary_dependencies()
    self._resolve_branch_dependencies()
    self._sort_branches()
    self._resolve_tag_dependencies()

    # Compute the preliminary CVSFileItems for this file:
    cvs_items = []
    cvs_items.extend(self._get_cvs_revisions())
    cvs_items.extend(self._get_cvs_branches())
    cvs_items.extend(self._get_cvs_tags())
    self._cvs_file_items = CVSFileItems(
        self.cvs_file, self.pdc.trunk, cvs_items
        )

    self._cvs_file_items.check_link_consistency()

    # Tell the revision recorder about the file dependency tree.
    self.collect_data.revision_recorder.start_file(self._cvs_file_items)

  def set_revision_info(self, revision, log, text):
    """This is a callback method declared in Sink."""

    rev_data = self._rev_data[revision]
    cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

    if cvs_rev.metadata_id is not None:
      # Users have reported problems with repositories in which the
      # deltatext block for revision 1.1 appears twice.  It is not
      # known whether this results from a CVS/RCS bug, or from botched
      # hand-editing of the repository.  In any case, empirically, cvs
      # and rcs both use the first version when checking out data, so
      # that's what we will do.  (For the record: "cvs log" fails on
      # such a file; "rlog" prints the log message from the first
      # block and ignores the second one.)
      Log().warn(
          "%s: in '%s':\n"
          "   Deltatext block for revision %s appeared twice;\n"
          "   ignoring the second occurrence.\n"
          % (warning_prefix, self.cvs_file.filename, revision,)
          )
      return

    if is_trunk_revision(revision):
      branch_name = None
    else:
      branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

    cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
        self.project, branch_name, rev_data.author, log
        )
    cvs_rev.deltatext_exists = bool(text)

    # If this is revision 1.1, determine whether the file appears to
    # have been created via 'cvs add' instead of 'cvs import'.  The
    # test is that the log message CVS uses for 1.1 in imports is
    # "Initial revision\n" with no period.  (This fact helps determine
    # whether this file might have had a default branch in the past.)
    if revision == '1.1':
      self._file_imported = (log == 'Initial revision\n')

    cvs_rev.revision_recorder_token = \
        self.collect_data.revision_recorder.record_text(cvs_rev, log, text)

  def parse_completed(self):
    """Finish the processing of this file.

    This is a callback method declared in Sink."""

    # Make sure that there was an info section for each revision:
    for cvs_item in self._cvs_file_items.values():
      if isinstance(cvs_item, CVSRevision) and cvs_item.metadata_id is None:
        self.collect_data.record_fatal_error(
            '%r has no deltatext section for revision %s'
            % (self.cvs_file.filename, cvs_item.rev,)
            )

  def _process_ntdbrs(self):
    """Fix up any non-trunk default branch revisions (if present).

    If a non-trunk default branch is determined to have existed, yield
    the _RevisionData.ids for all revisions that were once non-trunk
    default revisions, in dependency order.

    There are two cases to handle:

    One case is simple.  The RCS file lists a default branch
    explicitly in its header, such as '1.1.1'.  In this case, we know
    that every revision on the vendor branch is to be treated as head
    of trunk at that point in time.

    But there's also a degenerate case.  The RCS file does not
    currently have a default branch, yet we can deduce that for some
    period in the past it probably *did* have one.  For example, the
    file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
    dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
    after 1.2.  In this case, we should record 1.1.1.96 as the last
    vendor revision to have been the head of the default branch.

    If any non-trunk default branch revisions are found:

    - Set their ntdbr members to True.

    - Connect the last one with revision 1.2.

    - Remove revision 1.1 if it is not needed.

    """

    try:
      if self.default_branch:
        # Explicit default branch recorded in the RCS header:
        vendor_cvs_branch_id = self.sdc.branches_data[self.default_branch].id
        vendor_lod_items = self._cvs_file_items.get_lod_items(
            self._cvs_file_items[vendor_cvs_branch_id]
            )
        if not self._cvs_file_items.process_live_ntdb(vendor_lod_items):
          return
      elif self._file_imported:
        # Degenerate case: no default branch now, but the file was
        # imported, so branch '1.1.1' may have been the default branch
        # in the past:
        vendor_branch_data = self.sdc.branches_data.get('1.1.1')
        if vendor_branch_data is None:
          return
        else:
          vendor_lod_items = self._cvs_file_items.get_lod_items(
              self._cvs_file_items[vendor_branch_data.id]
              )
          if not self._cvs_file_items.process_historical_ntdb(
                vendor_lod_items
                ):
            return
      else:
        return
    except VendorBranchError, e:
      self.collect_data.record_fatal_error(str(e))
      return

    if self._file_imported:
      self._cvs_file_items.imported_remove_1_1(vendor_lod_items)

    self._cvs_file_items.check_link_consistency()

  def get_cvs_file_items(self):
    """Finish up and return a CVSFileItems instance for this file.

    This method must only be called once."""

    self._process_ntdbrs()

    # Break a circular reference loop, allowing the memory for self
    # and sdc to be freed.
    del self.sdc

    return self._cvs_file_items
+
+
+class _ProjectDataCollector:
+ def __init__(self, collect_data, project):
+ self.collect_data = collect_data
+ self.project = project
+ self.num_files = 0
+
+ # The Trunk LineOfDevelopment object for this project:
+ self.trunk = Trunk(
+ self.collect_data.symbol_key_generator.gen_id(), self.project
+ )
+ self.project.trunk_id = self.trunk.id
+
+ # This causes a record for self.trunk to spring into existence:
+ self.collect_data.symbol_stats[self.trunk]
+
+ # A map { name -> Symbol } for all known symbols in this project.
+ # The symbols listed here are undifferentiated into Branches and
+ # Tags because the same name might appear as a branch in one file
+ # and a tag in another.
+ self.symbols = {}
+
+ # A map { (old_name, new_name) : count } indicating how many files
+    # were affected by each symbol name transformation:
+ self.symbol_transform_counts = {}
+
+ def get_symbol(self, name):
+ """Return the Symbol object for the symbol named NAME in this project.
+
+ If such a symbol does not yet exist, allocate a new symbol_id,
+ create a Symbol instance, store it in self.symbols, and return it."""
+
+ symbol = self.symbols.get(name)
+ if symbol is None:
+ symbol = Symbol(
+ self.collect_data.symbol_key_generator.gen_id(),
+ self.project, name)
+ self.symbols[name] = symbol
+ return symbol
+
+ def log_symbol_transform(self, old_name, new_name):
+ """Record that OLD_NAME was transformed to NEW_NAME in one file.
+
+    This information is used to generate a statistical summary of
+ symbol transforms."""
+
+ try:
+ self.symbol_transform_counts[old_name, new_name] += 1
+ except KeyError:
+ self.symbol_transform_counts[old_name, new_name] = 1
+
+ def summarize_symbol_transforms(self):
+ if self.symbol_transform_counts and Log().is_on(Log.NORMAL):
+ log = Log()
+ log.normal('Summary of symbol transforms:')
+ transforms = self.symbol_transform_counts.items()
+ transforms.sort()
+ for ((old_name, new_name), count) in transforms:
+ if new_name is None:
+ log.normal(' "%s" ignored in %d files' % (old_name, count,))
+ else:
+ log.normal(
+ ' "%s" transformed to "%s" in %d files'
+ % (old_name, new_name, count,)
+ )
+
+ def _process_cvs_file_items(self, cvs_file_items):
+ """Process the CVSFileItems from one CVSFile."""
+
+ # Remove CVSRevisionDeletes that are not needed:
+ cvs_file_items.remove_unneeded_deletes(self.collect_data.metadata_db)
+
+ # Remove initial branch deletes that are not needed:
+ cvs_file_items.remove_initial_branch_deletes(
+ self.collect_data.metadata_db
+ )
+
+ # If this is a --trunk-only conversion, discard all branches and
+ # tags, then draft any non-trunk default branch revisions to
+ # trunk:
+ if Ctx().trunk_only:
+ cvs_file_items.exclude_non_trunk()
+
+ self.collect_data.revision_recorder.finish_file(cvs_file_items)
+ self.collect_data.add_cvs_file_items(cvs_file_items)
+ self.collect_data.symbol_stats.register(cvs_file_items)
+
+ def process_file(self, cvs_file):
+ Log().normal(cvs_file.filename)
+ fdc = _FileDataCollector(self, cvs_file)
+ try:
+ cvs2svn_rcsparse.parse(open(cvs_file.filename, 'rb'), fdc)
+ except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError):
+ self.collect_data.record_fatal_error(
+ "%r is not a valid ,v file" % (cvs_file.filename,)
+ )
+ # Abort the processing of this file, but let the pass continue
+ # with other files:
+ return
+ except:
+ Log().warn("Exception occurred while parsing %s" % cvs_file.filename)
+ raise
+ else:
+ self.num_files += 1
+
+ cvs_file_items = fdc.get_cvs_file_items()
+
+ del fdc
+
+ self._process_cvs_file_items(cvs_file_items)
+
+
+class CollectData:
+ """Repository for data collected by parsing the CVS repository files.
+
+ This class manages the databases into which information collected
+ from the CVS repository is stored. The data are stored into this
+ class by _FileDataCollector instances, one of which is created for
+ each file to be parsed."""
+
+ def __init__(self, revision_recorder, stats_keeper):
+ self.revision_recorder = revision_recorder
+ self._cvs_item_store = NewCVSItemStore(
+ artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
+ self.metadata_db = MetadataDatabase(
+ artifact_manager.get_temp_file(config.METADATA_STORE),
+ artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
+ DB_OPEN_NEW,
+ )
+ self.metadata_logger = MetadataLogger(self.metadata_db)
+ self.fatal_errors = []
+ self.num_files = 0
+ self.symbol_stats = SymbolStatisticsCollector()
+ self.stats_keeper = stats_keeper
+
+ # Key generator for CVSFiles:
+ self.file_key_generator = KeyGenerator()
+
+ # Key generator for CVSItems:
+ self.item_key_generator = KeyGenerator()
+
+ # Key generator for Symbols:
+ self.symbol_key_generator = KeyGenerator()
+
+ self.revision_recorder.start()
+
+ def record_fatal_error(self, err):
+ """Record that fatal error ERR was found.
+
+ ERR is a string (without trailing newline) describing the error.
+ Output the error to stderr immediately, and record a copy to be
+ output again in a summary at the end of CollectRevsPass."""
+
+ err = '%s: %s' % (error_prefix, err,)
+ Log().error(err + '\n')
+ self.fatal_errors.append(err)
+
+ def add_cvs_directory(self, cvs_directory):
+ """Record CVS_DIRECTORY."""
+
+ Ctx()._cvs_file_db.log_file(cvs_directory)
+
+ def add_cvs_file_items(self, cvs_file_items):
+ """Record the information from CVS_FILE_ITEMS.
+
+ Store the CVSFile to _cvs_file_db under its persistent id, store
+ the CVSItems, and record the CVSItems to self.stats_keeper."""
+
+ Ctx()._cvs_file_db.log_file(cvs_file_items.cvs_file)
+ self._cvs_item_store.add(cvs_file_items)
+
+ self.stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
+ for cvs_item in cvs_file_items.values():
+ self.stats_keeper.record_cvs_item(cvs_item)
+
+ def _get_cvs_file(
+ self, parent_directory, basename, file_in_attic, leave_in_attic=False
+ ):
+ """Return a CVSFile describing the file with name BASENAME.
+
+ PARENT_DIRECTORY is the CVSDirectory instance describing the
+ directory that physically holds this file in the filesystem.
+ BASENAME must be the base name of a *,v file within
+ PARENT_DIRECTORY.
+
+ FILE_IN_ATTIC is a boolean telling whether the specified file is
+ in an Attic subdirectory. If FILE_IN_ATTIC is True, then:
+
+ - If LEAVE_IN_ATTIC is True, then leave the 'Attic' component in
+ the filename.
+
+ - Otherwise, raise FileInAndOutOfAtticException if a file with the
+ same filename appears outside of Attic.
+
+ The CVSFile is assigned a new unique id. All of the CVSFile
+ information is filled in except mode (which can only be determined
+ by parsing the file).
+
+ Raise FatalError if the resulting filename would not be legal in
+ SVN."""
+
+ filename = os.path.join(parent_directory.filename, basename)
+ try:
+ verify_svn_filename_legal(basename[:-2])
+ except IllegalSVNPathError, e:
+ raise FatalError(
+ 'File %r would result in an illegal SVN filename: %s'
+ % (filename, e,)
+ )
+
+ if file_in_attic and not leave_in_attic:
+ in_attic = True
+ logical_parent_directory = parent_directory.parent_directory
+
+ # If this file also exists outside of the attic, it's a fatal
+ # error:
+ non_attic_filename = os.path.join(
+ logical_parent_directory.filename, basename,
+ )
+ if os.path.exists(non_attic_filename):
+ raise FileInAndOutOfAtticException(non_attic_filename, filename)
+ else:
+ in_attic = False
+ logical_parent_directory = parent_directory
+
+ file_stat = os.stat(filename)
+
+ # The size of the file in bytes:
+ file_size = file_stat[stat.ST_SIZE]
+
+ # Whether or not the executable bit is set:
+ file_executable = bool(file_stat[0] & stat.S_IXUSR)
+
+ # mode is not known, so we temporarily set it to None.
+ return CVSFile(
+ self.file_key_generator.gen_id(),
+ parent_directory.project, logical_parent_directory, basename[:-2],
+ in_attic, file_executable, file_size, None
+ )
+
+ def _get_attic_file(self, parent_directory, basename):
+ """Return a CVSFile object for the Attic file at BASENAME.
+
+ PARENT_DIRECTORY is the CVSDirectory that physically contains the
+ file on the filesystem (i.e., the Attic directory). It is not
+ necessarily the parent_directory of the CVSFile that will be
+ returned.
+
+ Return CVSFile, whose parent directory is usually
+ PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY
+ iff CVSFile will remain in the Attic directory."""
+
+ try:
+ return self._get_cvs_file(parent_directory, basename, True)
+ except FileInAndOutOfAtticException, e:
+ if Ctx().retain_conflicting_attic_files:
+ Log().warn(
+ "%s: %s;\n"
+ " storing the latter into 'Attic' subdirectory.\n"
+ % (warning_prefix, e)
+ )
+ else:
+ self.record_fatal_error(str(e))
+
+ # Either way, return a CVSFile object so that the rest of the
+ # file processing can proceed:
+ return self._get_cvs_file(
+ parent_directory, basename, True, leave_in_attic=True
+ )
+
+ def _generate_attic_cvs_files(self, cvs_directory):
+ """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.
+
+ Also add CVS_DIRECTORY to self if any files are being retained in
+ that directory."""
+
+ retained_attic_file = False
+
+ fnames = os.listdir(cvs_directory.filename)
+ fnames.sort()
+ for fname in fnames:
+ pathname = os.path.join(cvs_directory.filename, fname)
+ if os.path.isdir(pathname):
+ Log().warn("Directory %s found within Attic; ignoring" % (pathname,))
+ elif fname.endswith(',v'):
+ cvs_file = self._get_attic_file(cvs_directory, fname)
+ if cvs_file.parent_directory == cvs_directory:
+ # This file will be retained in the Attic directory.
+ retained_attic_file = True
+ yield cvs_file
+
+ if retained_attic_file:
+ # If any files were retained in the Attic directory, then write
+ # the Attic directory to CVSFileDatabase:
+ self.add_cvs_directory(cvs_directory)
+
+ def _get_non_attic_file(self, parent_directory, basename):
+ """Return a CVSFile object for the non-Attic file at BASENAME."""
+
+ return self._get_cvs_file(parent_directory, basename, False)
+
+ def _generate_cvs_files(self, cvs_directory):
+ """Generate the CVSFiles under non-Attic directory CVS_DIRECTORY.
+
+ Process directories recursively, including Attic directories.
+ Also create and register CVSDirectories as they are found, and
+ look for conflicts between the filenames that will result from
+ files, attic files, and subdirectories."""
+
+ self.add_cvs_directory(cvs_directory)
+
+ # Map {cvs_file.basename : cvs_file.filename} for files directly
+ # in cvs_directory:
+ rcsfiles = {}
+
+ attic_dir = None
+
+ # Non-Attic subdirectories of cvs_directory (to be recursed into):
+ dirs = []
+
+ fnames = os.listdir(cvs_directory.filename)
+ fnames.sort()
+ for fname in fnames:
+ pathname = os.path.join(cvs_directory.filename, fname)
+ if os.path.isdir(pathname):
+ if fname == 'Attic':
+ attic_dir = fname
+ else:
+ dirs.append(fname)
+ elif fname.endswith(',v'):
+ cvs_file = self._get_non_attic_file(cvs_directory, fname)
+ rcsfiles[cvs_file.basename] = cvs_file.filename
+ yield cvs_file
+ else:
+ # Silently ignore other files:
+ pass
+
+ # Map {cvs_file.basename : cvs_file.filename} for files in an
+ # Attic directory within cvs_directory:
+ attic_rcsfiles = {}
+
+ if attic_dir is not None:
+ attic_directory = CVSDirectory(
+ self.file_key_generator.gen_id(),
+ cvs_directory.project, cvs_directory, 'Attic',
+ )
+
+ for cvs_file in self._generate_attic_cvs_files(attic_directory):
+ if cvs_file.parent_directory == cvs_directory:
+ attic_rcsfiles[cvs_file.basename] = cvs_file.filename
+ yield cvs_file
+
+ alldirs = dirs + [attic_dir]
+ else:
+ alldirs = dirs
+
+ # Check for conflicts between directory names and the filenames
+ # that will result from the rcs files (both in this directory and
+ # in attic). (We recurse into the subdirectories nevertheless, to
+ # try to detect more problems.)
+ for fname in alldirs:
+ pathname = os.path.join(cvs_directory.filename, fname)
+ for rcsfile_list in [rcsfiles, attic_rcsfiles]:
+ if fname in rcsfile_list:
+ self.record_fatal_error(
+ 'Directory name conflicts with filename. Please remove or '
+ 'rename one\n'
+ 'of the following:\n'
+ ' "%s"\n'
+ ' "%s"'
+ % (pathname, rcsfile_list[fname],)
+ )
+
+ # Now recurse into the other subdirectories:
+ for fname in dirs:
+ dirname = os.path.join(cvs_directory.filename, fname)
+
+ # Verify that the directory name does not contain any illegal
+ # characters:
+ try:
+ verify_svn_filename_legal(fname)
+ except IllegalSVNPathError, e:
+ raise FatalError(
+ 'Directory %r would result in an illegal SVN path name: %s'
+ % (dirname, e,)
+ )
+
+ sub_directory = CVSDirectory(
+ self.file_key_generator.gen_id(),
+ cvs_directory.project, cvs_directory, fname,
+ )
+
+ for cvs_file in self._generate_cvs_files(sub_directory):
+ yield cvs_file
+
+ def process_project(self, project):
+ Ctx()._projects[project.id] = project
+
+ root_cvs_directory = CVSDirectory(
+ self.file_key_generator.gen_id(), project, None, ''
+ )
+ project.root_cvs_directory_id = root_cvs_directory.id
+ pdc = _ProjectDataCollector(self, project)
+
+ found_rcs_file = False
+ for cvs_file in self._generate_cvs_files(root_cvs_directory):
+ pdc.process_file(cvs_file)
+ found_rcs_file = True
+
+ if not found_rcs_file:
+ self.record_fatal_error(
+ 'No RCS files found under %r!\n'
+ 'Are you absolutely certain you are pointing cvs2svn\n'
+ 'at a CVS repository?\n'
+ % (project.project_cvs_repos_path,)
+ )
+
+ pdc.summarize_symbol_transforms()
+
+ self.num_files += pdc.num_files
+ Log().verbose('Processed', self.num_files, 'files')
+
+ def _set_cvs_path_ordinals(self):
+ cvs_files = list(Ctx()._cvs_file_db.itervalues())
+ cvs_files.sort(CVSPath.slow_compare)
+ for (i, cvs_file) in enumerate(cvs_files):
+ cvs_file.ordinal = i
+
+ def close(self):
+ """Close the data structures associated with this instance.
+
+ Return a list of fatal errors encountered while processing input.
+ Each list entry is a string describing one fatal error."""
+
+ self.revision_recorder.finish()
+ self.symbol_stats.purge_ghost_symbols()
+ self.symbol_stats.close()
+ self.symbol_stats = None
+ self.metadata_logger = None
+ self.metadata_db.close()
+ self.metadata_db = None
+ self._cvs_item_store.close()
+ self._cvs_item_store = None
+ self._set_cvs_path_ordinals()
+ self.revision_recorder = None
+ retval = self.fatal_errors
+ self.fatal_errors = None
+ return retval
+
+
diff --git a/cvs2svn_lib/common.py b/cvs2svn_lib/common.py
new file mode 100644
index 0000000..8400907
--- /dev/null
+++ b/cvs2svn_lib/common.py
@@ -0,0 +1,409 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains common facilities used by cvs2svn."""
+
+
+import re
+import time
+import codecs
+
+from cvs2svn_lib.log import Log
+
+
+# Always use these constants for opening databases.
+DB_OPEN_READ = 'r'
+DB_OPEN_WRITE = 'w'
+DB_OPEN_NEW = 'n'
+
+
+SVN_INVALID_REVNUM = -1
+
+
+# Warnings and errors start with these strings. They are typically
+# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
+warning_prefix = "WARNING"
+error_prefix = "ERROR"
+
+
+class FatalException(Exception):
+ """Exception thrown on a non-recoverable error.
+
+ If this exception is thrown by main(), it is caught by the global
+ layer of the program, its string representation is printed (followed
+ by a newline), and the program is ended with an exit code of 1."""
+
+ pass
+
+
+class InternalError(Exception):
+ """Exception thrown in the case of a cvs2svn internal error (aka, bug)."""
+
+ pass
+
+
+class FatalError(FatalException):
+ """A FatalException that prepends error_prefix to the message."""
+
+ def __init__(self, msg):
+ """Use (error_prefix + ': ' + MSG) as the error message."""
+
+ FatalException.__init__(self, '%s: %s' % (error_prefix, msg,))
+
+
+class CommandError(FatalError):
+ """A FatalError caused by a failed command invocation.
+
+ The error message includes the command name, exit code, and output."""
+
+ def __init__(self, command, exit_status, error_output=''):
+ self.command = command
+ self.exit_status = exit_status
+ self.error_output = error_output
+ if error_output.rstrip():
+ FatalError.__init__(
+ self,
+ 'The command %r failed with exit status=%s\n'
+ 'and the following output:\n'
+ '%s'
+ % (self.command, self.exit_status, self.error_output.rstrip()))
+ else:
+ FatalError.__init__(
+ self,
+ 'The command %r failed with exit status=%s and no output'
+ % (self.command, self.exit_status))
+
+
+def path_join(*components):
+ """Join two or more pathname COMPONENTS, inserting '/' as needed.
+  Empty components are skipped."""
+
+ return '/'.join(filter(None, components))
+
+
+def path_split(path):
+ """Split the svn pathname PATH into a pair, (HEAD, TAIL).
+
+ This is similar to os.path.split(), but always uses '/' as path
+ separator. PATH is an svn path, which should not start with a '/'.
+ HEAD is everything before the last slash, and TAIL is everything
+ after. If PATH ends in a slash, TAIL will be empty. If there is no
+ slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
+ TAIL are empty."""
+
+ pos = path.rfind('/')
+ if pos == -1:
+ return ('', path,)
+ else:
+ return (path[:pos], path[pos+1:],)
+
+
+class IllegalSVNPathError(FatalException):
+ pass
+
+
+# Control characters (characters not allowed in Subversion filenames):
+ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')
+
+
+def verify_svn_filename_legal(filename):
+ """Verify that FILENAME is a legal filename.
+
+ FILENAME is a path component of a CVS path. Check that it won't
+ choke SVN:
+
+ - Check that it is not empty.
+
+ - Check that it is not equal to '.' or '..'.
+
+ - Check that the filename does not include any control characters.
+
+ If any of these tests fail, raise an IllegalSVNPathError."""
+
+ if filename == '':
+ raise IllegalSVNPathError("Empty filename component.")
+
+ if filename in ['.', '..']:
+ raise IllegalSVNPathError("Illegal filename component %r." % (filename,))
+
+ m = ctrl_characters_regexp.search(filename)
+ if m:
+ raise IllegalSVNPathError(
+ "Character %r in filename %r is not supported by Subversion."
+ % (m.group(), filename,)
+ )
+
+
+def verify_svn_path_legal(path):
+ """Verify that PATH is a legitimate SVN path.
+
+ If not, raise an IllegalSVNPathError."""
+
+ if path.startswith('/'):
+ raise IllegalSVNPathError("Path %r must not start with '/'." % (path,))
+ head = path
+ while head != '':
+ (head,tail) = path_split(head)
+ try:
+ verify_svn_filename_legal(tail)
+ except IllegalSVNPathError, e:
+ raise IllegalSVNPathError('Problem with path %r: %s' % (path, e,))
+
+
+def normalize_svn_path(path, allow_empty=False):
+ """Normalize an SVN path (e.g., one supplied by a user).
+
+ 1. Strip leading, trailing, and duplicated '/'.
+ 2. If ALLOW_EMPTY is not set, verify that PATH is not empty.
+
+ Return the normalized path.
+
+ If the path is invalid, raise an IllegalSVNPathError."""
+
+ norm_path = path_join(*path.split('/'))
+ if not allow_empty and not norm_path:
+ raise IllegalSVNPathError("Path is empty")
+ return norm_path
+
+
+class PathRepeatedException(Exception):
+ def __init__(self, path, count):
+ self.path = path
+ self.count = count
+ Exception.__init__(
+ self, 'Path %s is repeated %d times' % (self.path, self.count,)
+ )
+
+
+class PathsNestedException(Exception):
+ def __init__(self, nest, nestlings):
+ self.nest = nest
+ self.nestlings = nestlings
+ Exception.__init__(
+ self,
+ 'Path %s contains the following other paths: %s'
+ % (self.nest, ', '.join(self.nestlings),)
+ )
+
+
+class PathsNotDisjointException(FatalException):
+ """An exception that collects multiple other disjointness exceptions."""
+
+ def __init__(self, problems):
+ self.problems = problems
+ Exception.__init__(
+ self,
+ 'The following paths are not disjoint:\n'
+ ' %s\n'
+ % ('\n '.join([str(problem) for problem in self.problems]),)
+ )
+
+
+def verify_paths_disjoint(*paths):
+ """Verify that all of the paths in the argument list are disjoint.
+
+ If any of the paths is nested in another one (i.e., in the sense
+ that 'a/b/c/d' is nested in 'a/b'), or any two paths are identical,
+ raise a PathsNotDisjointException containing exceptions detailing
+ the individual problems."""
+
+ def split(path):
+ if not path:
+ return []
+ else:
+ return path.split('/')
+
+ def contains(split_path1, split_path2):
+ """Return True iff SPLIT_PATH1 contains SPLIT_PATH2."""
+
+ return (
+ len(split_path1) < len(split_path2)
+ and split_path2[:len(split_path1)] == split_path1
+ )
+
+ paths = [(split(path), path) for path in paths]
+ # If all overlapping elements are equal, a shorter list is
+ # considered "less than" a longer one. Therefore if any paths are
+ # nested, this sort will leave at least one such pair adjacent, in
+ # the order [nest,nestling].
+ paths.sort()
+
+ problems = []
+
+ # Create exceptions for any repeated paths, and delete the repeats
+ # from the paths array:
+ i = 0
+ while i < len(paths):
+ split_path, path = paths[i]
+ j = i + 1
+ while j < len(paths) and split_path == paths[j][0]:
+ j += 1
+ if j - i > 1:
+ problems.append(PathRepeatedException(path, j - i))
+ # Delete all but the first copy:
+ del paths[i + 1:j]
+ i += 1
+
+ # Create exceptions for paths nested in each other:
+ i = 0
+ while i < len(paths):
+ split_path, path = paths[i]
+ j = i + 1
+ while j < len(paths) and contains(split_path, paths[j][0]):
+ j += 1
+ if j - i > 1:
+ problems.append(PathsNestedException(
+ path, [path2 for (split_path2, path2) in paths[i + 1:j]]
+ ))
+ i += 1
+
+ if problems:
+ raise PathsNotDisjointException(problems)
+
+
+def format_date(date):
+ """Return an svn-compatible date string for DATE (seconds since epoch).
+
+ A Subversion date looks like '2002-09-29T14:44:59.000000Z'."""
+
+ return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date))
+
+
+class CVSTextDecoder:
+ """Callable that decodes CVS strings into Unicode."""
+
+ def __init__(self, encodings, fallback_encoding=None):
+ """Create a CVSTextDecoder instance.
+
+    ENCODINGS is a list of the names of encodings that will be
+    tried, in order, as source encodings in 'strict' mode.
+
+ FALLBACK_ENCODING, if specified, is the name of an encoding that
+ should be used as a source encoding in lossy 'replace' mode if all
+ of ENCODINGS failed.
+
+ Raise LookupError if any of the specified encodings is unknown."""
+
+ self.decoders = [
+ (encoding, codecs.lookup(encoding)[1])
+ for encoding in encodings]
+
+ if fallback_encoding is None:
+ self.fallback_decoder = None
+ else:
+ self.fallback_decoder = (
+ fallback_encoding, codecs.lookup(fallback_encoding)[1]
+ )
+
+ def add_encoding(self, encoding):
+ """Add an encoding to be tried in 'strict' mode.
+
+ ENCODING is the name of an encoding. If it is unknown, raise a
+ LookupError."""
+
+ for (name, decoder) in self.decoders:
+ if name == encoding:
+ return
+ else:
+ self.decoders.append( (encoding, codecs.lookup(encoding)[1]) )
+
+ def set_fallback_encoding(self, encoding):
+ """Set the fallback encoding, to be tried in 'replace' mode.
+
+ ENCODING is the name of an encoding. If it is unknown, raise a
+ LookupError."""
+
+ if encoding is None:
+ self.fallback_decoder = None
+ else:
+ self.fallback_decoder = (encoding, codecs.lookup(encoding)[1])
+
+ def __call__(self, s):
+ """Try to decode string S using our configured source encodings.
+
+ Return the string as a Unicode string. If S is already a unicode
+ string, do nothing.
+
+ Raise UnicodeError if the string cannot be decoded using any of
+ the source encodings and no fallback encoding was specified."""
+
+ if isinstance(s, unicode):
+ return s
+ for (name, decoder) in self.decoders:
+ try:
+ return decoder(s)[0]
+ except ValueError:
+ Log().verbose("Encoding '%s' failed for string %r" % (name, s))
+
+ if self.fallback_decoder is not None:
+ (name, decoder) = self.fallback_decoder
+ return decoder(s, 'replace')[0]
+ else:
+ raise UnicodeError
+
+
+class Timestamper:
+ """Return monotonic timestamps derived from changeset timestamps."""
+
+ def __init__(self):
+ # The last timestamp that has been returned:
+ self.timestamp = 0.0
+
+ # The maximum timestamp that is considered reasonable:
+ self.max_timestamp = time.time() + 24.0 * 60.0 * 60.0
+
+ def get(self, timestamp, change_expected):
+ """Return a reasonable timestamp derived from TIMESTAMP.
+
+ Push TIMESTAMP into the future if necessary to ensure that it is
+ at least one second later than every other timestamp that has been
+ returned by previous calls to this method.
+
+ If CHANGE_EXPECTED is not True, then log a message if the
+ timestamp has to be changed."""
+
+ if timestamp > self.max_timestamp:
+ # If a timestamp is in the future, it is assumed that it is
+ # bogus. Shift it backwards in time to prevent it forcing other
+ # timestamps to be pushed even further in the future.
+
+ # Note that this is not nearly a complete solution to the bogus
+ # timestamp problem. A timestamp in the future still affects
+ # the ordering of changesets, and a changeset having such a
+ # timestamp will not be committed until all changesets with
+ # earlier timestamps have been committed, even if other
+ # changesets with even earlier timestamps depend on this one.
+ self.timestamp = self.timestamp + 1.0
+ if not change_expected:
+ Log().warn(
+ 'Timestamp "%s" is in the future; changed to "%s".'
+ % (time.asctime(time.gmtime(timestamp)),
+ time.asctime(time.gmtime(self.timestamp)),)
+ )
+ elif timestamp < self.timestamp + 1.0:
+ self.timestamp = self.timestamp + 1.0
+ if not change_expected and Log().is_on(Log.VERBOSE):
+ Log().verbose(
+ 'Timestamp "%s" adjusted to "%s" to ensure monotonicity.'
+ % (time.asctime(time.gmtime(timestamp)),
+ time.asctime(time.gmtime(self.timestamp)),)
+ )
+ else:
+ self.timestamp = timestamp
+
+ return self.timestamp
+
+
diff --git a/cvs2svn_lib/config.py b/cvs2svn_lib/config.py
new file mode 100644
index 0000000..b313b2c
--- /dev/null
+++ b/cvs2svn_lib/config.py
@@ -0,0 +1,221 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
"""This module contains various configuration constants used by cvs2svn."""


SVN_KEYWORDS_VALUE = 'Author Date Id Revision'

# The default names for the trunk/branches/tags directory for each
# project:
DEFAULT_TRUNK_BASE = 'trunk'
DEFAULT_BRANCHES_BASE = 'branches'
DEFAULT_TAGS_BASE = 'tags'

# Names used to invoke the external executables:
SVNADMIN_EXECUTABLE = 'svnadmin'
CO_EXECUTABLE = 'co'
CVS_EXECUTABLE = 'cvs'
SORT_EXECUTABLE = 'sort'

# A pickled list of the projects defined for this conversion.
PROJECTS = 'projects.pck'

# A file holding the Serializer to be used for
# CVS_REVS_SUMMARY_*_DATAFILE and CVS_SYMBOLS_SUMMARY_*_DATAFILE:
SUMMARY_SERIALIZER = 'summary-serializer.pck'

# The first file contains enough information about each CVSRevision to
# deduce preliminary Changesets.  The second file is a sorted version
# of the first.
CVS_REVS_SUMMARY_DATAFILE = 'revs-summary.txt'
CVS_REVS_SUMMARY_SORTED_DATAFILE = 'revs-summary-s.txt'

# The first file contains enough information about each CVSSymbol to
# deduce preliminary Changesets.  The second file is a sorted version
# of the first.
CVS_SYMBOLS_SUMMARY_DATAFILE = 'symbols-summary.txt'
CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE = 'symbols-summary-s.txt'

# A mapping from CVSItem id to Changeset id.
CVS_ITEM_TO_CHANGESET = 'cvs-item-to-changeset.dat'

# A mapping from CVSItem id to Changeset id, after the
# RevisionChangeset loops have been broken.
CVS_ITEM_TO_CHANGESET_REVBROKEN = 'cvs-item-to-changeset-revbroken.dat'

# A mapping from CVSItem id to Changeset id, after the SymbolChangeset
# loops have been broken.
CVS_ITEM_TO_CHANGESET_SYMBROKEN = 'cvs-item-to-changeset-symbroken.dat'

# A mapping from CVSItem id to Changeset id, after all Changeset
# loops have been broken.
CVS_ITEM_TO_CHANGESET_ALLBROKEN = 'cvs-item-to-changeset-allbroken.dat'

# A mapping from id to Changeset.
CHANGESETS_INDEX = 'changesets-index.dat'
CHANGESETS_STORE = 'changesets.pck'

# A mapping from id to Changeset, after the RevisionChangeset loops
# have been broken.
CHANGESETS_REVBROKEN_INDEX = 'changesets-revbroken-index.dat'
CHANGESETS_REVBROKEN_STORE = 'changesets-revbroken.pck'

# A mapping from id to Changeset, after the RevisionChangesets have
# been sorted and converted into OrderedChangesets.
CHANGESETS_REVSORTED_INDEX = 'changesets-revsorted-index.dat'
CHANGESETS_REVSORTED_STORE = 'changesets-revsorted.pck'

# A mapping from id to Changeset, after the SymbolChangeset loops have
# been broken.
CHANGESETS_SYMBROKEN_INDEX = 'changesets-symbroken-index.dat'
CHANGESETS_SYMBROKEN_STORE = 'changesets-symbroken.pck'

# A mapping from id to Changeset, after all Changeset loops have been
# broken.
CHANGESETS_ALLBROKEN_INDEX = 'changesets-allbroken-index.dat'
CHANGESETS_ALLBROKEN_STORE = 'changesets-allbroken.pck'

# The RevisionChangesets in commit order.  Each line contains the
# changeset id and timestamp of one changeset, in hexadecimal, in the
# order that the changesets should be committed to svn.
CHANGESETS_SORTED_DATAFILE = 'changesets-s.txt'

# A file containing a marshalled copy of all the statistics that have
# been gathered so far is written at the end of each pass as a
# marshalled dictionary.  This is the pattern used to generate the
# filenames.
STATISTICS_FILE = 'statistics-%02d.pck'

# This text file contains records (1 per line) that describe openings
# and closings for copies to tags and branches.  The format is as
# follows:
#
#     SYMBOL_ID SVN_REVNUM TYPE CVS_SYMBOL_ID
#
# where type is either OPENING or CLOSING.  CVS_SYMBOL_ID is the id of
# the CVSSymbol whose opening or closing is being described (in hex).
SYMBOL_OPENINGS_CLOSINGS = 'symbolic-names.txt'
# A sorted version of the above file.  SYMBOL_ID and SVN_REVNUM are
# the primary and secondary sorting criteria.  It is important that
# SYMBOL_IDs be located together to make it quick to read them at
# once.  The order of SVN_REVNUM is only important because it is
# assumed by some internal consistency checks.
SYMBOL_OPENINGS_CLOSINGS_SORTED = 'symbolic-names-s.txt'

# Skeleton version of the repository filesystem.  See class
# RepositoryMirror for how these work.
MIRROR_NODES_INDEX_TABLE = 'mirror-nodes-index.dat'
MIRROR_NODES_STORE = 'mirror-nodes.pck'

# Offsets pointing to the beginning of each symbol's records in
# SYMBOL_OPENINGS_CLOSINGS_SORTED.  This file contains a pickled map
# from symbol_id to file offset.
SYMBOL_OFFSETS_DB = 'symbol-offsets.pck'

# Pickled map of CVSFile.id to instance.
CVS_FILES_DB = 'cvs-files.pck'

# A series of records.  The first is a pickled serializer.  Each
# subsequent record is a serialized list of all CVSItems applying to a
# CVSFile.
CVS_ITEMS_STORE = 'cvs-items.pck'

# The same as above, but with the CVSItems ordered in groups based on
# their initial changesets.  CVSItems will usually be accessed one
# changeset at a time, so this ordering helps disk locality (even
# though some of the changesets will later be broken up).
CVS_ITEMS_SORTED_INDEX_TABLE = 'cvs-items-sorted-index.dat'
CVS_ITEMS_SORTED_STORE = 'cvs-items-sorted.pck'

# A record of all symbolic names that will be processed in the
# conversion.  This file contains a pickled list of TypedSymbol
# objects.
SYMBOL_DB = 'symbols.pck'

# A pickled list of the statistics for all symbols.  Each entry in the
# list is an instance of cvs2svn_lib.symbol_statistics._Stats.
SYMBOL_STATISTICS = 'symbol-statistics.pck'

# These two databases provide a bidirectional mapping between
# CVSRevision.ids (in hex) and Subversion revision numbers.
#
# The first maps CVSRevision.id to the SVN revision number of which it
# is a part (more than one CVSRevision can map to the same SVN
# revision number).
#
# The second maps Subversion revision numbers (as hex strings) to
# pickled SVNCommit instances.
CVS_REVS_TO_SVN_REVNUMS = 'cvs-revs-to-svn-revnums.dat'

# This database maps Subversion revision numbers to pickled SVNCommit
# instances.
SVN_COMMITS_INDEX_TABLE = 'svn-commits-index.dat'
SVN_COMMITS_STORE = 'svn-commits.pck'

# How many bytes to read at a time from a pipe.  128 kiB should be
# large enough to be efficient without wasting too much memory.
PIPE_READ_SIZE = 128 * 1024

# Records the author and log message for each changeset.  The database
# contains a map metadata_id -> (author, logmessage).  Each
# CVSRevision that is eligible to be combined into the same SVN commit
# is assigned the same id.  Note that the (author, logmessage) pairs
# are not necessarily all distinct; other data are taken into account
# when constructing ids.
METADATA_INDEX_TABLE = 'metadata-index.dat'
METADATA_STORE = 'metadata.pck'

# The same, after it has been cleaned up for the chosen output option:
METADATA_CLEAN_INDEX_TABLE = 'metadata-clean-index.dat'
METADATA_CLEAN_STORE = 'metadata-clean.pck'

# The following four databases are used in conjunction with --use-internal-co.

# Records the RCS deltas for all CVS revisions.  The deltas are to be
# applied forward, i.e. those from trunk are reversed wrt RCS.
RCS_DELTAS_INDEX_TABLE = 'rcs-deltas-index.dat'
RCS_DELTAS_STORE = 'rcs-deltas.pck'

# Records the revision tree of each RCS file.  The format is a list of
# list of integers.  The outer list holds lines of development, the inner list
# revisions within the LODs, revisions are CVSItem ids.  Branches "closer
# to the trunk" appear later.  Revisions are sorted by reverse chronological
# order.  The last revision of each branch is the revision it sprouts from.
# Revisions that represent deletions at the end of a branch are omitted.
RCS_TREES_INDEX_TABLE = 'rcs-trees-index.dat'
RCS_TREES_STORE = 'rcs-trees.pck'

# Records the revision tree of each RCS file after removing revisions
# belonging to excluded branches.  Note that the branch ordering is arbitrary
# in this file.
RCS_TREES_FILTERED_INDEX_TABLE = 'rcs-trees-filtered-index.dat'
RCS_TREES_FILTERED_STORE = 'rcs-trees-filtered.pck'

# At any given time during OutputPass, holds the full text of each CVS
# revision that was checked out already and still has descendants that will
# be checked out.
CVS_CHECKOUT_DB = 'cvs-checkout.db'

# End of DBs related to --use-internal-co.

# If this run will output directly to a Subversion repository, then
# this is the name of the file that each revision will temporarily be
# written to prior to writing it into the repository.
DUMPFILE = 'svn.dump'

# Flush a commit if a 5-minute gap occurs.
COMMIT_THRESHOLD = 5 * 60
+
diff --git a/cvs2svn_lib/context.py b/cvs2svn_lib/context.py
new file mode 100644
index 0000000..89dc16a
--- /dev/null
+++ b/cvs2svn_lib/context.py
@@ -0,0 +1,93 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Store the context (options, etc) for a cvs2svn run."""
+
+
+import os
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import CVSTextDecoder
+
+
class Ctx:
  """Session state for this run of cvs2svn.

  For example, run-time options are stored here.  This class is a
  Borg (see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531):
  every instance shares the same state dictionary."""

  # The dictionary shared by all instances (name-mangled to
  # _Ctx__shared_state, which clean() deliberately leaves alone):
  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # The shared state was already initialized by an earlier
      # instantiation:
      return
    # Else, initialize to defaults.
    self.set_defaults()

  def set_defaults(self):
    """Set all parameters to their default values."""

    self.output_option = None
    self.dry_run = False
    self.revision_recorder = None
    self.revision_excluder = None
    self.revision_reader = None
    self.svnadmin_executable = config.SVNADMIN_EXECUTABLE
    self.sort_executable = config.SORT_EXECUTABLE
    self.trunk_only = False
    self.prune = True
    self.cvs_author_decoder = CVSTextDecoder(['ascii'])
    self.cvs_log_decoder = CVSTextDecoder(['ascii'])
    self.cvs_filename_decoder = CVSTextDecoder(['ascii'])
    self.decode_apple_single = False
    self.symbol_info_filename = None
    self.username = None
    self.svn_property_setters = []
    self.tmpdir = 'cvs2svn-tmp'
    self.skip_cleanup = False
    self.keep_cvsignore = False
    self.cross_project_commits = True
    self.cross_branch_commits = True
    self.retain_conflicting_attic_files = False

    self.initial_project_commit_message = (
        'Standard project directories initialized by cvs2svn.'
        )
    self.post_commit_message = (
        'This commit was generated by cvs2svn to compensate for '
        'changes in r%(revnum)d, which included commits to RCS files '
        'with non-trunk default branches.'
        )
    self.symbol_commit_message = (
        "This commit was manufactured by cvs2svn to create %(symbol_type)s "
        "'%(symbol_name)s'."
        )

  def get_temp_filename(self, basename):
    """Return the path of BASENAME within the temporary directory."""

    return os.path.join(self.tmpdir, basename)

  def clean(self):
    """Dispose of items in our dictionary that are not intended to
    live past the end of a pass (identified by exactly one leading
    underscore)."""

    # Iterate over a snapshot of the keys: attributes are deleted
    # from self.__dict__ as we go, and mutating a dictionary while
    # iterating over it directly is unsafe.
    for attr in list(self.__dict__.keys()):
      if (attr.startswith('_') and not attr.startswith('__')
          and not attr.startswith('_Ctx__')):
        delattr(self, attr)
+
+
diff --git a/cvs2svn_lib/cvs_file.py b/cvs2svn_lib/cvs_file.py
new file mode 100644
index 0000000..3a1bb4f
--- /dev/null
+++ b/cvs2svn_lib/cvs_file.py
@@ -0,0 +1,287 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains a class to store information about a CVS file."""
+
+import os
+
+from cvs2svn_lib.common import path_join
+from cvs2svn_lib.context import Ctx
+
+
class CVSPath(object):
  """Represent a CVS file or directory.

  Members:

    id -- (int) unique ID for this CVSPath.  At any moment there is
        at most one CVSPath instance with a particular ID, so object
        identity is the same as object equality and instances can be
        used as map keys even though they have no __hash__() method.

    project -- (Project) the project containing this CVSPath.

    parent_directory -- (CVSDirectory or None) the CVSDirectory
        containing this CVSPath.

    basename -- (string) the base name of this CVSPath (no ',v').
        The basename of the root directory of a project is ''.

    ordinal -- (int) the order that this instance should be sorted
        relative to other CVSPath instances.  This member is set
        based on the ordering imposed by slow_compare() by
        CollectData after all CVSFiles have been processed.
        Comparisons of CVSPath using __cmp__() simply compare the
        ordinals.

  """

  __slots__ = [
      'id',
      'project',
      'parent_directory',
      'basename',
      'ordinal',
      ]

  def __init__(self, id, project, parent_directory, basename):
    self.id = id
    self.project = project
    self.parent_directory = parent_directory
    self.basename = basename

  def __getstate__(self):
    """This method must only be called after ordinal has been set."""

    return (
        self.id, self.project.id,
        self.parent_directory, self.basename,
        self.ordinal,
        )

  def __setstate__(self, state):
    (
        self.id, project_id,
        self.parent_directory, self.basename,
        self.ordinal,
        ) = state
    # The Project itself is not pickled; look it up again through the
    # (Borg) context:
    self.project = Ctx()._projects[project_id]

  def get_ancestry(self):
    """Return a list of the CVSPaths leading from the root path to SELF.

    The list starts with self.project.get_root_cvs_directory() and
    ends with self."""

    ancestry = [self]
    parent = self.parent_directory
    while parent is not None:
      ancestry.append(parent)
      parent = parent.parent_directory
    ancestry.reverse()
    return ancestry

  def get_cvs_path(self):
    """Return the canonical path within the Project.

    The canonical path:

    - Uses forward slashes

    - Doesn't include ',v' for files

    - Doesn't include the 'Attic' segment of the path unless the file
      is to be left in an Attic directory in the SVN repository;
      i.e., if a filename exists in and out of Attic and the
      --retain-conflicting-attic-files option was specified.

    """

    return path_join(*self._get_dir_components())

  cvs_path = property(get_cvs_path)

  def _get_dir_components(self):
    """Return the components of the path leading to SELF.

    The return value contains the base names of all of the parent
    directories (except for the root directory) and SELF."""

    return [p.basename for p in self.get_ancestry()[1:]]

  def __eq__(self, other):
    """Identity-based equality.

    Supplied so that equality comparisons do not fall back to
    __cmp__()."""

    return self is other

  def slow_compare(self, other):
    # Sort first by project, then by directory components:
    return (
        cmp(self.project, other.project)
        or cmp(self._get_dir_components(), other._get_dir_components())
        )

  def __cmp__(self, other):
    """This method must only be called after ordinal has been set."""

    return cmp(self.ordinal, other.ordinal)
+
+
class CVSDirectory(CVSPath):
  """Represent a CVS directory.

  Members:

    id -- (int or None) unique id for this file.  If None, a new id
        is generated.

    project -- (Project) the project containing this file.

    parent_directory -- (CVSDirectory or None) the CVSDirectory
        containing this CVSDirectory.

    basename -- (string) the base name of this CVSDirectory (no ',v').

  """

  __slots__ = []

  def __init__(self, id, project, parent_directory, basename):
    """Initialize a new CVSDirectory object."""

    CVSPath.__init__(self, id, project, parent_directory, basename)

  def get_filename(self):
    """Return the filesystem path to this CVSPath in the CVS repository."""

    if self.parent_directory is None:
      # The project root maps directly onto the repository path:
      return self.project.project_cvs_repos_path
    return os.path.join(
        self.parent_directory.get_filename(), self.basename
        )

  filename = property(get_filename)

  def __getstate__(self):
    return CVSPath.__getstate__(self)

  def __setstate__(self, state):
    CVSPath.__setstate__(self, state)

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return self.cvs_path + '/'

  def __repr__(self):
    return 'CVSDirectory<%x>(%r)' % (self.id, str(self),)
+
+
class CVSFile(CVSPath):
  """Represent a CVS file.

  Members (in addition to those of CVSPath):

    _in_attic -- (bool) True if the RCS file is in an Attic
        subdirectory that is not considered the parent directory.
        (If a file is in-and-out-of-attic and one copy is to be left
        in Attic after the conversion, then the Attic directory is
        that file's PARENT_DIRECTORY and _IN_ATTIC is False.)

    executable -- (bool) True iff the RCS file has its executable bit
        set.

    file_size -- (long) size of the RCS file in bytes.

    mode -- (string or None) 'kkv', 'kb', etc.

  PARENT_DIRECTORY might contain an 'Attic' component if it should be
  retained in the SVN repository; i.e., if the same filename exists
  out of Attic and the --retain-conflicting-attic-files option was
  specified.

  """

  __slots__ = [
      '_in_attic',
      'executable',
      'file_size',
      'mode',
      ]

  def __init__(
      self, id, project, parent_directory, basename, in_attic,
      executable, file_size, mode
      ):
    """Initialize a new CVSFile object."""

    CVSPath.__init__(self, id, project, parent_directory, basename)
    self._in_attic = in_attic
    self.executable = executable
    self.file_size = file_size
    self.mode = mode

    # A CVSFile is always contained in some directory:
    assert self.parent_directory is not None

  def get_filename(self):
    """Return the filesystem path to this CVSPath in the CVS repository."""

    if self._in_attic:
      components = (
          self.parent_directory.filename, 'Attic', self.basename + ',v'
          )
    else:
      components = (self.parent_directory.filename, self.basename + ',v')
    return os.path.join(*components)

  filename = property(get_filename)

  def __getstate__(self):
    return (
        CVSPath.__getstate__(self),
        self._in_attic, self.executable, self.file_size, self.mode,
        )

  def __setstate__(self, state):
    (
        cvs_path_state,
        self._in_attic, self.executable, self.file_size, self.mode,
        ) = state
    CVSPath.__setstate__(self, cvs_path_state)

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return self.cvs_path

  def __repr__(self):
    return 'CVSFile<%x>(%r)' % (self.id, str(self),)
+
+
diff --git a/cvs2svn_lib/cvs_file_database.py b/cvs2svn_lib/cvs_file_database.py
new file mode 100644
index 0000000..61eebf3
--- /dev/null
+++ b/cvs2svn_lib/cvs_file_database.py
@@ -0,0 +1,75 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains database facilities used by cvs2svn."""
+
+
+import cPickle
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.artifact_manager import artifact_manager
+
+
class CVSFileDatabase:
  """A database to store CVSFile objects and retrieve them by their id."""

  def __init__(self, mode):
    """Initialize an instance, opening database in MODE (where MODE is
    either DB_OPEN_NEW or DB_OPEN_READ)."""

    self.mode = mode

    # A map { id : CVSFile }
    self._cvs_files = {}

    if self.mode == DB_OPEN_NEW:
      pass
    elif self.mode == DB_OPEN_READ:
      f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'rb')
      # Close the file even if unpickling fails (the original code
      # leaked this handle):
      try:
        cvs_files = cPickle.load(f)
      finally:
        f.close()
      for cvs_file in cvs_files:
        self._cvs_files[cvs_file.id] = cvs_file
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

  def log_file(self, cvs_file):
    """Add CVS_FILE, a CVSFile instance, to the database."""

    if self.mode == DB_OPEN_READ:
      raise RuntimeError('Cannot write items in mode %r' % self.mode)

    self._cvs_files[cvs_file.id] = cvs_file

  def itervalues(self):
    """Iterate over the CVSFile instances in the database."""

    for value in self._cvs_files.itervalues():
      yield value

  def get_file(self, id):
    """Return the CVSFile with the specified ID."""

    return self._cvs_files[id]

  def close(self):
    """If opened DB_OPEN_NEW, write the database to disk; then release
    the in-memory map."""

    if self.mode == DB_OPEN_NEW:
      f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'wb')
      cPickle.dump(self._cvs_files.values(), f, -1)
      f.close()

    self._cvs_files = None
+
+
diff --git a/cvs2svn_lib/cvs_file_items.py b/cvs2svn_lib/cvs_file_items.py
new file mode 100644
index 0000000..f0dc782
--- /dev/null
+++ b/cvs2svn_lib/cvs_file_items.py
@@ -0,0 +1,1075 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains a class to manage the CVSItems related to one file."""
+
+
+import re
+
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+from cvs2svn_lib.symbol import ExcludedSymbol
+from cvs2svn_lib.cvs_item import CVSRevision
+from cvs2svn_lib.cvs_item import CVSRevisionModification
+from cvs2svn_lib.cvs_item import CVSRevisionAbsent
+from cvs2svn_lib.cvs_item import CVSRevisionNoop
+from cvs2svn_lib.cvs_item import CVSSymbol
+from cvs2svn_lib.cvs_item import CVSBranch
+from cvs2svn_lib.cvs_item import CVSTag
+from cvs2svn_lib.cvs_item import cvs_revision_type_map
+from cvs2svn_lib.cvs_item import cvs_branch_type_map
+from cvs2svn_lib.cvs_item import cvs_tag_type_map
+
+
class VendorBranchError(Exception):
  """There is an error in the structure of the file revision tree."""
+
+
class LODItems(object):
  """The CVSItems that belong to one line of development in one file."""

  def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
    # The LineOfDevelopment described by this instance.
    self.lod = lod

    # The CVSBranch starting this LOD, if any; otherwise, None.
    self.cvs_branch = cvs_branch

    # The CVSRevisions on this LOD (possibly none), in dependency
    # order.
    self.cvs_revisions = cvs_revisions

    # The CVSBranches sprouting from this LOD (either from cvs_branch
    # or from one of the CVSRevisions).
    self.cvs_branches = cvs_branches

    # The CVSTags sprouting from this LOD (either from cvs_branch or
    # from one of the CVSRevisions).
    self.cvs_tags = cvs_tags

  def is_trivial_import(self):
    """Return True iff this LOD is a trivial import branch in this file.

    A trivial import branch is a branch that was used for a single
    import and nothing else.  Such a branch is eligible for being
    grafted onto trunk, even if it has branch blockers."""

    revs = self.cvs_revisions
    return len(revs) == 1 and revs[0].ntdbr

  def is_pure_ntdb(self):
    """Return True iff this LOD is a pure NTDB in this file.

    A pure non-trunk default branch is defined to be a branch that
    contains only NTDB revisions (and at least one of them).  Such a
    branch is eligible for being grafted onto trunk, even if it has
    branch blockers."""

    revs = self.cvs_revisions
    return revs and revs[-1].ntdbr

  def iter_blockers(self):
    """Generate the symbols that block this LOD, if any."""

    if self.is_pure_ntdb():
      # Such a branch has no blockers, because the blockers can be
      # grafted to trunk.
      return

    # Other branches are only blocked by symbols that sprout from
    # non-NTDB revisions:
    non_ntdbr_revision_ids = set(
        cvs_revision.id
        for cvs_revision in self.cvs_revisions
        if not cvs_revision.ntdbr
        )

    for cvs_tag in self.cvs_tags:
      if cvs_tag.source_id in non_ntdbr_revision_ids:
        yield cvs_tag

    for cvs_branch in self.cvs_branches:
      if cvs_branch.source_id in non_ntdbr_revision_ids:
        yield cvs_branch
+
+
+class CVSFileItems(object):
def __init__(self, cvs_file, trunk, cvs_items):
  # The file whose data this instance holds.
  self.cvs_file = cvs_file

  # The symbol that represents "Trunk" in this file.
  self.trunk = trunk

  # A map from CVSItem.id to CVSItem:
  self._cvs_items = {}

  # The cvs_item_id of each root in the CVSItem forest.  (A root is
  # defined to be any CVSRevision with no prev_id.)
  self.root_ids = set()

  for cvs_item in cvs_items:
    self.add(cvs_item)
    if isinstance(cvs_item, CVSRevision) and cvs_item.prev_id is None:
      self.root_ids.add(cvs_item.id)
+
def __getstate__(self):
  # Pickle as (cvs_file id, list of CVSItems); the CVSFile instance
  # itself is recovered from the file database on unpickling.
  return (self.cvs_file.id, self.values(),)
+
def __setstate__(self, state):
  (cvs_file_id, cvs_items,) = state
  # Look the CVSFile up again via the (Borg) context, then
  # re-initialize as if freshly constructed:
  cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
  CVSFileItems.__init__(
      self, cvs_file, cvs_file.project.get_trunk(), cvs_items,
      )
+
def add(self, cvs_item):
  """Register CVS_ITEM under its id (replacing any existing entry)."""

  self._cvs_items[cvs_item.id] = cvs_item
+
def __getitem__(self, id):
  """Return the CVSItem with the specified ID.

  Raise KeyError if there is no such item."""

  return self._cvs_items[id]
+
def get(self, id, default=None):
  """Return the CVSItem with the specified ID, or DEFAULT if absent."""

  try:
    return self._cvs_items[id]
  except KeyError:
    return default
+
def __delitem__(self, id):
  """Remove the CVSItem with the specified ID.

  Root items may not be removed this way."""

  assert id not in self.root_ids
  del self._cvs_items[id]
+
def values(self):
  """Return all CVSItems in this file, in arbitrary order."""

  return self._cvs_items.values()
+
def check_link_consistency(self):
  """Check that the CVSItems are linked correctly with each other."""

  for cvs_item in self.values():
    try:
      cvs_item.check_links(self)
    except AssertionError:
      # Log which item failed before re-raising, so that the stack
      # trace in the resulting bug report can be tied to an item:
      Log().error(
          'Link consistency error in %s\n'
          'This is probably a bug internal to cvs2svn. Please file a bug\n'
          'report including the following stack trace (see FAQ for more '
          'info).'
          % (cvs_item,))
      raise
+
def _get_lod(self, lod, cvs_branch, start_id):
  """Return the indicated LODItems.

  LOD is the corresponding LineOfDevelopment.  CVS_BRANCH is the
  CVSBranch instance that starts the LOD if any; otherwise it is
  None.  START_ID is the id of the first CVSRevision on this LOD, or
  None if there are none."""

  cvs_revisions = []
  cvs_branches = []
  cvs_tags = []

  def process_subitems(cvs_item):
    """Process the branches and tags that are rooted in CVS_ITEM.

    CVS_ITEM can be a CVSRevision or a CVSBranch."""

    # Iterate over a copy of branch_ids in case the caller mutates
    # the original list while we work:
    for branch_id in cvs_item.branch_ids[:]:
      cvs_branches.append(self[branch_id])

    for tag_id in cvs_item.tag_ids:
      cvs_tags.append(self[tag_id])

  if cvs_branch is not None:
    # Include the symbols sprouting directly from the CVSBranch:
    process_subitems(cvs_branch)

  # Walk the chain of CVSRevisions via next_id, collecting each
  # revision and the symbols sprouting from it:
  id = start_id
  while id is not None:
    cvs_rev = self[id]
    cvs_revisions.append(cvs_rev)
    process_subitems(cvs_rev)
    id = cvs_rev.next_id

  return LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
+
def get_lod_items(self, cvs_branch):
  """Return an LODItems describing the branch that starts at CVS_BRANCH.

  CVS_BRANCH must be an instance of CVSBranch contained in this
  CVSFileItems.  This is a non-recursive lookup; sub-branches are
  listed but not descended into."""

  return self._get_lod(cvs_branch.symbol, cvs_branch, cvs_branch.next_id)
+
def iter_root_lods(self):
  """Iterate over the LODItems for all root LODs (non-recursively)."""

  # Iterate over a snapshot of root_ids so that callers may change it:
  for root_id in list(self.root_ids):
    cvs_item = self[root_id]
    if isinstance(cvs_item, CVSRevision):
      # This LOD doesn't have a CVSBranch associated with it.  Either
      # it is Trunk, or it is a branch whose CVSBranch has been
      # deleted.
      yield self._get_lod(cvs_item.lod, None, root_id)
    elif isinstance(cvs_item, CVSBranch):
      # This is a Branch that has been severed from the rest of the
      # tree.
      yield self._get_lod(cvs_item.symbol, cvs_item, cvs_item.next_id)
    else:
      raise InternalError('Unexpected root item: %s' % (cvs_item,))
+
def _iter_tree(self, lod, cvs_branch, start_id):
  """Iterate over the tree that starts at the specified line of development.

  LOD is the LineOfDevelopment where the iteration should start.
  CVS_BRANCH is the CVSBranch instance that starts the LOD if any;
  otherwise it is None.  ID is the id of the first CVSRevision on
  this LOD, or None if there are none.

  There are two cases handled by this routine: trunk (where LOD is a
  Trunk instance, CVS_BRANCH is None, and ID is the id of the 1.1
  revision) and a branch (where LOD is a Branch instance, CVS_BRANCH
  is a CVSBranch instance, and ID is either the id of the first
  CVSRevision on the branch or None if there are no CVSRevisions on
  the branch).  Note that CVS_BRANCH and ID cannot simultaneously be
  None.

  Yield an LODItems instance for each line of development, children
  before their parent (depth-first)."""

  cvs_revisions = []
  cvs_branches = []
  cvs_tags = []

  def process_subitems(cvs_item):
    """Process the branches and tags that are rooted in CVS_ITEM.

    CVS_ITEM can be a CVSRevision or a CVSBranch.  This is a
    generator; sub-LODs' LODItems are yielded through it."""

    # Iterate over a copy of branch_ids because the caller is allowed
    # to delete branches while the traversal is in progress:
    for branch_id in cvs_item.branch_ids[:]:
      # Recurse into the branch:
      branch = self[branch_id]
      for lod_items in self._iter_tree(
            branch.symbol, branch, branch.next_id
            ):
        yield lod_items
      # The caller might have deleted the branch that we just
      # yielded.  If it is no longer present, then do not add it to
      # the list of cvs_branches.
      try:
        cvs_branches.append(self[branch_id])
      except KeyError:
        pass

    for tag_id in cvs_item.tag_ids:
      cvs_tags.append(self[tag_id])

  if cvs_branch is not None:
    # Include the symbols sprouting directly from the CVSBranch:
    for lod_items in process_subitems(cvs_branch):
      yield lod_items

  id = start_id
  while id is not None:
    cvs_rev = self[id]
    cvs_revisions.append(cvs_rev)

    for lod_items in process_subitems(cvs_rev):
      yield lod_items

    id = cvs_rev.next_id

  # Finally yield this LOD itself; its sub-LODs were yielded above.
  yield LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
+
+ def iter_lods(self):
+ """Iterate over LinesOfDevelopment in this file, in depth-first order.
+
+ For each LOD, yield an LODItems instance. The traversal starts at
+ each root node but returns the LODs in depth-first order.
+
+ It is allowed to modify the CVSFileItems instance while the
+ traversal is occurring, but only in ways that don't affect the
+ tree structure above (i.e., towards the trunk from) the current
+ LOD."""
+
+ # Make a list out of root_ids so that callers can change it:
+ for id in list(self.root_ids):
+ cvs_item = self[id]
+ if isinstance(cvs_item, CVSRevision):
+ # This LOD doesn't have a CVSBranch associated with it.
+ # Either it is Trunk, or it is a branch whose CVSBranch has
+ # been deleted.
+ lod = cvs_item.lod
+ cvs_branch = None
+ elif isinstance(cvs_item, CVSBranch):
+ # This is a Branch that has been severed from the rest of the
+ # tree.
+ lod = cvs_item.symbol
+ id = cvs_item.next_id
+ cvs_branch = cvs_item
+ else:
+ raise InternalError('Unexpected root item: %s' % (cvs_item,))
+
+ for lod_items in self._iter_tree(lod, cvs_branch, id):
+ yield lod_items
+
+ def iter_deltatext_ancestors(self, cvs_rev):
+ """Generate the delta-dependency ancestors of CVS_REV.
+
+ Generate the ancestors of CVS_REV in deltatext order; i.e., back
+ along branches towards trunk, then outwards along trunk towards
+ HEAD."""
+
+ while True:
+ # Determine the next candidate source revision:
+ if isinstance(cvs_rev.lod, Trunk):
+ if cvs_rev.next_id is None:
+ # HEAD has no ancestors, so we are done:
+ return
+ else:
+ cvs_rev = self[cvs_rev.next_id]
+ else:
+ cvs_rev = self[cvs_rev.prev_id]
+
+ yield cvs_rev
+
+ def _sever_branch(self, lod_items):
+ """Sever the branch from its source and discard the CVSBranch.
+
+ LOD_ITEMS describes a branch that should be severed from its
+ source, deleting the CVSBranch and creating a new root. Also set
+ LOD_ITEMS.cvs_branch to None.
+
+ This method can only be used before symbols have been grafted onto
+ CVSBranches. It does not adjust NTDBR, NTDBR_PREV_ID or
+ NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB."""
+
+ cvs_branch = lod_items.cvs_branch
+ assert cvs_branch is not None
+ assert not cvs_branch.tag_ids
+ assert not cvs_branch.branch_ids
+ source_rev = self[cvs_branch.source_id]
+
+ # We only cover the following case, even though after
+ # FilterSymbolsPass cvs_branch.source_id might refer to another
+ # CVSBranch.
+ assert isinstance(source_rev, CVSRevision)
+
+ # Delete the CVSBranch itself:
+ lod_items.cvs_branch = None
+ del self[cvs_branch.id]
+
+ # Delete the reference from the source revision to the CVSBranch:
+ source_rev.branch_ids.remove(cvs_branch.id)
+
+ # Delete the reference from the first revision on the branch to
+ # the CVSBranch:
+ if lod_items.cvs_revisions:
+ first_rev = lod_items.cvs_revisions[0]
+
+ # Delete the reference from first_rev to the CVSBranch:
+ first_rev.first_on_branch_id = None
+
+ # Delete the reference from the source revision to the first
+ # revision on the branch:
+ source_rev.branch_commit_ids.remove(first_rev.id)
+
+ # ...and vice versa:
+ first_rev.prev_id = None
+
+ # Change the type of first_rev (e.g., from Change to Add):
+ first_rev.__class__ = cvs_revision_type_map[
+ (isinstance(first_rev, CVSRevisionModification), False,)
+ ]
+
+ # Now first_rev is a new root:
+ self.root_ids.add(first_rev.id)
+
+ def adjust_ntdbrs(self, ntdbr_cvs_revs):
+ """Adjust the specified non-trunk default branch revisions.
+
+ NTDBR_CVS_REVS is a list of CVSRevision instances in this file
+ that have been determined to be non-trunk default branch
+ revisions.
+
+ The first revision on the default branch is handled strangely by
+ CVS. If a file is imported (as opposed to being added), CVS
+ creates a 1.1 revision, then creates a vendor branch 1.1.1 based
+ on 1.1, then creates a 1.1.1.1 revision that is identical to the
+ 1.1 revision (i.e., its deltatext is empty). The log message that
+ the user typed when importing is stored with the 1.1.1.1 revision.
+ The 1.1 revision always contains a standard, generated log
+ message, 'Initial revision\n'.
+
+ When we detect a straightforward import like this, we want to
+ handle it by deleting the 1.1 revision (which doesn't contain any
+ useful information) and making 1.1.1.1 into an independent root in
+ the file's dependency tree. In SVN, 1.1.1.1 will be added
+ directly to the vendor branch with its initial content. Then in a
+ special 'post-commit', the 1.1.1.1 revision is copied back to
+ trunk.
+
+ If the user imports again to the same vendor branch, then CVS
+ creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch,
+ *without* counterparts in trunk (even though these revisions
+ effectively play the role of trunk revisions). So after we add
+ such revisions to the vendor branch, we also copy them back to
+ trunk in post-commits.
+
+ Set the ntdbr members of the revisions listed in NTDBR_CVS_REVS to
+ True. Also, if there is a 1.2 revision, then set that revision to
+ depend on the last non-trunk default branch revision and possibly
+ adjust its type accordingly."""
+
+ for cvs_rev in ntdbr_cvs_revs:
+ cvs_rev.ntdbr = True
+
+ # Look for a 1.2 revision:
+ rev_1_1 = self[ntdbr_cvs_revs[0].prev_id]
+
+ rev_1_2 = self.get(rev_1_1.next_id)
+ if rev_1_2 is not None:
+ # Revision 1.2 logically follows the imported revisions, not
+ # 1.1. Accordingly, connect it to the last NTDBR and possibly
+ # change its type.
+ last_ntdbr = ntdbr_cvs_revs[-1]
+ rev_1_2.ntdbr_prev_id = last_ntdbr.id
+ last_ntdbr.ntdbr_next_id = rev_1_2.id
+ rev_1_2.__class__ = cvs_revision_type_map[(
+ isinstance(rev_1_2, CVSRevisionModification),
+ isinstance(last_ntdbr, CVSRevisionModification),
+ )]
+
+ def process_live_ntdb(self, vendor_lod_items):
+ """VENDOR_LOD_ITEMS is a live default branch; process it.
+
+ In this case, all revisions on the default branch are NTDBRs and
+ it is an error if there is also a '1.2' revision.
+
+ Return True iff this transformation really does something. Raise
+ a VendorBranchError if there is a '1.2' revision."""
+
+ rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
+ rev_1_2_id = rev_1_1.next_id
+ if rev_1_2_id is not None:
+ raise VendorBranchError(
+ 'File \'%s\' has default branch=%s but also a revision %s'
+ % (self.cvs_file.filename,
+ vendor_lod_items.cvs_branch.branch_number, self[rev_1_2_id].rev,)
+ )
+
+ ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
+
+ if ntdbr_cvs_revs:
+ self.adjust_ntdbrs(ntdbr_cvs_revs)
+ return True
+ else:
+ return False
+
+ def process_historical_ntdb(self, vendor_lod_items):
+ """There appears to have been a non-trunk default branch in the past.
+
+ There is currently no default branch, but the branch described by
+ file appears to have been imported. So our educated guess is that
+ all revisions on the '1.1.1' branch (described by
+ VENDOR_LOD_ITEMS) with timestamps prior to the timestamp of '1.2'
+ were non-trunk default branch revisions.
+
+ Return True iff this transformation really does something.
+
+ This really only handles standard '1.1.1.*'-style vendor
+ revisions. One could conceivably have a file whose default branch
+ is 1.1.3 or whatever, or was that at some point in time, with
+ vendor revisions 1.1.3.1, 1.1.3.2, etc. But with the default
+ branch gone now, we'd have no basis for assuming that the
+ non-standard vendor branch had ever been the default branch
+ anyway.
+
+ Note that we rely on comparisons between the timestamps of the
+ revisions on the vendor branch and that of revision 1.2, even
+ though the timestamps might be incorrect due to clock skew. We
+ could do a slightly better job if we used the changeset
+ timestamps, as it is possible that the dependencies that went into
+ determining those timestamps are more accurate. But that would
+ require an extra pass or two."""
+
+ rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
+ rev_1_2_id = rev_1_1.next_id
+
+ if rev_1_2_id is None:
+ rev_1_2_timestamp = None
+ else:
+ rev_1_2_timestamp = self[rev_1_2_id].timestamp
+
+ ntdbr_cvs_revs = []
+ for cvs_rev in vendor_lod_items.cvs_revisions:
+ if rev_1_2_timestamp is not None \
+ and cvs_rev.timestamp >= rev_1_2_timestamp:
+ # That's the end of the once-default branch.
+ break
+ ntdbr_cvs_revs.append(cvs_rev)
+
+ if ntdbr_cvs_revs:
+ self.adjust_ntdbrs(ntdbr_cvs_revs)
+ return True
+ else:
+ return False
+
+ def imported_remove_1_1(self, vendor_lod_items):
+ """This file was imported. Remove the 1.1 revision if possible.
+
+ VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch.
+ See adjust_ntdbrs() for more information."""
+
+ assert vendor_lod_items.cvs_revisions
+ cvs_rev = vendor_lod_items.cvs_revisions[0]
+
+ if isinstance(cvs_rev, CVSRevisionModification) \
+ and not cvs_rev.deltatext_exists:
+ cvs_branch = vendor_lod_items.cvs_branch
+ rev_1_1 = self[cvs_branch.source_id]
+ assert isinstance(rev_1_1, CVSRevision)
+ Log().debug('Removing unnecessary revision %s' % (rev_1_1,))
+
+ # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
+ self._sever_branch(vendor_lod_items)
+
+ # Delete rev_1_1:
+ self.root_ids.remove(rev_1_1.id)
+ del self[rev_1_1.id]
+ rev_1_2_id = rev_1_1.next_id
+ if rev_1_2_id is not None:
+ rev_1_2 = self[rev_1_2_id]
+ rev_1_2.prev_id = None
+ self.root_ids.add(rev_1_2.id)
+
+ # Move any tags and branches from rev_1_1 to cvs_rev:
+ cvs_rev.tag_ids.extend(rev_1_1.tag_ids)
+ for id in rev_1_1.tag_ids:
+ cvs_tag = self[id]
+ cvs_tag.source_lod = cvs_rev.lod
+ cvs_tag.source_id = cvs_rev.id
+ cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids
+ for id in rev_1_1.branch_ids:
+ cvs_branch = self[id]
+ cvs_branch.source_lod = cvs_rev.lod
+ cvs_branch.source_id = cvs_rev.id
+ cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
+ for id in rev_1_1.branch_commit_ids:
+ cvs_rev2 = self[id]
+ cvs_rev2.prev_id = cvs_rev.id
+
+ def _delete_unneeded(self, cvs_item, metadata_db):
+ if isinstance(cvs_item, CVSRevisionNoop) \
+ and cvs_item.rev == '1.1' \
+ and isinstance(cvs_item.lod, Trunk) \
+ and len(cvs_item.branch_ids) >= 1 \
+ and self[cvs_item.branch_ids[0]].next_id is not None \
+ and not cvs_item.closed_symbols \
+ and not cvs_item.ntdbr:
+ # FIXME: This message will not match if the RCS file was renamed
+ # manually after it was created.
+ log_msg = metadata_db[cvs_item.metadata_id].log_msg
+ cvs_generated_msg = 'file %s was initially added on branch %s.\n' % (
+ self.cvs_file.basename,
+ self[cvs_item.branch_ids[0]].symbol.name,)
+ return log_msg == cvs_generated_msg
+ else:
+ return False
+
+ def remove_unneeded_deletes(self, metadata_db):
+ """Remove unneeded deletes for this file.
+
+ If a file is added on a branch, then a trunk revision is added at
+ the same time in the 'Dead' state. This revision doesn't do
+ anything useful, so delete it."""
+
+ for id in self.root_ids:
+ cvs_item = self[id]
+ if self._delete_unneeded(cvs_item, metadata_db):
+ Log().debug('Removing unnecessary delete %s' % (cvs_item,))
+
+ # Delete cvs_item:
+ self.root_ids.remove(cvs_item.id)
+ del self[id]
+ if cvs_item.next_id is not None:
+ cvs_rev_next = self[cvs_item.next_id]
+ cvs_rev_next.prev_id = None
+ self.root_ids.add(cvs_rev_next.id)
+
+ # Delete all CVSBranches rooted at this revision. If there is
+ # a CVSRevision on the branch, it should already be an add so
+ # it doesn't have to be changed.
+ for cvs_branch_id in cvs_item.branch_ids:
+ cvs_branch = self[cvs_branch_id]
+ del self[cvs_branch.id]
+
+ if cvs_branch.next_id is not None:
+ cvs_branch_next = self[cvs_branch.next_id]
+ cvs_branch_next.first_on_branch_id = None
+ cvs_branch_next.prev_id = None
+ self.root_ids.add(cvs_branch_next.id)
+
+ # Tagging a dead revision doesn't do anything, so remove any
+ # tags that were set on 1.1:
+ for cvs_tag_id in cvs_item.tag_ids:
+ del self[cvs_tag_id]
+
+ # This can only happen once per file, and we might have just
+ # changed self.root_ids, so break out of the loop:
+ break
+
+ def _initial_branch_delete_unneeded(self, lod_items, metadata_db):
+ """Return True iff the initial revision in LOD_ITEMS can be deleted."""
+
+ if lod_items.cvs_branch is not None \
+ and lod_items.cvs_branch.source_id is not None \
+ and len(lod_items.cvs_revisions) >= 2:
+ cvs_revision = lod_items.cvs_revisions[0]
+ cvs_rev_source = self[lod_items.cvs_branch.source_id]
+ if isinstance(cvs_revision, CVSRevisionAbsent) \
+ and not cvs_revision.tag_ids \
+ and not cvs_revision.branch_ids \
+ and abs(cvs_revision.timestamp - cvs_rev_source.timestamp) <= 2:
+ # FIXME: This message will not match if the RCS file was renamed
+ # manually after it was created.
+ log_msg = metadata_db[cvs_revision.metadata_id].log_msg
+ return bool(re.match(
+ r'file %s was added on branch .* on '
+ r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
+ '\n' % (re.escape(self.cvs_file.basename),),
+ log_msg,
+ ))
+ return False
+
+ def remove_initial_branch_deletes(self, metadata_db):
+ """If the first revision on a branch is an unnecessary delete, remove it.
+
+ If a file is added on a branch (whether or not it already existed
+ on trunk), then new versions of CVS add a first branch revision in
+ the 'dead' state (to indicate that the file did not exist on the
+ branch when the branch was created) followed by the second branch
+ revision, which is an add. When we encounter this situation, we
+ sever the branch from trunk and delete the first branch
+ revision."""
+
+ for lod_items in self.iter_lods():
+ if self._initial_branch_delete_unneeded(lod_items, metadata_db):
+ cvs_revision = lod_items.cvs_revisions[0]
+ Log().debug(
+ 'Removing unnecessary initial branch delete %s' % (cvs_revision,)
+ )
+ cvs_branch = lod_items.cvs_branch
+ cvs_rev_source = self[cvs_branch.source_id]
+ cvs_rev_next = lod_items.cvs_revisions[1]
+
+ # Delete cvs_revision:
+ del self[cvs_revision.id]
+ cvs_rev_next.prev_id = None
+ self.root_ids.add(cvs_rev_next.id)
+ cvs_rev_source.branch_commit_ids.remove(cvs_revision.id)
+
+ # Delete the CVSBranch on which it is located:
+ del self[cvs_branch.id]
+ cvs_rev_source.branch_ids.remove(cvs_branch.id)
+
+ def _exclude_tag(self, cvs_tag):
+ """Exclude the specified CVS_TAG."""
+
+ del self[cvs_tag.id]
+
+ # A CVSTag is the successor of the CVSRevision that it
+ # sprouts from. Delete this tag from that revision's
+ # tag_ids:
+ self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id)
+
+ def _exclude_branch(self, lod_items):
+ """Exclude the branch described by LOD_ITEMS, including its revisions.
+
+ (Do not update the LOD_ITEMS instance itself.)
+
+ If the LOD starts with non-trunk default branch revisions, leave
+ the branch and the NTDB revisions in place, but delete any
+ subsequent revisions that are not NTDB revisions. In this case,
+ return True; otherwise return False"""
+
+ if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
+ for cvs_rev in lod_items.cvs_revisions:
+ if not cvs_rev.ntdbr:
+ # We've found the first non-NTDBR, and it's stored in cvs_rev:
+ break
+ else:
+ # There was no revision following the NTDBRs:
+ cvs_rev = None
+
+ if cvs_rev:
+ last_ntdbr = self[cvs_rev.prev_id]
+ last_ntdbr.next_id = None
+ while True:
+ del self[cvs_rev.id]
+ if cvs_rev.next_id is None:
+ break
+ cvs_rev = self[cvs_rev.next_id]
+
+ return True
+
+ else:
+ if lod_items.cvs_branch is not None:
+ # Delete the CVSBranch itself:
+ cvs_branch = lod_items.cvs_branch
+
+ del self[cvs_branch.id]
+
+ # A CVSBranch is the successor of the CVSRevision that it
+ # sprouts from. Delete this branch from that revision's
+ # branch_ids:
+ self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)
+
+ if lod_items.cvs_revisions:
+ # The first CVSRevision on the branch has to be either detached
+ # from the revision from which the branch sprang, or removed
+ # from self.root_ids:
+ cvs_rev = lod_items.cvs_revisions[0]
+ if cvs_rev.prev_id is None:
+ self.root_ids.remove(cvs_rev.id)
+ else:
+ self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id)
+
+ for cvs_rev in lod_items.cvs_revisions:
+ del self[cvs_rev.id]
+
+ return False
+
+ def graft_ntdbr_to_trunk(self):
+ """Graft the non-trunk default branch revisions to trunk.
+
+ They should already be alone on a branch that may or may not have
+ a CVSBranch connecting it to trunk."""
+
+ for lod_items in self.iter_lods():
+ if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
+ assert lod_items.is_pure_ntdb()
+
+ first_rev = lod_items.cvs_revisions[0]
+ last_rev = lod_items.cvs_revisions[-1]
+ rev_1_1 = self.get(first_rev.prev_id)
+ rev_1_2 = self.get(last_rev.ntdbr_next_id)
+
+ if lod_items.cvs_branch is not None:
+ self._sever_branch(lod_items)
+
+ if rev_1_1 is not None:
+ rev_1_1.next_id = first_rev.id
+ first_rev.prev_id = rev_1_1.id
+
+ self.root_ids.remove(first_rev.id)
+
+ first_rev.__class__ = cvs_revision_type_map[(
+ isinstance(first_rev, CVSRevisionModification),
+ isinstance(rev_1_1, CVSRevisionModification),
+ )]
+
+ if rev_1_2 is not None:
+ rev_1_2.ntdbr_prev_id = None
+ last_rev.ntdbr_next_id = None
+
+ if rev_1_2.prev_id is None:
+ self.root_ids.remove(rev_1_2.id)
+
+ rev_1_2.prev_id = last_rev.id
+ last_rev.next_id = rev_1_2.id
+
+ # The effective_pred_id of rev_1_2 was not changed, so we
+ # don't have to change rev_1_2's type.
+
+ for cvs_rev in lod_items.cvs_revisions:
+ cvs_rev.ntdbr = False
+ cvs_rev.lod = self.trunk
+
+ for cvs_branch in lod_items.cvs_branches:
+ cvs_branch.source_lod = self.trunk
+
+ for cvs_tag in lod_items.cvs_tags:
+ cvs_tag.source_lod = self.trunk
+
+ return
+
+ def exclude_non_trunk(self):
+ """Delete all tags and branches."""
+
+ ntdbr_excluded = False
+ for lod_items in self.iter_lods():
+ for cvs_tag in lod_items.cvs_tags[:]:
+ self._exclude_tag(cvs_tag)
+ lod_items.cvs_tags.remove(cvs_tag)
+
+ if not isinstance(lod_items.lod, Trunk):
+ assert not lod_items.cvs_branches
+
+ ntdbr_excluded |= self._exclude_branch(lod_items)
+
+ if ntdbr_excluded:
+ self.graft_ntdbr_to_trunk()
+
+ def filter_excluded_symbols(self, revision_excluder):
+ """Delete any excluded symbols and references to them.
+
+ Call the revision_excluder's callback methods to let it know what
+ is being excluded."""
+
+ ntdbr_excluded = False
+ for lod_items in self.iter_lods():
+ # Delete any excluded tags:
+ for cvs_tag in lod_items.cvs_tags[:]:
+ if isinstance(cvs_tag.symbol, ExcludedSymbol):
+ self._exclude_tag(cvs_tag)
+
+ lod_items.cvs_tags.remove(cvs_tag)
+
+ # Delete the whole branch if it is to be excluded:
+ if isinstance(lod_items.lod, ExcludedSymbol):
+ # A symbol can only be excluded if no other symbols spring
+ # from it. This was already checked in CollateSymbolsPass, so
+ # these conditions should already be satisfied.
+ assert not list(lod_items.iter_blockers())
+
+ ntdbr_excluded |= self._exclude_branch(lod_items)
+
+ if ntdbr_excluded:
+ self.graft_ntdbr_to_trunk()
+
+ revision_excluder.process_file(self)
+
+ def _mutate_branch_to_tag(self, cvs_branch):
+ """Mutate the branch CVS_BRANCH into a tag."""
+
+ if cvs_branch.next_id is not None:
+ # This shouldn't happen because it was checked in
+ # CollateSymbolsPass:
+ raise FatalError('Attempt to exclude a branch with commits.')
+ cvs_tag = CVSTag(
+ cvs_branch.id, cvs_branch.cvs_file, cvs_branch.symbol,
+ cvs_branch.source_lod, cvs_branch.source_id,
+ cvs_branch.revision_recorder_token,
+ )
+ self.add(cvs_tag)
+ cvs_revision = self[cvs_tag.source_id]
+ cvs_revision.branch_ids.remove(cvs_tag.id)
+ cvs_revision.tag_ids.append(cvs_tag.id)
+
+ def _mutate_tag_to_branch(self, cvs_tag):
+ """Mutate the tag into a branch."""
+
+ cvs_branch = CVSBranch(
+ cvs_tag.id, cvs_tag.cvs_file, cvs_tag.symbol,
+ None, cvs_tag.source_lod, cvs_tag.source_id, None,
+ cvs_tag.revision_recorder_token,
+ )
+ self.add(cvs_branch)
+ cvs_revision = self[cvs_branch.source_id]
+ cvs_revision.tag_ids.remove(cvs_branch.id)
+ cvs_revision.branch_ids.append(cvs_branch.id)
+
+ def _mutate_symbol(self, cvs_symbol):
+ """Mutate CVS_SYMBOL if necessary."""
+
+ symbol = cvs_symbol.symbol
+ if isinstance(cvs_symbol, CVSBranch) and isinstance(symbol, Tag):
+ self._mutate_branch_to_tag(cvs_symbol)
+ elif isinstance(cvs_symbol, CVSTag) and isinstance(symbol, Branch):
+ self._mutate_tag_to_branch(cvs_symbol)
+
+ def mutate_symbols(self):
+ """Force symbols to be tags/branches based on self.symbol_db."""
+
+ for cvs_item in self.values():
+ if isinstance(cvs_item, CVSRevision):
+ # This CVSRevision may be affected by the mutation of any
+ # CVSSymbols that it references, but there is nothing to do
+ # here directly.
+ pass
+ elif isinstance(cvs_item, CVSSymbol):
+ self._mutate_symbol(cvs_item)
+ else:
+ raise RuntimeError('Unknown cvs item type')
+
+ def _adjust_tag_parent(self, cvs_tag):
+ """Adjust the parent of CVS_TAG if possible and preferred.
+
+ CVS_TAG is an instance of CVSTag. This method must be called in
+ leaf-to-trunk order."""
+
+ # The Symbol that cvs_tag would like to have as a parent:
+ preferred_parent = Ctx()._symbol_db.get_symbol(
+ cvs_tag.symbol.preferred_parent_id)
+
+ if cvs_tag.source_lod == preferred_parent:
+ # The preferred parent is already the parent.
+ return
+
+ # The CVSRevision that is its direct parent:
+ source = self[cvs_tag.source_id]
+ assert isinstance(source, CVSRevision)
+
+ if isinstance(preferred_parent, Trunk):
+ # It is not possible to graft *onto* Trunk:
+ return
+
+ # Try to find the preferred parent among the possible parents:
+ for branch_id in source.branch_ids:
+ if self[branch_id].symbol == preferred_parent:
+ # We found it!
+ break
+ else:
+ # The preferred parent is not a possible parent in this file.
+ return
+
+ parent = self[branch_id]
+ assert isinstance(parent, CVSBranch)
+
+ Log().debug('Grafting %s from %s (on %s) onto %s' % (
+ cvs_tag, source, source.lod, parent,))
+ # Switch parent:
+ source.tag_ids.remove(cvs_tag.id)
+ parent.tag_ids.append(cvs_tag.id)
+ cvs_tag.source_lod = parent.symbol
+ cvs_tag.source_id = parent.id
+
+ def _adjust_branch_parents(self, cvs_branch):
+ """Adjust the parent of CVS_BRANCH if possible and preferred.
+
+ CVS_BRANCH is an instance of CVSBranch. This method must be
+ called in leaf-to-trunk order."""
+
+ # The Symbol that cvs_branch would like to have as a parent:
+ preferred_parent = Ctx()._symbol_db.get_symbol(
+ cvs_branch.symbol.preferred_parent_id)
+
+ if cvs_branch.source_lod == preferred_parent:
+ # The preferred parent is already the parent.
+ return
+
+ # The CVSRevision that is its direct parent:
+ source = self[cvs_branch.source_id]
+ # This is always a CVSRevision because we haven't adjusted it yet:
+ assert isinstance(source, CVSRevision)
+
+ if isinstance(preferred_parent, Trunk):
+ # It is not possible to graft *onto* Trunk:
+ return
+
+ # Try to find the preferred parent among the possible parents:
+ for branch_id in source.branch_ids:
+ possible_parent = self[branch_id]
+ if possible_parent.symbol == preferred_parent:
+ # We found it!
+ break
+ elif possible_parent.symbol == cvs_branch.symbol:
+ # Only branches that precede the branch to be adjusted are
+ # considered possible parents. Leave parentage unchanged:
+ return
+ else:
+ # This point should never be reached.
+ raise InternalError(
+ 'Possible parent search did not terminate as expected')
+
+ parent = possible_parent
+ assert isinstance(parent, CVSBranch)
+
+ Log().debug('Grafting %s from %s (on %s) onto %s' % (
+ cvs_branch, source, source.lod, parent,))
+ # Switch parent:
+ source.branch_ids.remove(cvs_branch.id)
+ parent.branch_ids.append(cvs_branch.id)
+ cvs_branch.source_lod = parent.symbol
+ cvs_branch.source_id = parent.id
+
+ def adjust_parents(self):
+ """Adjust the parents of symbols to their preferred parents.
+
+ If a CVSSymbol has a preferred parent that is different than its
+ current parent, and if the preferred parent is an allowed parent
+ of the CVSSymbol in this file, then graft the CVSSymbol onto its
+ preferred parent."""
+
+ for lod_items in self.iter_lods():
+ for cvs_tag in lod_items.cvs_tags:
+ self._adjust_tag_parent(cvs_tag)
+
+ for cvs_branch in lod_items.cvs_branches:
+ self._adjust_branch_parents(cvs_branch)
+
+ def _get_revision_source(self, cvs_symbol):
+ """Return the CVSRevision that is the ultimate source of CVS_SYMBOL."""
+
+ while True:
+ cvs_item = self[cvs_symbol.source_id]
+ if isinstance(cvs_item, CVSRevision):
+ return cvs_item
+ else:
+ cvs_symbol = cvs_item
+
+ def refine_symbols(self):
+ """Refine the types of the CVSSymbols in this file.
+
+ Adjust the symbol types based on whether the source exists:
+ CVSBranch vs. CVSBranchNoop and CVSTag vs. CVSTagNoop."""
+
+ for lod_items in self.iter_lods():
+ for cvs_tag in lod_items.cvs_tags:
+ source = self._get_revision_source(cvs_tag)
+ cvs_tag.__class__ = cvs_tag_type_map[
+ isinstance(source, CVSRevisionModification)
+ ]
+
+ for cvs_branch in lod_items.cvs_branches:
+ source = self._get_revision_source(cvs_branch)
+ cvs_branch.__class__ = cvs_branch_type_map[
+ isinstance(source, CVSRevisionModification)
+ ]
+
+ def record_opened_symbols(self):
+ """Set CVSRevision.opened_symbols for the surviving revisions."""
+
+ for cvs_item in self.values():
+ if isinstance(cvs_item, (CVSRevision, CVSBranch)):
+ cvs_item.opened_symbols = []
+ for cvs_symbol_opened_id in cvs_item.get_cvs_symbol_ids_opened():
+ cvs_symbol_opened = self[cvs_symbol_opened_id]
+ cvs_item.opened_symbols.append(
+ (cvs_symbol_opened.symbol.id, cvs_symbol_opened.id,)
+ )
+
+ def record_closed_symbols(self):
+ """Set CVSRevision.closed_symbols for the surviving revisions.
+
+ A CVSRevision closes the symbols that were opened by the CVSItems
+ that the CVSRevision closes. Got it?
+
+ This method must be called after record_opened_symbols()."""
+
+ for cvs_item in self.values():
+ if isinstance(cvs_item, CVSRevision):
+ cvs_item.closed_symbols = []
+ for cvs_item_closed_id in cvs_item.get_ids_closed():
+ cvs_item_closed = self[cvs_item_closed_id]
+ cvs_item.closed_symbols.extend(cvs_item_closed.opened_symbols)
+
+
diff --git a/cvs2svn_lib/cvs_item.py b/cvs2svn_lib/cvs_item.py
new file mode 100644
index 0000000..5c01a24
--- /dev/null
+++ b/cvs2svn_lib/cvs_item.py
@@ -0,0 +1,901 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes to store atomic CVS events.
+
+A CVSItem is a single event, pertaining to a single file, that can be
+determined to have occured based on the information in the CVS
+repository.
+
+The inheritance tree is as follows:
+
+CVSItem
+|
++--CVSRevision
+| |
+| +--CVSRevisionModification (* -> 'Exp')
+| | |
+| | +--CVSRevisionAdd ('dead' -> 'Exp')
+| | |
+| | +--CVSRevisionChange ('Exp' -> 'Exp')
+| |
+| +--CVSRevisionAbsent (* -> 'dead')
+| |
+| +--CVSRevisionDelete ('Exp' -> 'dead')
+| |
+| +--CVSRevisionNoop ('dead' -> 'dead')
+|
++--CVSSymbol
+ |
+ +--CVSBranch
+ | |
+ | +--CVSBranchNoop
+ |
+ +--CVSTag
+ |
+ +--CVSTagNoop
+
+"""
+
+
+from cvs2svn_lib.context import Ctx
+
+
class CVSItem(object):
  """A single atomic CVS event pertaining to a single file.

  Concrete subclasses are CVSRevision and the CVSSymbol family.
  Identity, equality, and hashing are all defined purely in terms of
  the integer id."""

  __slots__ = [
      'id',
      'cvs_file',
      'revision_recorder_token',
      ]

  def __init__(self, id, cvs_file, revision_recorder_token):
    # id -- (int) unique id of this item.
    # cvs_file -- (CVSFile) the file that this item affects.
    # revision_recorder_token -- opaque token for RevisionReader.
    self.id = id
    self.cvs_file = cvs_file
    self.revision_recorder_token = revision_recorder_token

  def __eq__(self, other):
    # Two CVSItems are equal iff their ids coincide.
    return self.id == other.id

  def __cmp__(self, other):
    # Python 2 ordering support, likewise keyed on id.
    return cmp(self.id, other.id)

  def __hash__(self):
    # ids are unique integers, so the id itself is a perfect hash.
    return self.id

  def __getstate__(self):
    raise NotImplementedError()

  def __setstate__(self, data):
    raise NotImplementedError()

  def get_svn_path(self):
    """Return the SVN path associated with this CVSItem."""

    raise NotImplementedError()

  def get_pred_ids(self):
    """Return the CVSItem.ids of direct predecessors of SELF.

    A predecessor is a CVSItem that has to have been committed before
    this one."""

    raise NotImplementedError()

  def get_succ_ids(self):
    """Return the CVSItem.ids of direct successors of SELF.

    A direct successor is a CVSItem that has this one as a direct
    predecessor."""

    raise NotImplementedError()

  def get_cvs_symbol_ids_opened(self):
    """Return an iterable over the ids of CVSSymbols that this item opens.

    'Open' means that the path corresponding to this CVSItem will have
    to be copied when filling the corresponding symbol."""

    raise NotImplementedError()

  def get_ids_closed(self):
    """Return an iterable over the CVSItem.ids of CVSItems closed by this one.

    A CVSItem A is said to close a CVSItem B if committing A causes B
    to be overwritten or deleted (no longer available) in the SVN
    repository.  This is interesting because it sets the last SVN
    revision number from which the contents of B can be copied (for
    example, to fill a symbol).  See the concrete implementations of
    this method for the exact rules about what closes what."""

    raise NotImplementedError()

  def check_links(self, cvs_file_items):
    """Check for consistency of links to other CVSItems.

    Other items can be looked up in CVS_FILE_ITEMS, which is an
    instance of CVSFileItems.  Raise an AssertionError if there is a
    problem."""

    raise NotImplementedError()

  def __repr__(self):
    return '%s(%s)' % (self.__class__.__name__, self,)
+
+
class CVSRevision(CVSItem):
  """Information about a single CVS revision.

  A CVSRevision holds the information known about a single version of
  a single file.

  Members:

    id -- (int) unique ID for this revision.

    cvs_file -- (CVSFile) CVSFile affected by this revision.

    timestamp -- (int) date stamp for this revision.

    metadata_id -- (int) id of metadata instance record in
        metadata_db.

    prev_id -- (int) id of the logically previous CVSRevision, either
        on the same or the source branch (or None).

    next_id -- (int) id of the logically next CVSRevision (or None).

    rev -- (string) the CVS revision number, e.g., '1.3'.

    deltatext_exists -- (bool) true iff this revision's deltatext is
        not empty.

    lod -- (LineOfDevelopment) LOD on which this revision occurred.

    first_on_branch_id -- (int or None) if this revision is the first
        on its branch, the cvs_branch_id of that branch; else, None.

    ntdbr -- (bool) true iff this is a non-trunk default branch
        revision.

    ntdbr_prev_id -- (int or None) Iff this is the 1.2 revision after
        the end of a default branch, the id of the last rev on the
        default branch; else, None.

    ntdbr_next_id -- (int or None) Iff this is the last revision on a
        default branch preceding a 1.2 rev, the id of the 1.2
        revision; else, None.

    tag_ids -- (list of int) ids of all CVSTags rooted at this
        CVSRevision.

    branch_ids -- (list of int) ids of all CVSBranches rooted at this
        CVSRevision.

    branch_commit_ids -- (list of int) ids of first CVSRevision
        committed on each branch rooted in this revision (for branches
        with commits).

    opened_symbols -- (None or list of (symbol_id, cvs_symbol_id)
        tuples) information about all CVSSymbols opened by this
        revision.  This member is set in FilterSymbolsPass; before
        then, it is None.

    closed_symbols -- (None or list of (symbol_id, cvs_symbol_id)
        tuples) information about all CVSSymbols closed by this
        revision.  This member is set in FilterSymbolsPass; before
        then, it is None.

    revision_recorder_token -- (arbitrary) a token that can be set by
        RevisionRecorder for the later use of RevisionReader.

  """

  __slots__ = [
      'timestamp',
      'metadata_id',
      'prev_id',
      'next_id',
      'rev',
      'deltatext_exists',
      'lod',
      'first_on_branch_id',
      'ntdbr',
      'ntdbr_prev_id',
      'ntdbr_next_id',
      'tag_ids',
      'branch_ids',
      'branch_commit_ids',
      'opened_symbols',
      'closed_symbols',
      ]

  def __init__(self,
               id, cvs_file,
               timestamp, metadata_id,
               prev_id, next_id,
               rev, deltatext_exists,
               lod, first_on_branch_id, ntdbr,
               ntdbr_prev_id, ntdbr_next_id,
               tag_ids, branch_ids, branch_commit_ids,
               revision_recorder_token):
    """Initialize a new CVSRevision object.

    See the class docstring for the meaning of the parameters."""

    CVSItem.__init__(self, id, cvs_file, revision_recorder_token)

    self.timestamp = timestamp
    self.metadata_id = metadata_id
    self.prev_id = prev_id
    self.next_id = next_id
    self.rev = rev
    self.deltatext_exists = deltatext_exists
    self.lod = lod
    self.first_on_branch_id = first_on_branch_id
    self.ntdbr = ntdbr
    self.ntdbr_prev_id = ntdbr_prev_id
    self.ntdbr_next_id = ntdbr_next_id
    self.tag_ids = tag_ids
    self.branch_ids = branch_ids
    self.branch_commit_ids = branch_commit_ids
    # These two members are filled in later, in FilterSymbolsPass:
    self.opened_symbols = None
    self.closed_symbols = None

  def _get_cvs_path(self):
    return self.cvs_file.cvs_path

  # Read-only convenience alias for self.cvs_file.cvs_path:
  cvs_path = property(_get_cvs_path)

  def get_svn_path(self):
    return self.lod.get_path(self.cvs_file.cvs_path)

  def __getstate__(self):
    """Return the contents of this instance, for pickling.

    The presence of this method improves the space efficiency of
    pickling CVSRevision instances.  cvs_file and lod are stored as
    their ids; __setstate__() resolves them back into objects."""

    return (
        self.id, self.cvs_file.id,
        self.timestamp, self.metadata_id,
        self.prev_id, self.next_id,
        self.rev,
        self.deltatext_exists,
        self.lod.id,
        self.first_on_branch_id,
        self.ntdbr,
        self.ntdbr_prev_id, self.ntdbr_next_id,
        self.tag_ids, self.branch_ids, self.branch_commit_ids,
        self.opened_symbols, self.closed_symbols,
        self.revision_recorder_token,
        )

  def __setstate__(self, data):
    """Restore an instance from DATA as produced by __getstate__()."""

    (self.id, cvs_file_id,
     self.timestamp, self.metadata_id,
     self.prev_id, self.next_id,
     self.rev,
     self.deltatext_exists,
     lod_id,
     self.first_on_branch_id,
     self.ntdbr,
     self.ntdbr_prev_id, self.ntdbr_next_id,
     self.tag_ids, self.branch_ids, self.branch_commit_ids,
     self.opened_symbols, self.closed_symbols,
     self.revision_recorder_token) = data
    # Resolve the stored ids back into full objects:
    self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
    self.lod = Ctx()._symbol_db.get_symbol(lod_id)

  def get_effective_prev_id(self):
    """Return the ID of the effective predecessor of this item.

    This is the ID of the item that determines whether the object
    existed before this CVSRevision."""

    if self.ntdbr_prev_id is not None:
      return self.ntdbr_prev_id
    else:
      return self.prev_id

  def get_symbol_pred_ids(self):
    """Return the pred_ids for symbol predecessors."""

    retval = set()
    if self.first_on_branch_id is not None:
      retval.add(self.first_on_branch_id)
    return retval

  def get_pred_ids(self):
    # Predecessors: the sprouting branch (if first on a branch), the
    # previous revision on the same LOD, and the last default-branch
    # revision (for a 1.2 rev following a non-trunk default branch).
    retval = self.get_symbol_pred_ids()
    if self.prev_id is not None:
      retval.add(self.prev_id)
    if self.ntdbr_prev_id is not None:
      retval.add(self.ntdbr_prev_id)
    return retval

  def get_symbol_succ_ids(self):
    """Return the succ_ids for symbol successors."""

    retval = set()
    for id in self.branch_ids + self.tag_ids:
      retval.add(id)
    return retval

  def get_succ_ids(self):
    # Successors: all rooted symbols, the next revision on this LOD,
    # the 1.2 revision following a default branch (if any), and the
    # first commit on each branch rooted here.
    retval = self.get_symbol_succ_ids()
    if self.next_id is not None:
      retval.add(self.next_id)
    if self.ntdbr_next_id is not None:
      retval.add(self.ntdbr_next_id)
    for id in self.branch_commit_ids:
      retval.add(id)
    return retval

  def get_ids_closed(self):
    # Special handling is needed in the case of non-trunk default
    # branches.  The following cases have to be handled:
    #
    # Case 1: Revision 1.1 not deleted; revision 1.2 exists:
    #
    #         1.1 -----------------> 1.2
    #           \    ^          ^    /
    #            \   |          |   /
    #             1.1.1.1 -> 1.1.1.2
    #
    # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1
    #   on trunk)
    #
    # * 1.1.1.2 closes 1.1.1.1
    #
    # * 1.2 doesn't close anything (the post-commit from 1.1.1.1
    #   already closed 1.1, and no symbols can sprout from the
    #   post-commit of 1.1.1.2)
    #
    # Case 2: Revision 1.1 not deleted; revision 1.2 does not exist:
    #
    #         1.1 ..................
    #           \    ^          ^
    #            \   |          |
    #             1.1.1.1 -> 1.1.1.2
    #
    # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1
    #   on trunk)
    #
    # * 1.1.1.2 closes 1.1.1.1
    #
    # Case 3: Revision 1.1 deleted; revision 1.2 exists:
    #
    #         ............... 1.2
    #            ^          ^    /
    #            |          |   /
    #         1.1.1.1 -> 1.1.1.2
    #
    # * 1.1.1.1 doesn't close anything
    #
    # * 1.1.1.2 closes 1.1.1.1
    #
    # * 1.2 doesn't close anything (no symbols can sprout from the
    #   post-commit of 1.1.1.2)
    #
    # Case 4: Revision 1.1 deleted; revision 1.2 doesn't exist:
    #
    #         ...............
    #            ^          ^
    #            |          |
    #         1.1.1.1 -> 1.1.1.2
    #
    # * 1.1.1.1 doesn't close anything
    #
    # * 1.1.1.2 closes 1.1.1.1

    if self.first_on_branch_id is not None:
      # The first CVSRevision on a branch is considered to close the
      # branch:
      yield self.first_on_branch_id
      if self.ntdbr:
        # If the 1.1 revision was not deleted, the 1.1.1.1 revision is
        # considered to close it:
        yield self.prev_id
    elif self.ntdbr_prev_id is not None:
      # This is the special case of a 1.2 revision that follows a
      # non-trunk default branch.  Either 1.1 was deleted or the first
      # default branch revision closed 1.1, so we don't have to close
      # 1.1.  Technically, we close the revision on trunk that was
      # copied from the last non-trunk default branch revision in a
      # post-commit, but for now no symbols can sprout from that
      # revision so we ignore that one, too.
      pass
    elif self.prev_id is not None:
      # Since this CVSRevision is not the first on a branch, its
      # prev_id is on the same LOD and this item closes that one:
      yield self.prev_id

  def _get_branch_ids_recursively(self, cvs_file_items):
    """Return the set of all CVSBranches that sprout from this CVSRevision.

    After parent adjustment in FilterSymbolsPass, it is possible for
    branches to sprout directly from a CVSRevision, or from those
    branches, etc.  Return all branches that sprout from this
    CVSRevision, directly or indirectly."""

    retval = set()
    branch_ids_to_process = list(self.branch_ids)
    while branch_ids_to_process:
      branch = cvs_file_items[branch_ids_to_process.pop()]
      retval.add(branch)
      branch_ids_to_process.extend(branch.branch_ids)

    return retval

  def check_links(self, cvs_file_items):
    """Check the consistency of this revision's links into CVS_FILE_ITEMS.

    Raise an AssertionError if any cross-reference is inconsistent."""

    assert self.cvs_file == cvs_file_items.cvs_file

    prev = cvs_file_items.get(self.prev_id)
    next = cvs_file_items.get(self.next_id)
    first_on_branch = cvs_file_items.get(self.first_on_branch_id)
    ntdbr_next = cvs_file_items.get(self.ntdbr_next_id)
    ntdbr_prev = cvs_file_items.get(self.ntdbr_prev_id)
    effective_prev = cvs_file_items.get(self.get_effective_prev_id())

    if prev is None:
      # This is the first CVSRevision on trunk or a detached branch:
      assert self.id in cvs_file_items.root_ids
    elif first_on_branch is not None:
      # This is the first CVSRevision on an existing branch:
      assert isinstance(first_on_branch, CVSBranch)
      assert first_on_branch.symbol == self.lod
      assert first_on_branch.next_id == self.id
      cvs_revision_source = first_on_branch.get_cvs_revision_source(
          cvs_file_items
          )
      assert cvs_revision_source.id == prev.id
      assert self.id in prev.branch_commit_ids
    else:
      # This revision follows another revision on the same LOD:
      assert prev.next_id == self.id
      assert prev.lod == self.lod

    if next is not None:
      assert next.prev_id == self.id
      assert next.lod == self.lod

    if ntdbr_next is not None:
      assert self.ntdbr
      assert ntdbr_next.ntdbr_prev_id == self.id

    if ntdbr_prev is not None:
      assert ntdbr_prev.ntdbr_next_id == self.id

    for tag_id in self.tag_ids:
      tag = cvs_file_items[tag_id]
      assert isinstance(tag, CVSTag)
      assert tag.source_id == self.id
      assert tag.source_lod == self.lod

    for branch_id in self.branch_ids:
      branch = cvs_file_items[branch_id]
      assert isinstance(branch, CVSBranch)
      assert branch.source_id == self.id
      assert branch.source_lod == self.lod

    # Every direct or indirect branch with a commit must account for
    # exactly one entry in branch_commit_ids:
    branch_commit_ids = list(self.branch_commit_ids)

    for branch in self._get_branch_ids_recursively(cvs_file_items):
      assert isinstance(branch, CVSBranch)
      if branch.next_id is not None:
        assert branch.next_id in branch_commit_ids
        branch_commit_ids.remove(branch.next_id)

    assert not branch_commit_ids

    # The concrete subtype must agree with the liveness of this
    # revision and of its effective predecessor:
    assert self.__class__ == cvs_revision_type_map[(
        isinstance(self, CVSRevisionModification),
        effective_prev is not None
        and isinstance(effective_prev, CVSRevisionModification),
        )]

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s:%s<%x>' % (self.cvs_file, self.rev, self.id,)
+
+
class CVSRevisionModification(CVSRevision):
  """Base class for CVSRevisionAdd or CVSRevisionChange."""

  __slots__ = []

  def get_cvs_symbol_ids_opened(self):
    # A live revision opens every tag and branch rooted at it:
    return self.tag_ids + self.branch_ids
+
+
class CVSRevisionAdd(CVSRevisionModification):
  """A CVSRevision that creates a file that previously didn't exist.

  The file might have never existed on this LOD, or it might have
  existed previously but been deleted by a CVSRevisionDelete.
  (State transition 'dead' -> 'Exp'.)"""

  # No state beyond CVSRevisionModification; this class only marks the
  # revision's type.
  __slots__ = []
+
+
class CVSRevisionChange(CVSRevisionModification):
  """A CVSRevision that modifies a file that already existed on this LOD.

  (State transition 'Exp' -> 'Exp'.)"""

  # No state beyond CVSRevisionModification; this class only marks the
  # revision's type.
  __slots__ = []
+
+
class CVSRevisionAbsent(CVSRevision):
  """A CVSRevision for which the file is nonexistent on this LOD."""

  __slots__ = []

  def get_cvs_symbol_ids_opened(self):
    # A dead revision has no content to copy, so it opens no symbols:
    return []
+
+
class CVSRevisionDelete(CVSRevisionAbsent):
  """A CVSRevision that deletes a file that existed on this LOD.

  (State transition 'Exp' -> 'dead'.)"""

  # No state beyond CVSRevisionAbsent; this class only marks the
  # revision's type.
  __slots__ = []
+
+
class CVSRevisionNoop(CVSRevisionAbsent):
  """A CVSRevision that doesn't do anything.

  The revision was 'dead' and the predecessor either didn't exist or
  was also 'dead'.  These revisions can't necessarily be thrown away
  because (1) they impose ordering constraints on other items; (2)
  they might have a nontrivial log message that we don't want to throw
  away."""

  # No state beyond CVSRevisionAbsent; this class only marks the
  # revision's type.
  __slots__ = []
+
+
# A map
#
#   {(nondead(cvs_rev), nondead(prev_cvs_rev)) : cvs_revision_subtype}
#
# , where nondead() means that the cvs revision exists and is not
# 'dead', and CVS_REVISION_SUBTYPE is the subtype of CVSRevision that
# should be used for CVS_REV.
cvs_revision_type_map = {
    (False, False) : CVSRevisionNoop,      # 'dead' -> 'dead'
    (False, True) : CVSRevisionDelete,     # 'Exp' -> 'dead'
    (True, False) : CVSRevisionAdd,        # 'dead' -> 'Exp'
    (True, True) : CVSRevisionChange,      # 'Exp' -> 'Exp'
    }
+
+
class CVSSymbol(CVSItem):
  """A symbol (tag or branch) as it applies to one particular CVSFile.

  This is the common base class of CVSBranch and CVSTag.

  Members:

    id -- (int) unique ID for this item.

    cvs_file -- (CVSFile) CVSFile affected by this item.

    symbol -- (Symbol) the symbol affected by this CVSSymbol.

    source_lod -- (LineOfDevelopment) the LOD that is the source for
        this CVSSymbol.

    source_id -- (int) the ID of the CVSRevision or CVSBranch that is
        the source for this item.  This initially points to a
        CVSRevision, but can be changed to a CVSBranch via parent
        adjustment in FilterSymbolsPass.

    revision_recorder_token -- (arbitrary) a token that can be set by
        RevisionRecorder for the later use of RevisionReader.

  """

  __slots__ = [
      'symbol',
      'source_lod',
      'source_id',
      ]

  def __init__(
      self, id, cvs_file, symbol, source_lod, source_id,
      revision_recorder_token
      ):
    """Initialize a CVSSymbol object."""

    CVSItem.__init__(self, id, cvs_file, revision_recorder_token)

    self.symbol = symbol
    self.source_lod = source_lod
    self.source_id = source_id

  def get_cvs_revision_source(self, cvs_file_items):
    """Return the CVSRevision that is the ultimate source of this symbol.

    Follow source_id links through any intermediate CVSSymbols until a
    CVSRevision is reached."""

    source = cvs_file_items[self.source_id]
    while not isinstance(source, CVSRevision):
      source = cvs_file_items[source.source_id]

    return source

  def get_svn_path(self):
    return self.symbol.get_path(self.cvs_file.cvs_path)

  def get_ids_closed(self):
    # A Symbol does not close any other CVSItems:
    return []
+
+
class CVSBranch(CVSSymbol):
  """Represent the creation of a branch in a particular CVSFile.

  Members:

    id -- (int) unique ID for this item.

    cvs_file -- (CVSFile) CVSFile affected by this item.

    symbol -- (Symbol) the symbol affected by this CVSSymbol.

    branch_number -- (string) the number of this branch (e.g.,
        '1.3.4'), or None if this is a converted CVSTag.

    source_lod -- (LineOfDevelopment) the LOD that is the source for
        this CVSSymbol.

    source_id -- (int) id of the CVSRevision or CVSBranch from which
        this branch sprouts.  This initially points to a CVSRevision,
        but can be changed to a CVSBranch via parent adjustment in
        FilterSymbolsPass.

    next_id -- (int or None) id of first CVSRevision on this branch,
        if any; else, None.

    tag_ids -- (list of int) ids of all CVSTags rooted at this
        CVSBranch (can be set due to parent adjustment in
        FilterSymbolsPass).

    branch_ids -- (list of int) ids of all CVSBranches rooted at this
        CVSBranch (can be set due to parent adjustment in
        FilterSymbolsPass).

    opened_symbols -- (None or list of (symbol_id, cvs_symbol_id)
        tuples) information about all CVSSymbols opened by this
        branch.  This member is set in FilterSymbolsPass; before then,
        it is None.

    revision_recorder_token -- (arbitrary) a token that can be set by
        RevisionRecorder for the later use of RevisionReader.

  """

  __slots__ = [
      'branch_number',
      'next_id',
      'tag_ids',
      'branch_ids',
      'opened_symbols',
      ]

  def __init__(
      self, id, cvs_file, symbol, branch_number,
      source_lod, source_id, next_id,
      revision_recorder_token,
      ):
    """Initialize a CVSBranch."""

    CVSSymbol.__init__(
        self, id, cvs_file, symbol, source_lod, source_id,
        revision_recorder_token
        )
    self.branch_number = branch_number
    self.next_id = next_id
    # These can be extended later via parent adjustment in
    # FilterSymbolsPass:
    self.tag_ids = []
    self.branch_ids = []
    # Filled in later, in FilterSymbolsPass:
    self.opened_symbols = None

  def __getstate__(self):
    # cvs_file, symbol, and source_lod are stored as ids and resolved
    # again in __setstate__(); the tuple order here must match
    # __setstate__() exactly.
    return (
        self.id, self.cvs_file.id,
        self.symbol.id, self.branch_number,
        self.source_lod.id, self.source_id, self.next_id,
        self.tag_ids, self.branch_ids,
        self.opened_symbols,
        self.revision_recorder_token,
        )

  def __setstate__(self, data):
    (
        self.id, cvs_file_id,
        symbol_id, self.branch_number,
        source_lod_id, self.source_id, self.next_id,
        self.tag_ids, self.branch_ids,
        self.opened_symbols,
        self.revision_recorder_token,
        ) = data
    # Resolve the stored ids back into full objects:
    self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
    self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
    self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id)

  def get_pred_ids(self):
    # A branch's only predecessor is its source item:
    return set([self.source_id])

  def get_succ_ids(self):
    # Successors: any symbols rooted at this branch, plus the first
    # revision committed on it (if any):
    retval = set(self.tag_ids + self.branch_ids)
    if self.next_id is not None:
      retval.add(self.next_id)
    return retval

  def get_cvs_symbol_ids_opened(self):
    return self.tag_ids + self.branch_ids

  def check_links(self, cvs_file_items):
    """Check the consistency of this branch's links into CVS_FILE_ITEMS.

    Raise an AssertionError if any cross-reference is inconsistent."""

    source = cvs_file_items.get(self.source_id)
    next = cvs_file_items.get(self.next_id)

    assert self.id in source.branch_ids
    if isinstance(source, CVSRevision):
      assert self.source_lod == source.lod
    elif isinstance(source, CVSBranch):
      assert self.source_lod == source.symbol
    else:
      assert False

    if next is not None:
      assert isinstance(next, CVSRevision)
      assert next.lod == self.symbol
      assert next.first_on_branch_id == self.id

    for tag_id in self.tag_ids:
      tag = cvs_file_items[tag_id]
      assert isinstance(tag, CVSTag)
      assert tag.source_id == self.id
      assert tag.source_lod == self.symbol

    for branch_id in self.branch_ids:
      branch = cvs_file_items[branch_id]
      assert isinstance(branch, CVSBranch)
      assert branch.source_id == self.id
      assert branch.source_lod == self.symbol

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s:%s:%s<%x>' \
           % (self.cvs_file, self.symbol, self.branch_number, self.id,)
+
+
class CVSBranchNoop(CVSBranch):
  """A CVSBranch whose source is a CVSRevisionAbsent."""

  __slots__ = []

  def get_cvs_symbol_ids_opened(self):
    # The branch is rooted at a nonexistent revision, so there is
    # nothing to copy and it opens no symbols:
    return []
+
+
# A map
#
#   {nondead(source_cvs_rev) : cvs_branch_subtype}
#
# , where nondead() means that the cvs revision exists and is not
# 'dead', and CVS_BRANCH_SUBTYPE is the subtype of CVSBranch that
# should be used.
cvs_branch_type_map = {
    False : CVSBranchNoop,     # source revision is absent
    True : CVSBranch,          # source revision exists
    }
+
+
class CVSTag(CVSSymbol):
  """Represent the creation of a tag on a particular CVSFile.

  Members:

    id -- (int) unique ID for this item.

    cvs_file -- (CVSFile) CVSFile affected by this item.

    symbol -- (Symbol) the symbol affected by this CVSSymbol.

    source_lod -- (LineOfDevelopment) the LOD that is the source for
        this CVSSymbol.

    source_id -- (int) the ID of the CVSRevision or CVSBranch that is
        being tagged.  This initially points to a CVSRevision, but can
        be changed to a CVSBranch via parent adjustment in
        FilterSymbolsPass.

    revision_recorder_token -- (arbitrary) a token that can be set by
        RevisionRecorder for the later use of RevisionReader.

  """

  __slots__ = []

  def __init__(
      self, id, cvs_file, symbol, source_lod, source_id,
      revision_recorder_token,
      ):
    """Initialize a CVSTag."""

    CVSSymbol.__init__(
        self, id, cvs_file, symbol, source_lod, source_id,
        revision_recorder_token,
        )

  def __getstate__(self):
    # cvs_file, symbol, and source_lod are stored as ids and resolved
    # again in __setstate__(); the tuple order here must match
    # __setstate__() exactly.
    return (
        self.id, self.cvs_file.id, self.symbol.id,
        self.source_lod.id, self.source_id,
        self.revision_recorder_token,
        )

  def __setstate__(self, data):
    (
        self.id, cvs_file_id, symbol_id, source_lod_id, self.source_id,
        self.revision_recorder_token,
        ) = data
    # Resolve the stored ids back into full objects:
    self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
    self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
    self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id)

  def get_pred_ids(self):
    # A tag's only predecessor is its source item:
    return set([self.source_id])

  def get_succ_ids(self):
    # Nothing can spring from a tag:
    return set()

  def get_cvs_symbol_ids_opened(self):
    return []

  def check_links(self, cvs_file_items):
    """Check the consistency of this tag's links into CVS_FILE_ITEMS.

    Raise an AssertionError if any cross-reference is inconsistent."""

    source = cvs_file_items.get(self.source_id)

    assert self.id in source.tag_ids
    if isinstance(source, CVSRevision):
      assert self.source_lod == source.lod
    elif isinstance(source, CVSBranch):
      assert self.source_lod == source.symbol
    else:
      assert False

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s:%s<%x>' \
           % (self.cvs_file, self.symbol, self.id,)
+
+
class CVSTagNoop(CVSTag):
  """A CVSTag whose source is a CVSRevisionAbsent."""

  # No additional state; this class only marks the tag's type.
  __slots__ = []
+
+
# A map
#
#   {nondead(source_cvs_rev) : cvs_tag_subtype}
#
# , where nondead() means that the cvs revision exists and is not
# 'dead', and CVS_TAG_SUBTYPE is the subtype of CVSTag that should be
# used.
cvs_tag_type_map = {
    False : CVSTagNoop,     # source revision is absent
    True : CVSTag,          # source revision exists
    }
+
+
diff --git a/cvs2svn_lib/cvs_item_database.py b/cvs2svn_lib/cvs_item_database.py
new file mode 100644
index 0000000..f072252
--- /dev/null
+++ b/cvs2svn_lib/cvs_item_database.py
@@ -0,0 +1,248 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains a database that can store arbitrary CVSItems."""
+
+
+import re
+import cPickle
+
+from cvs2svn_lib.cvs_item import CVSRevisionAdd
+from cvs2svn_lib.cvs_item import CVSRevisionChange
+from cvs2svn_lib.cvs_item import CVSRevisionDelete
+from cvs2svn_lib.cvs_item import CVSRevisionNoop
+from cvs2svn_lib.cvs_item import CVSBranch
+from cvs2svn_lib.cvs_item import CVSBranchNoop
+from cvs2svn_lib.cvs_item import CVSTag
+from cvs2svn_lib.cvs_item import CVSTagNoop
+from cvs2svn_lib.cvs_file_items import CVSFileItems
+from cvs2svn_lib.serializer import Serializer
+from cvs2svn_lib.serializer import PrimedPickleSerializer
+from cvs2svn_lib.database import IndexedStore
+
+
# The concrete CVSItem subclasses that can be stored by the databases
# in this module; passed to PrimedPickleSerializer as its primer.
cvs_item_primer = (
    CVSRevisionAdd, CVSRevisionChange,
    CVSRevisionDelete, CVSRevisionNoop,
    CVSBranch, CVSBranchNoop,
    CVSTag, CVSTagNoop,
    )
+
+
class NewCVSItemStore:
  """A file of sequential CVSItems, grouped by CVSFile.

  The file consists of a sequence of pickles.  The zeroth one is a
  Serializer as described in the serializer module.  Subsequent ones
  are pickled lists of CVSItems, each list containing all of the
  CVSItems for a single file.

  We don't use a single pickler for all items because the memo would
  grow too large."""

  def __init__(self, filename):
    """Initialize an instance, creating the file and writing the primer."""

    self.f = open(filename, 'wb')

    # Pickle the serializer itself first, so that a reader can
    # reconstruct it before reading any of the records:
    self.serializer = PrimedPickleSerializer(
        cvs_item_primer + (CVSFileItems,)
        )
    cPickle.dump(self.serializer, self.f, -1)

  def add(self, cvs_file_items):
    """Write CVS_FILE_ITEMS into the database."""

    self.serializer.dumpf(self.f, cvs_file_items)

  def close(self):
    """Close the underlying file and drop the reference to it."""

    self.f.close()
    self.f = None
+
+
class OldCVSItemStore:
  """Read a file created by NewCVSItemStore.

  The file must be read sequentially, one CVSFileItems instance at a
  time."""

  def __init__(self, filename):
    self.f = open(filename, 'rb')

    # The first pickle in the file is the serializer (with its primed
    # memo); recover it before reading any records:
    self.serializer = cPickle.load(self.f)

  def iter_cvs_file_items(self):
    """Iterate through the CVSFileItems instances, one file at a time.

    Each time yield a CVSFileItems instance for one CVSFile.  The end
    of the file is signalled by the serializer raising EOFError."""

    while True:
      try:
        cvs_file_items = self.serializer.loadf(self.f)
      except EOFError:
        return
      yield cvs_file_items

  def close(self):
    """Close the underlying file and drop the reference to it."""

    self.f.close()
    self.f = None
+
+
class LinewiseSerializer(Serializer):
  """A serializer that writes exactly one line for each object.

  The actual serialization is done by a wrapped serializer; this class
  only escapes any newlines in the serialized data then appends a
  single newline."""

  def __init__(self, wrapee):
    # wrapee -- the Serializer whose dumps()/loads() do the real work.
    self.wrapee = wrapee

  @staticmethod
  def _encode_newlines(s):
    r"""Return s with newlines and backslashes encoded.

    The string is returned with the following character transformations:

      LF -> \n
      CR -> \r
      ^Z -> \z (needed for Windows)
      \  -> \\

    """

    # The backslash substitution must come first so that backslashes
    # introduced by the later substitutions are not re-escaped:
    return s.replace('\\', '\\\\') \
           .replace('\n', '\\n') \
           .replace('\r', '\\r') \
           .replace('\x1a', '\\z')

  # Matches any escape sequence produced by _encode_newlines():
  _escape_re = re.compile(r'(\\\\|\\n|\\r|\\z)')
  # Maps each escape sequence back to the character it encodes:
  _subst = {'\\n' : '\n', '\\r' : '\r', '\\z' : '\x1a', '\\\\' : '\\'}

  @staticmethod
  def _decode_newlines(s):
    r"""Return s with newlines and backslashes decoded.

    This function reverses the encoding of _encode_newlines().

    """

    def repl(m):
      return LinewiseSerializer._subst[m.group(1)]

    return LinewiseSerializer._escape_re.sub(repl, s)

  def dumpf(self, f, object):
    f.write(self.dumps(object))

  def dumps(self, object):
    # One escaped record per line; the trailing newline is the record
    # terminator that loads() strips off again.
    return self._encode_newlines(self.wrapee.dumps(object)) + '\n'

  def loadf(self, f):
    return self.loads(f.readline())

  def loads(self, s):
    # s[:-1] drops the trailing newline added by dumps():
    return self.wrapee.loads(self._decode_newlines(s[:-1]))
+
+
class NewSortableCVSRevisionDatabase(object):
  """A serially-accessible, sortable file for holding CVSRevisions.

  This class creates such files."""

  def __init__(self, filename, serializer):
    """Create FILENAME for writing one line per added CVSRevision."""

    self.f = open(filename, 'w')
    self.serializer = LinewiseSerializer(serializer)

  def add(self, cvs_rev):
    """Append CVS_REV to the file, prefixed by its sort keys.

    The metadata id (hex) and the zero-padded timestamp (hex) come
    first so that an external line sort orders the records usefully."""

    record = self.serializer.dumps(cvs_rev)
    self.f.write(
        '%x %08x %s' % (cvs_rev.metadata_id, cvs_rev.timestamp, record)
        )

  def close(self):
    """Close the underlying file and drop the reference to it."""

    self.f.close()
    self.f = None
+
+
class OldSortableCVSRevisionDatabase(object):
  """A serially-accessible, sortable file for holding CVSRevisions.

  This class reads such files."""

  def __init__(self, filename, serializer):
    """FILENAME -- the file to read; SERIALIZER -- the record serializer.

    The serializer is wrapped in a LinewiseSerializer because each
    record occupies exactly one line of the file."""

    self.filename = filename
    self.serializer = LinewiseSerializer(serializer)

  def __iter__(self):
    """Yield the deserialized CVSRevisions, in file order.

    Each line has the form '<metadata_id> <timestamp> <data>'; only
    the data part is deserialized."""

    f = open(self.filename, 'r')
    try:
      for l in f:
        # Strip off the two sort-key fields prepended by
        # NewSortableCVSRevisionDatabase.add():
        s = l.split(' ', 2)[-1]
        yield self.serializer.loads(s)
    finally:
      # Bug fix: close the file even if the caller abandons the
      # iteration or deserialization raises; previously the handle
      # leaked in those cases because close() was only reached after a
      # complete pass over the file.
      f.close()

  def close(self):
    # There is nothing to release; the file is opened and closed
    # entirely within __iter__().
    pass
+
+
class NewSortableCVSSymbolDatabase(object):
  """A serially-accessible, sortable file for holding CVSSymbols.

  This class creates such files."""

  def __init__(self, filename, serializer):
    """Create FILENAME for writing one line per added CVSSymbol."""

    self.f = open(filename, 'w')
    self.serializer = LinewiseSerializer(serializer)

  def add(self, cvs_symbol):
    """Append CVS_SYMBOL to the file, prefixed by its symbol id (hex).

    The prefix allows an external line sort to group records by
    symbol."""

    record = self.serializer.dumps(cvs_symbol)
    self.f.write('%x %s' % (cvs_symbol.symbol.id, record))

  def close(self):
    """Close the underlying file and drop the reference to it."""

    self.f.close()
    self.f = None
+
+
class OldSortableCVSSymbolDatabase(object):
  """A serially-accessible, sortable file for holding CVSSymbols.

  This class reads such files."""

  def __init__(self, filename, serializer):
    """FILENAME -- the file to read; SERIALIZER -- the record serializer.

    The serializer is wrapped in a LinewiseSerializer because each
    record occupies exactly one line of the file."""

    self.filename = filename
    self.serializer = LinewiseSerializer(serializer)

  def __iter__(self):
    """Yield the deserialized CVSSymbols, in file order.

    Each line has the form '<symbol_id> <data>'; only the data part is
    deserialized."""

    f = open(self.filename, 'r')
    try:
      for l in f:
        # Strip off the sort-key field prepended by
        # NewSortableCVSSymbolDatabase.add():
        s = l.split(' ', 1)[-1]
        yield self.serializer.loads(s)
    finally:
      # Bug fix: close the file even if the caller abandons the
      # iteration or deserialization raises; previously the handle
      # leaked in those cases because close() was only reached after a
      # complete pass over the file.
      f.close()

  def close(self):
    # There is nothing to release; the file is opened and closed
    # entirely within __iter__().
    pass
+
+
def IndexedCVSItemStore(filename, index_filename, mode):
  """Return an IndexedStore configured for random access to CVSItems.

  FILENAME, INDEX_FILENAME, and MODE are passed straight through to
  IndexedStore; the serializer is primed with all concrete CVSItem
  subclasses (cvs_item_primer)."""

  return IndexedStore(
      filename, index_filename, mode,
      PrimedPickleSerializer(cvs_item_primer)
      )
+
+
diff --git a/cvs2svn_lib/cvs_revision_manager.py b/cvs2svn_lib/cvs_revision_manager.py
new file mode 100644
index 0000000..6f5de3b
--- /dev/null
+++ b/cvs2svn_lib/cvs_revision_manager.py
@@ -0,0 +1,85 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Access the CVS repository via CVS's 'cvs' command."""
+
+
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.process import check_command_runs
+from cvs2svn_lib.process import PipeStream
+from cvs2svn_lib.process import CommandFailedException
+from cvs2svn_lib.revision_manager import RevisionReader
+
+
class CVSRevisionReader(RevisionReader):
  """A RevisionReader that reads the contents via CVS."""

  # Different versions of CVS support different global arguments.
  # Here are the global arguments that we try to use, in order of
  # decreasing preference:
  _possible_global_arguments = [
      ['-q', '-R', '-f'],
      ['-q', '-R'],
      ['-q', '-f'],
      ['-q'],
      ]

  def __init__(self, cvs_executable):
    """Probe CVS_EXECUTABLE and remember working global arguments.

    Try each candidate argument list in preference order; keep the
    first one with which 'cvs --version' runs.  Raise FatalError if
    none works."""

    self.cvs_executable = cvs_executable

    for global_arguments in self._possible_global_arguments:
      try:
        self._check_cvs_runs(global_arguments)
      except CommandFailedException, e:
        pass
      else:
        # Those global arguments were OK; use them for all CVS invocations.
        self.global_arguments = global_arguments
        break
    else:
      # NOTE: this relies on Python 2 behavior: the exception variable
      # 'e' bound in the except clause above is still visible here.
      # The for-else only runs when every attempt raised, so 'e' is
      # guaranteed to be bound.  (Under Python 3 the except target is
      # deleted at the end of the clause, so this would need a rewrite.)
      raise FatalError(
          '%s\n'
          'Please check that cvs is installed and in your PATH.' % (e,)
          )

  def _check_cvs_runs(self, global_arguments):
    """Check that CVS can be started.

    Try running 'cvs --version' with the current setting for
    self.cvs_executable and the specified global_arguments. If not
    successful, raise a CommandFailedException."""

    check_command_runs(
        [self.cvs_executable] + global_arguments + ['--version'],
        self.cvs_executable,
        )

  def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
    """Return a pipe stream producing the contents of CVS_REV.

    The contents come from 'cvs co -p' run against the revision's
    repository.  If SUPPRESS_KEYWORD_SUBSTITUTION is set, '-kk' is
    passed so that RCS keywords are not expanded in the output."""

    project = cvs_rev.cvs_file.project
    pipe_cmd = [
        self.cvs_executable
        ] + self.global_arguments + [
        '-d', project.cvs_repository_root,
        'co',
        '-r' + cvs_rev.rev,
        '-p'
        ]
    if suppress_keyword_substitution:
      pipe_cmd.append('-kk')
    pipe_cmd.append(project.cvs_module + cvs_rev.cvs_path)
    return PipeStream(pipe_cmd)
+
+
diff --git a/cvs2svn_lib/database.py b/cvs2svn_lib/database.py
new file mode 100644
index 0000000..9db9be2
--- /dev/null
+++ b/cvs2svn_lib/database.py
@@ -0,0 +1,322 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains database facilities used by cvs2svn."""
+
+
+import sys
+import os
+import cPickle
+
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.common import DB_OPEN_WRITE
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.common import error_prefix
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.record_table import FileOffsetPacker
+from cvs2svn_lib.record_table import RecordTable
+
+
# DBM module selection
#
# NOTE: this code runs at import time and monkey-patches the (Python 2)
# stdlib anydbm machinery, so the statement order below matters.

# 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
# so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
  # Alias bsddb3 under the name 'bsddb' so later imports of bsddb
  # (e.g. by dbhash via anydbm) transparently get bsddb3.
  sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
  pass

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ['dumbdbm', 'dbm']:
  Log().error(
      '%s: cvs2svn uses the anydbm package, which depends on lower level '
      'dbm\n'
      'libraries. Your system has %s, with which cvs2svn is known to have\n'
      'problems. To use cvs2svn, you must install a Python dbm library '
      'other than\n'
      'dumbdbm or dbm. See '
      'http://python.org/doc/current/lib/module-anydbm.html\n'
      'for more information.\n'
      % (error_prefix, anydbm._defaultmod.__name__,)
      )
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try prefer gdbm instead.
# Unfortunately, gdbm appears not to be trouble free, either.
# (bsddb185 is detected by the absence of __version__ on the bsddb module.)
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    Log().warn(
        '%s: The version of the bsddb module found on your computer '
        'has been\n'
        'reported to malfunction on some datasets, causing KeyError '
        'exceptions.\n'
        % (warning_prefix,)
        )
  else:
    anydbm._defaultmod = gdbm
+
+
class Database:
  """A database that uses a Serializer to store objects of a certain type.

  The serializer is stored in the database under the key
  self.serializer_key. (This implies that self.serializer_key may not
  be used as a key for normal entries.)

  The backing database is an anydbm-based DBM.

  """

  # Reserved key under which the pickled serializer is stored; chosen
  # to be unlikely to collide with any real key.
  serializer_key = '_.%$1\t;_ '

  def __init__(self, filename, mode, serializer=None):
    """Constructor.

    The database stores its Serializer, so none needs to be supplied
    when opening an existing database."""

    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    if mode == DB_OPEN_NEW and anydbm._defaultmod.__name__ == 'dbhash':
      if os.path.isfile(filename):
        os.unlink(filename)
      self.db = anydbm.open(filename, 'c')
    else:
      self.db = anydbm.open(filename, mode)

    # Import implementations for many mapping interface methods.
    # NOTE(review): the instance attributes set here shadow the
    # class-level fallbacks defined below.  In particular an imported
    # 'clear' would also remove serializer_key, unlike the fallback
    # clear(), which preserves it -- confirm whether that difference
    # is intended.
    for meth_name in ('__delitem__',
        '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'):
      meth_ref = getattr(self.db, meth_name, None)
      if meth_ref:
        setattr(self, meth_name, meth_ref)

    if mode == DB_OPEN_NEW:
      self.serializer = serializer
      self.db[self.serializer_key] = cPickle.dumps(self.serializer)
    else:
      self.serializer = cPickle.loads(self.db[self.serializer_key])

  def __getitem__(self, key):
    # Deserialize on the way out of the backing DBM.
    return self.serializer.loads(self.db[key])

  def __setitem__(self, key, value):
    # Serialize on the way into the backing DBM.
    self.db[key] = self.serializer.dumps(value)

  def __delitem__(self, key):
    # gdbm defines a __delitem__ method, but it cannot be assigned. So
    # this method provides a fallback definition via explicit delegation:
    del self.db[key]

  def keys(self):
    """Return the database's keys, excluding the reserved serializer key."""

    retval = self.db.keys()
    retval.remove(self.serializer_key)
    return retval

  def __iter__(self):
    for key in self.keys():
      yield key

  def has_key(self, key):
    # EAFP: probe the backing DBM rather than listing keys.
    try:
      self.db[key]
      return True
    except KeyError:
      return False

  def __contains__(self, key):
    return self.has_key(key)

  def iterkeys(self):
    return self.__iter__()

  def clear(self):
    # Deletes only normal entries (keys() omits serializer_key), so the
    # stored serializer survives a clear().
    for key in self.keys():
      del self[key]

  def items(self):
    return [(key, self[key],) for key in self.keys()]

  def values(self):
    return [self[key] for key in self.keys()]

  def get(self, key, default=None):
    """Return the value stored under KEY, or DEFAULT if absent."""

    try:
      return self[key]
    except KeyError:
      return default

  def close(self):
    self.db.close()
    self.db = None
+
+
class IndexedDatabase:
  """A file of objects that are written sequentially and read randomly.

  The objects are indexed by small non-negative integers, and a
  RecordTable is used to store the index -> fileoffset map.
  fileoffset=0 is used to represent an empty record. (An offset of 0
  cannot occur for a legitimate record because the serializer is
  written there.)

  The main file consists of a sequence of pickles (or other serialized
  data format). The zeroth record is a pickled Serializer.
  Subsequent ones are objects serialized using the serializer. The
  offset of each object in the file is stored to an index table so
  that the data can later be retrieved randomly.

  Objects are always stored to the end of the file. If an object is
  deleted or overwritten, the fact is recorded in the index_table but
  the space in the pickle file is not garbage collected. This has the
  advantage that one can create a modified version of a database that
  shares the main data file with an old version by copying the index
  file. But it has the disadvantage that space is wasted whenever
  objects are written multiple times."""

  def __init__(self, filename, index_filename, mode, serializer=None):
    """Initialize an IndexedDatabase, writing the serializer if necessary.

    SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the
    serializer is read from the file."""

    self.filename = filename
    self.index_filename = index_filename
    self.mode = mode
    if self.mode == DB_OPEN_NEW:
      self.f = open(self.filename, 'wb+')
    elif self.mode == DB_OPEN_WRITE:
      self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_READ:
      self.f = open(self.filename, 'rb')
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

    self.index_table = RecordTable(
        self.index_filename, self.mode, FileOffsetPacker()
        )

    if self.mode == DB_OPEN_NEW:
      assert serializer is not None
      self.serializer = serializer
      cPickle.dump(self.serializer, self.f, -1)
    else:
      # Read the memo from the first pickle:
      self.serializer = cPickle.load(self.f)

    # Seek to the end of the file, and record that position:
    # self.fp caches the file's current seek position (None when
    # unknown); self.eofp is the end-of-file offset where new records
    # are appended.
    self.f.seek(0, 2)
    self.fp = self.f.tell()
    self.eofp = self.fp

  def __setitem__(self, index, item):
    """Write ITEM into the database indexed by INDEX."""

    # Make sure we're at the end of the file:
    if self.fp != self.eofp:
      self.f.seek(self.eofp)
    self.index_table[index] = self.eofp
    s = self.serializer.dumps(item)
    self.f.write(s)
    self.eofp += len(s)
    self.fp = self.eofp

  def _fetch(self, offset):
    # Read and deserialize the record stored at OFFSET.
    if self.fp != offset:
      self.f.seek(offset)

    # There is no easy way to tell how much data will be read, so just
    # indicate that we don't know the current file pointer:
    self.fp = None

    return self.serializer.loadf(self.f)

  def iterkeys(self):
    """Iterate over the indexes present in the index table."""

    return self.index_table.iterkeys()

  def itervalues(self):
    """Iterate over the stored items, in index-table order."""

    for offset in self.index_table.itervalues():
      yield self._fetch(offset)

  def __getitem__(self, index):
    offset = self.index_table[index]
    return self._fetch(offset)

  def get(self, item, default=None):
    """Return the item stored at index ITEM, or DEFAULT if absent."""

    try:
      return self[item]
    except KeyError:
      return default

  def get_many(self, indexes, default=None):
    """Yield (index,item) tuples for INDEXES, in arbitrary order.

    Yield (index,default) for indexes with no defined values."""

    offsets = []
    for (index, offset) in self.index_table.get_many(indexes):
      if offset is None:
        yield (index, default)
      else:
        offsets.append((offset, index))

    # Sort the offsets to reduce disk seeking:
    offsets.sort()
    for (offset,index) in offsets:
      yield (index, self._fetch(offset))

  def __delitem__(self, index):
    # We don't actually free the data in self.f.
    del self.index_table[index]

  def close(self):
    """Close the index table and the data file."""

    self.index_table.close()
    self.index_table = None
    self.f.close()
    self.f = None

  def __str__(self):
    return 'IndexedDatabase(%r)' % (self.filename,)
+
+
class IndexedStore(IndexedDatabase):
  """An IndexedDatabase whose records know their own indexes.

  Identical to IndexedDatabase except for the convenience method
  add(), which stores an object under the index given by its 'id'
  member. See IndexedDatabase for details of the storage format."""

  def add(self, item):
    """Store ITEM in the database under index ITEM.id."""

    index = item.id
    self[index] = item
+
+
diff --git a/cvs2svn_lib/dumpfile_delegate.py b/cvs2svn_lib/dumpfile_delegate.py
new file mode 100644
index 0000000..092cfca
--- /dev/null
+++ b/cvs2svn_lib/dumpfile_delegate.py
@@ -0,0 +1,510 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains database facilities used by cvs2svn."""
+
+
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import new as md5
+
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.common import path_split
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.cvs_file import CVSDirectory
+from cvs2svn_lib.cvs_file import CVSFile
+from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
+from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream
+
+
+# Things that can happen to a file.
+OP_ADD = 'add'
+OP_CHANGE = 'change'
+
+
class DumpfileDelegate(SVNRepositoryDelegate):
  """Create a Subversion dumpfile."""

  def __init__(self, revision_reader, dumpfile_path):
    """Return a new DumpfileDelegate instance, attached to a dumpfile
    DUMPFILE_PATH, using Ctx().cvs_filename_decoder()."""

    self._revision_reader = revision_reader
    self.dumpfile_path = dumpfile_path

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

    # A set of the basic project infrastructure project directories
    # that have been created so far, as SVN paths. (The root
    # directory is considered to be present at initialization.) This
    # includes all of the LOD paths, and all of their parent
    # directories etc.
    self._basic_directories = set([''])

  def _write_dumpfile_header(self, dumpfile):
    # Initialize the dumpfile with the standard headers.
    #
    # Since the CVS repository doesn't have a UUID, and the Subversion
    # repository will be created with one anyway, we don't specify a
    # UUID in the dumpfile.
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8."""

    # Convert each path component separately (as they may each use
    # different encodings).
    try:
      return '/'.join([
          Ctx().cvs_filename_decoder(piece).encode('utf8')
          for piece in path.split('/')
          ])
    except UnicodeError:
      raise FatalError(
          "Unable to convert a path '%s' to internal encoding.\n"
          "Consider rerunning with one or more '--encoding' parameters or\n"
          "with '--fallback-encoding'."
          % (path,))

  def _string_for_prop(self, name, value):
    """Return a property in the form needed for the dumpfile."""

    return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)

  def start_commit(self, revnum, revprops):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""

    self.revision = revnum

    # The start of a new commit typically looks like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # Notice that the length headers count everything -- not just the
    # length of the data but also the lengths of the lengths, including
    # the 'K ' or 'V ' prefixes.
    #
    # The reason there are both Prop-content-length and Content-length
    # is that the former includes just props, while the latter includes
    # everything. That's the generic header form for any entity in a
    # dumpfile. But since revisions only have props, the two lengths
    # are always the same for revisions.

    # Calculate the output needed for the property definitions.
    prop_names = revprops.keys()
    prop_names.sort()
    prop_strings = []
    for propname in prop_names:
      if revprops[propname] is not None:
        prop_strings.append(
            self._string_for_prop(propname, revprops[propname]))

    all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n'
    total_len = len(all_prop_strings)

    # Print the revision header and revprops
    self.dumpfile.write(
        'Revision-number: %d\n'
        'Prop-content-length: %d\n'
        'Content-length: %d\n'
        '\n'
        '%s'
        '\n'
        % (self.revision, total_len, total_len, all_prop_strings)
        )

  def end_commit(self):
    # Nothing to flush; records are written as they are emitted.
    pass

  def _make_any_dir(self, path):
    """Emit the creation of directory PATH."""

    self.dumpfile.write(
        "Node-path: %s\n"
        "Node-kind: dir\n"
        "Node-action: add\n"
        "\n"
        "\n"
        % self._utf8_path(path)
        )

  def _register_basic_directory(self, path, create):
    """Register the creation of PATH if it is not already there.

    Create any parent directories that do not already exist. If
    CREATE is set, also create PATH if it doesn't already exist. This
    method should only be used for the LOD paths and the directories
    containing them, not for directories within an LOD path."""

    if path not in self._basic_directories:
      # Make sure that the parent directory is present:
      self._register_basic_directory(path_split(path)[0], True)
      if create:
        self._make_any_dir(path)
      self._basic_directories.add(path)

  def initialize_project(self, project):
    """Create any initial directories for the project.

    The trunk, tags, and branches directories directories are created
    the first time the project is seen. Be sure not to create parent
    directories that already exist (e.g., because two directories
    share part of their paths either within or across projects)."""

    for path in project.get_initial_directories():
      self._register_basic_directory(path, True)

  def initialize_lod(self, lod):
    """Create the directory for LOD (unless it is the root path)."""

    lod_path = lod.get_path()
    if lod_path:
      self._register_basic_directory(lod_path, True)

  def mkdir(self, lod, cvs_directory):
    """Emit the creation of CVS_DIRECTORY within LOD."""

    self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.

    OP is either the constant OP_ADD or OP_CHANGE."""

    assert op in [OP_ADD, OP_CHANGE]

    # Convenience variables
    cvs_rev = s_item.cvs_rev

    # The property handling here takes advantage of an undocumented
    # but IMHO consistent feature of the Subversion dumpfile-loading
    # code. When a node's properties aren't mentioned (that is, the
    # "Prop-content-length:" header is absent, no properties are
    # listed at all, and there is no "PROPS-END\n" line) then no
    # change is made to the node's properties.
    #
    # This is consistent with the way dumpfiles behave w.r.t. text
    # content changes, so I'm comfortable relying on it. If you
    # commit a change to *just* the properties of some node that
    # already has text contents from a previous revision, then in the
    # dumpfile output for the prop change, no "Text-content-length:"
    # nor "Text-content-md5:" header will be present, and the text of
    # the file will not be given. But this does not cause the file's
    # text to be erased! It simply remains unchanged.
    #
    # This works out great for cvs2svn, due to lucky coincidences:
    #
    # For files, the only properties we ever set are set in the first
    # revision; all other revisions (including on branches) inherit
    # from that. After the first revision, we never change file
    # properties, therefore, there is no need to remember the full set
    # of properties on a given file once we've set it.
    #
    # For directories, the only property we set is "svn:ignore", and
    # while we may change it after the first revision, we always do so
    # based on the contents of a ".cvsignore" file -- in other words,
    # CVS is doing the remembering for us, so we still don't have to
    # preserve the previous value of the property ourselves.

    # Calculate the (sorted-by-name) property string and length, if any.
    if s_item.svn_props_changed:
      svn_props = s_item.svn_props
      prop_contents = ''
      prop_names = svn_props.keys()
      prop_names.sort()
      for pname in prop_names:
        pvalue = svn_props[pname]
        if pvalue is not None:
          prop_contents += self._string_for_prop(pname, pvalue)
      prop_contents += 'PROPS-END\n'
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)
    else:
      prop_contents = ''
      props_header = ''

    # If the file has keywords, we must prevent CVS/RCS from expanding
    # the keywords because they must be unexpanded in the repository,
    # or Subversion will get confused.
    stream = self._revision_reader.get_content_stream(
        cvs_rev, suppress_keyword_substitution=s_item.has_keywords()
        )

    if Ctx().decode_apple_single:
      # Insert a filter to decode any files that are in AppleSingle
      # format:
      stream = get_maybe_apple_single_stream(stream)

    # Insert a filter to convert all EOLs to LFs if necessary

    eol_style = s_item.svn_props.get('svn:eol-style', None)
    if eol_style:
      stream = LF_EOL_Filter(stream, eol_style)

    # buf holds the full contents if they have already been read (only
    # happens for .cvsignore files); otherwise the stream is read in
    # chunks below.
    buf = None

    # treat .cvsignore as a directory property
    dir_path, basename = path_split(cvs_rev.get_svn_path())
    if basename == '.cvsignore':
      buf = stream.read()
      ignore_vals = generate_ignores(buf)
      ignore_contents = '\n'.join(ignore_vals)
      if ignore_contents:
        ignore_contents += '\n'
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
          (len(ignore_contents), ignore_contents))
      ignore_contents += 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # write headers, then props
      self.dumpfile.write(
          'Node-path: %s\n'
          'Node-kind: dir\n'
          'Node-action: change\n'
          'Prop-content-length: %d\n'
          'Content-length: %d\n'
          '\n'
          '%s'
          % (self._utf8_path(dir_path),
             ignore_len, ignore_len, ignore_contents)
          )
      if not Ctx().keep_cvsignore:
        stream.close()
        return

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: file\n'
        'Node-action: %s\n'
        '%s'  # no property header if no props
        % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header)
        )

    # Remember where the content headers start; they are rewritten
    # with the real length/checksum once the contents have been
    # streamed through (the fields are fixed-width so the rewrite
    # cannot change the record size).
    pos = self.dumpfile.tell()

    content_header_fmt = (
        'Text-content-length: %16d\n'
        'Text-content-md5: %32s\n'
        'Content-length: %16d\n'
        '\n'
        )

    self.dumpfile.write(content_header_fmt % (0, '', 0,))

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Insert the rev contents, calculating length and checksum as we go.
    checksum = md5()
    length = 0
    if buf is None:
      buf = stream.read(config.PIPE_READ_SIZE)
    while buf != '':
      checksum.update(buf)
      length += len(buf)
      self.dumpfile.write(buf)
      buf = stream.read(config.PIPE_READ_SIZE)

    stream.close()

    # Go back to overwrite the length and checksum headers with the
    # correct values. The content length is the length of property
    # data, text data, and any metadata around/inside around them:
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write(
        content_header_fmt
        % (length, checksum.hexdigest(), length + len(prop_contents),)
        )

    # Jump back to the end of the stream
    self.dumpfile.seek(0, 2)

    # This record is done (write two newlines -- one to terminate
    # contents that weren't themselves newline-termination, one to
    # provide a blank line for readability.
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""

    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""

    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_lod(self, lod):
    """Emit the deletion of LOD."""

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-action: delete\n'
        '\n'
        % (self._utf8_path(lod.get_path()),)
        )
    self._basic_directories.remove(lod.get_path())

  def delete_path(self, lod, cvs_path):
    """Emit the deletion of CVS_PATH within LOD.

    A deleted '.cvsignore' file is translated into deletion of the
    containing directory's svn:ignore property instead."""

    dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
    if basename == '.cvsignore':
      # When a .cvsignore file is deleted, the directory's svn:ignore
      # property needs to be deleted.
      ignore_contents = 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # write headers, then props
      self.dumpfile.write(
          'Node-path: %s\n'
          'Node-kind: dir\n'
          'Node-action: change\n'
          'Prop-content-length: %d\n'
          'Content-length: %d\n'
          '\n'
          '%s'
          % (self._utf8_path(dir_path),
             ignore_len, ignore_len, ignore_contents)
          )
      if not Ctx().keep_cvsignore:
        return

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-action: delete\n'
        '\n'
        % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
        )

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Emit the copy of SRC_LOD at SRC_REVNUM to DEST_LOD."""

    # Register the main LOD directory, and create parent directories
    # as needed:
    self._register_basic_directory(dest_lod.get_path(), False)

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: dir\n'
        'Node-action: add\n'
        'Node-copyfrom-rev: %d\n'
        'Node-copyfrom-path: %s\n'
        '\n'
        % (self._utf8_path(dest_lod.get_path()),
           src_revnum, self._utf8_path(src_lod.get_path()))
        )

  def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
    """Emit the copy of CVS_PATH from SRC_LOD at SRC_REVNUM to DEST_LOD."""

    if isinstance(cvs_path, CVSFile):
      node_kind = 'file'
      if cvs_path.basename == '.cvsignore':
        # FIXME: Here we have to adjust the containing directory's
        # svn:ignore property to reflect the addition of the
        # .cvsignore file to the LOD! This is awkward because we
        # don't have the contents of the .cvsignore file available.
        if not Ctx().keep_cvsignore:
          return
    elif isinstance(cvs_path, CVSDirectory):
      node_kind = 'dir'
    else:
      raise InternalError()

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: %s\n'
        'Node-action: add\n'
        'Node-copyfrom-rev: %d\n'
        'Node-copyfrom-path: %s\n'
        '\n'
        % (
            self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
            node_kind,
            src_revnum,
            self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
            )
        )

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed."""

    self.dumpfile.close()
+
+
def generate_ignores(raw_ignore_val):
  """Parse RAW_IGNORE_VAL, the contents of a .cvsignore file.

  Return the resulting list of ignore patterns.  A lone '!' token
  resets the accumulated list, per CVS semantics; see
  http://cvsbook.red-bean.com/cvsbook.html#cvsignore"""

  patterns = []
  for token in raw_ignore_val.split():
    if token == '!':
      # '!' discards everything gathered so far.
      patterns = []
    else:
      patterns.append(token)
  return patterns
+
+
class LF_EOL_Filter:
  """Wrap a stream, canonicalizing every EOL marker on read.

  All end-of-line markers in the wrapped stream's data (CRLF, CR, or
  LF) are rewritten to the single marker selected by EOL_STYLE
  ('LF', 'CR', 'CRLF', or 'native')."""

  eol_style_replacements = {
      'LF' : '\n',
      'CR' : '\r',
      'CRLF' : '\r\n',
      'native' : '\n',
      }

  def __init__(self, stream, eol_style):
    self.stream = stream
    self.replacement = self.eol_style_replacements[eol_style]
    # True when the previous chunk ended in a bare '\r' whose meaning
    # (lone CR vs. first half of a CRLF) depends on the next chunk:
    self.carry_cr = False
    self.eof = False

  def read(self, size=-1):
    """Return converted data; the empty string only at end-of-file."""

    while True:
      chunk = self.stream.read(size)
      self.eof = len(chunk) == 0
      # Re-attach a CR carried over from the previous read:
      if self.carry_cr:
        chunk = '\r' + chunk
        self.carry_cr = False
      # Hold back a trailing CR -- it may be half of a CRLF pair that
      # straddles the chunk boundary:
      if not self.eof and chunk.endswith('\r'):
        self.carry_cr = True
        chunk = chunk[:-1]
      # Normalize to LF first, then map to the requested style:
      chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
      if self.replacement != '\n':
        chunk = chunk.replace('\n', self.replacement)
      if chunk or self.eof:
        return chunk

  def close(self):
    self.stream.close()
    self.stream = None
+
+
diff --git a/cvs2svn_lib/fill_source.py b/cvs2svn_lib/fill_source.py
new file mode 100644
index 0000000..2bb8e4c
--- /dev/null
+++ b/cvs2svn_lib/fill_source.py
@@ -0,0 +1,192 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes describing the sources of symbol fills."""
+
+
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import SVN_INVALID_REVNUM
+from cvs2svn_lib.svn_revision_range import SVNRevisionRange
+from cvs2svn_lib.svn_revision_range import RevisionScores
+
+
class FillSource:
  """Representation of a fill source.

  A FillSource keeps track of the paths that have to be filled in a
  particular symbol fill.

  This class holds a SVNRevisionRange instance for each CVSFile that
  has to be filled within the subtree of the repository rooted at
  self.cvs_path. The SVNRevisionRange objects are stored in a tree
  in which the directory nodes are dictionaries mapping CVSPaths to
  subnodes and the leaf nodes are the SVNRevisionRange objects telling
  for what source_lod and what range of revisions the leaf could serve
  as a source.

  FillSource objects are able to compute the score for arbitrary
  source LODs and source revision numbers.

  These objects are used by the symbol filler in SVNOutputOption."""

  def __init__(self, cvs_path, symbol, node_tree):
    """Create a fill source.

    The best LOD and SVN REVNUM to use as the copy source can be
    determined by calling compute_best_source().

    Members:

      cvs_path -- (CVSPath): the CVSPath described by this FillSource.

      _symbol -- (Symbol) the symbol to be filled.

      _node_tree -- (dict) a tree stored as a map { CVSPath : node },
          where subnodes have the same form. Leaves are
          SVNRevisionRange instances telling the source_lod and range
          of SVN revision numbers from which the CVSPath can be
          copied.

    """

    self.cvs_path = cvs_path
    self._symbol = symbol
    self._node_tree = node_tree

  def _set_node(self, cvs_file, svn_revision_range):
    """Record SVN_REVISION_RANGE as the leaf node for CVS_FILE.

    Intermediate directory nodes are created as needed.  Raise
    InternalError if a leaf for CVS_FILE was already recorded."""

    parent_node = self._get_node(cvs_file.parent_directory, create=True)
    if cvs_file in parent_node:
      raise InternalError(
          '%s appeared twice in sources for %s' % (cvs_file, self._symbol)
          )
    parent_node[cvs_file] = svn_revision_range

  def _get_node(self, cvs_path, create=False):
    """Return the node for CVS_PATH, recursing up towards self.cvs_path.

    If CREATE is set, create any missing intermediate nodes on the
    way; otherwise a missing node propagates the KeyError."""

    if cvs_path == self.cvs_path:
      return self._node_tree
    else:
      parent_node = self._get_node(cvs_path.parent_directory, create=create)
      try:
        return parent_node[cvs_path]
      except KeyError:
        if create:
          node = {}
          parent_node[cvs_path] = node
          return node
        else:
          raise

  def compute_best_source(self, preferred_source):
    """Determine the best source_lod and subversion revision number to copy.

    Return the best source found, as an SVNRevisionRange instance. If
    PREFERRED_SOURCE is not None and its opening is among the sources
    with the best scores, return it; otherwise, return the oldest such
    revision on the first such source_lod (ordered by the natural LOD
    sort order). The return value's source_lod is the best LOD to
    copy from, and its opening_revnum is the best SVN revision."""

    # Aggregate openings and closings from our rev tree
    svn_revision_ranges = self._get_revision_ranges(self._node_tree)

    # Score the lists
    revision_scores = RevisionScores(svn_revision_ranges)

    best_source_lod, best_revnum, best_score = \
        revision_scores.get_best_revnum()

    # Break ties in favor of the caller's preferred source:
    if (
        preferred_source is not None
        and revision_scores.get_score(preferred_source) == best_score
        ):
      best_source_lod = preferred_source.source_lod
      best_revnum = preferred_source.opening_revnum

    if best_revnum == SVN_INVALID_REVNUM:
      raise FatalError(
          "failed to find a revision to copy from when copying %s"
          % self._symbol.name
          )

    return SVNRevisionRange(best_source_lod, best_revnum)

  def _get_revision_ranges(self, node):
    """Return a list of all the SVNRevisionRanges at and under NODE.

    Include duplicates. This is a helper method used by
    compute_best_source()."""

    if isinstance(node, SVNRevisionRange):
      # It is a leaf node.
      return [ node ]
    else:
      # It is an intermediate node.
      revision_ranges = []
      for key, subnode in node.items():
        revision_ranges.extend(self._get_revision_ranges(subnode))
      return revision_ranges

  def get_subsources(self):
    """Generate (CVSPath, FillSource) for all direct subsources."""

    if not isinstance(self._node_tree, SVNRevisionRange):
      for cvs_path, node in self._node_tree.items():
        fill_source = FillSource(cvs_path, self._symbol, node)
        yield (cvs_path, fill_source)

  def get_subsource_map(self):
    """Return the map {CVSPath : FillSource} of direct subsources."""

    src_entries = {}

    for (cvs_path, fill_subsource) in self.get_subsources():
      src_entries[cvs_path] = fill_subsource

    return src_entries

  def __str__(self):
    """For convenience only. The format is subject to change at any time."""

    return '%s(%s:%s)' % (
        self.__class__.__name__, self._symbol, self.cvs_path,
        )

  def __repr__(self):
    """For convenience only. The format is subject to change at any time."""

    return '%s%r' % (self, self._node_tree,)
+
+
def get_source_set(symbol, range_map):
  """Return a FillSource describing the fill sources for RANGE_MAP.

  SYMBOL is either a Branch or a Tag. RANGE_MAP is a map { CVSSymbol
  : SVNRevisionRange } as returned by
  SymbolingsReader.get_range_map().

  Use the SVNRevisionRanges from RANGE_MAP to create a FillSource
  instance describing the sources for filling SYMBOL."""

  # Start from an empty tree rooted at the project's root directory,
  # then record one leaf per CVSSymbol.
  root_directory = symbol.project.get_root_cvs_directory()
  source = FillSource(root_directory, symbol, {})

  for (cvs_symbol, svn_revision_range) in range_map.items():
    source._set_node(cvs_symbol.cvs_file, svn_revision_range)

  return source
+
+
diff --git a/cvs2svn_lib/fulltext_revision_recorder.py b/cvs2svn_lib/fulltext_revision_recorder.py
new file mode 100644
index 0000000..ad057b7
--- /dev/null
+++ b/cvs2svn_lib/fulltext_revision_recorder.py
@@ -0,0 +1,127 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""An abstract class that constructs file contents during CollectRevsPass.
+
+It calls its record_fulltext() method with the full text of every
+revision. This method should be overridden to do something with the
+fulltext and possibly return a revision_recorder_token."""
+
+
+from cvs2svn_lib.revision_manager import RevisionRecorder
+
+
+class FulltextRevisionRecorder:
+ """Similar to a RevisionRecorder, but it requires the fulltext."""
+
+ def register_artifacts(self, which_pass):
+ pass
+
+ def start(self):
+ pass
+
+ def start_file(self, cvs_file_items):
+ pass
+
+ def record_fulltext(self, cvs_rev, log, fulltext):
+ """Record the fulltext for CVS_REV.
+
+ CVS_REV has the log message LOG and the fulltext FULLTEXT. This
+ method should be overridden to do something sensible with them."""
+
+ raise NotImplementedError()
+
+ def finish_file(self, cvs_file_items):
+ pass
+
+ def finish(self):
+ pass
+
+
+class FulltextRevisionRecorderAdapter(RevisionRecorder):
+ """Reconstruct the fulltext and pass it to a FulltextRevisionRecorder.
+
+ This class implements RevisionRecorder (so it can be passed directly
+ to CollectRevsPass). But it doesn't actually record anything.
+ Instead, it reconstructs the fulltext of each revision, and passes
+ the fulltext to a fulltext_revision_recorder."""
+
+ def __init__(self, fulltext_revision_recorder):
+ RevisionRecorder.__init__(self)
+ self.fulltext_revision_recorder = fulltext_revision_recorder
+
+ def register_artifacts(self, which_pass):
+ self.fulltext_revision_recorder.register_artifacts(which_pass)
+
+ def start(self):
+ self.fulltext_revision_recorder.start()
+
+ def start_file(self, cvs_file_items):
+ self.fulltext_revision_recorder.start_file(cvs_file_items)
+
+ def record_text(self, cvs_rev, log, text):
+ """This method should be overridden.
+
+ It should determine the fulltext of CVS_REV, then pass it to
+ self.fulltext_revision_recorder.record_fulltext() and return the
+ result."""
+
+ raise NotImplementedError()
+
+ def finish_file(self, cvs_file_items):
+ self.fulltext_revision_recorder.finish_file(cvs_file_items)
+
+ def finish(self):
+ self.fulltext_revision_recorder.finish()
+
+
+class SimpleFulltextRevisionRecorderAdapter(FulltextRevisionRecorderAdapter):
+ """Reconstruct the fulltext using a RevisionReader.
+
+ To create the fulltext, this class simply uses a RevisionReader (for
+ example, RCSRevisionReader or CVSRevisionReader). This is not quite
+ as wasteful as using one of these RevisionReaders in OutputPass,
+ because the same RCS file will be read over and over (and so
+ presumably stay in the disk cache). But it is still pretty silly,
+ considering that we have all the RCS deltas available to us."""
+
+ def __init__(self, revision_reader, fulltext_revision_recorder):
+ FulltextRevisionRecorderAdapter.__init__(self, fulltext_revision_recorder)
+ self.revision_reader = revision_reader
+
+ def register_artifacts(self, which_pass):
+ FulltextRevisionRecorderAdapter.register_artifacts(self, which_pass)
+ self.revision_reader.register_artifacts(which_pass)
+
+ def start(self):
+ FulltextRevisionRecorderAdapter.start(self)
+ self.revision_reader.start()
+
+ def record_text(self, cvs_rev, log, text):
+ # FIXME: We have to decide what to do about keyword substitution
+ # and eol_style here:
+ fulltext = self.revision_reader.get_content_stream(
+ cvs_rev, suppress_keyword_substitution=False
+ ).read()
+ return self.fulltext_revision_recorder.record_fulltext(
+ cvs_rev, log, fulltext
+ )
+
+ def finish(self):
+ FulltextRevisionRecorderAdapter.finish(self)
+ self.revision_reader.finish()
+
+
diff --git a/cvs2svn_lib/git_output_option.py b/cvs2svn_lib/git_output_option.py
new file mode 100644
index 0000000..a1e46b9
--- /dev/null
+++ b/cvs2svn_lib/git_output_option.py
@@ -0,0 +1,658 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Classes for outputting the converted repository to git.
+
+For information about the format allowed by git-fast-import, see:
+
+ http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
+
+"""
+
+import bisect
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.openings_closings import SymbolingsReader
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+from cvs2svn_lib.cvs_item import CVSRevisionAdd
+from cvs2svn_lib.cvs_item import CVSRevisionChange
+from cvs2svn_lib.cvs_item import CVSRevisionDelete
+from cvs2svn_lib.cvs_item import CVSRevisionNoop
+from cvs2svn_lib.cvs_item import CVSSymbol
+from cvs2svn_lib.output_option import OutputOption
+from cvs2svn_lib.svn_revision_range import RevisionScores
+from cvs2svn_lib.repository_mirror import RepositoryMirror
+from cvs2svn_lib.key_generator import KeyGenerator
+
+
+# The branch name to use for the "tag fixup branches". The
+# git-fast-import documentation suggests using 'TAG_FIXUP' (outside of
+# the refs/heads namespace), but this is currently broken. Use a name
+# containing '.', which is not allowed in CVS symbols, to avoid
+# conflicts (though of course a conflict could still result if the
+# user requests symbol transformations).
+FIXUP_BRANCH_NAME = 'refs/heads/TAG.FIXUP'
+
+
+class ExpectedDirectoryError(Exception):
+ """A file was found where a directory was expected."""
+
+ pass
+
+
+class ExpectedFileError(Exception):
+ """A directory was found where a file was expected."""
+
+ pass
+
+
+class GitRevisionWriter(object):
+ def register_artifacts(self, which_pass):
+ pass
+
+ def start(self, f, mirror):
+ self.f = f
+ self._mirror = mirror
+
+ def _modify_file(self, cvs_item, post_commit):
+ raise NotImplementedError()
+
+ def _mkdir_p(self, cvs_directory, lod):
+ """Make sure that CVS_DIRECTORY exists in LOD.
+
+ If not, create it. Return the node for CVS_DIRECTORY."""
+
+ try:
+ node = self._mirror.get_current_lod_directory(lod)
+ except KeyError:
+ node = self._mirror.add_lod(lod)
+
+ for sub_path in cvs_directory.get_ancestry()[1:]:
+ try:
+ node = node[sub_path]
+ except KeyError:
+ node = node.mkdir(sub_path)
+ if node is None:
+ raise ExpectedDirectoryError(
+ 'File found at \'%s\' where directory was expected.' % (sub_path,)
+ )
+
+ return node
+
+ def add_file(self, cvs_rev, post_commit):
+ cvs_file = cvs_rev.cvs_file
+ if post_commit:
+ lod = cvs_file.project.get_trunk()
+ else:
+ lod = cvs_rev.lod
+ parent_node = self._mkdir_p(cvs_file.parent_directory, lod)
+ parent_node.add_file(cvs_file)
+ self._modify_file(cvs_rev, post_commit)
+
+ def modify_file(self, cvs_rev, post_commit):
+ cvs_file = cvs_rev.cvs_file
+ if post_commit:
+ lod = cvs_file.project.get_trunk()
+ else:
+ lod = cvs_rev.lod
+ if self._mirror.get_current_path(cvs_file, lod) is not None:
+ raise ExpectedFileError(
+ 'Directory found at \'%s\' where file was expected.' % (cvs_file,)
+ )
+ self._modify_file(cvs_rev, post_commit)
+
+ def delete_file(self, cvs_rev, post_commit):
+ cvs_file = cvs_rev.cvs_file
+ if post_commit:
+ lod = cvs_file.project.get_trunk()
+ else:
+ lod = cvs_rev.lod
+ parent_node = self._mirror.get_current_path(
+ cvs_file.parent_directory, lod
+ )
+ if parent_node[cvs_file] is not None:
+ raise ExpectedFileError(
+ 'Directory found at \'%s\' where file was expected.' % (cvs_file,)
+ )
+ del parent_node[cvs_file]
+ self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,))
+
+ def process_revision(self, cvs_rev, post_commit):
+ if isinstance(cvs_rev, CVSRevisionAdd):
+ self.add_file(cvs_rev, post_commit)
+ elif isinstance(cvs_rev, CVSRevisionChange):
+ self.modify_file(cvs_rev, post_commit)
+ elif isinstance(cvs_rev, CVSRevisionDelete):
+ self.delete_file(cvs_rev, post_commit)
+ elif isinstance(cvs_rev, CVSRevisionNoop):
+ pass
+ else:
+ raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,))
+
+ def branch_file(self, cvs_symbol):
+ cvs_file = cvs_symbol.cvs_file
+ parent_node = self._mkdir_p(cvs_file.parent_directory, cvs_symbol.symbol)
+ parent_node.add_file(cvs_file)
+ self._modify_file(cvs_symbol, post_commit=False)
+
+ def finish(self):
+ del self._mirror
+ del self.f
+
+
+class GitRevisionMarkWriter(GitRevisionWriter):
+ def _modify_file(self, cvs_item, post_commit):
+ if cvs_item.cvs_file.executable:
+ mode = '100755'
+ else:
+ mode = '100644'
+
+ self.f.write(
+ 'M %s :%d %s\n'
+ % (mode, cvs_item.revision_recorder_token,
+ cvs_item.cvs_file.cvs_path,)
+ )
+
+
+class GitRevisionInlineWriter(GitRevisionWriter):
+ def __init__(self, revision_reader):
+ self.revision_reader = revision_reader
+
+ def register_artifacts(self, which_pass):
+ GitRevisionWriter.register_artifacts(self, which_pass)
+ self.revision_reader.register_artifacts(which_pass)
+
+ def start(self, f, mirror):
+ GitRevisionWriter.start(self, f, mirror)
+ self.revision_reader.start()
+
+ def _modify_file(self, cvs_item, post_commit):
+ if cvs_item.cvs_file.executable:
+ mode = '100755'
+ else:
+ mode = '100644'
+
+ self.f.write(
+ 'M %s inline %s\n'
+ % (mode, cvs_item.cvs_file.cvs_path,)
+ )
+
+ if isinstance(cvs_item, CVSSymbol):
+ cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)
+ else:
+ cvs_rev = cvs_item
+
+ # FIXME: We have to decide what to do about keyword substitution
+ # and eol_style here:
+ fulltext = self.revision_reader.get_content_stream(
+ cvs_rev, suppress_keyword_substitution=False
+ ).read()
+
+ self.f.write('data %d\n' % (len(fulltext),))
+ self.f.write(fulltext)
+ self.f.write('\n')
+
+ def finish(self):
+ GitRevisionWriter.finish(self)
+ self.revision_reader.finish()
+
+
+def get_chunks(iterable, chunk_size):
+ """Generate lists containing chunks of the output of ITERABLE.
+
+ Each list contains at most CHUNK_SIZE items. If CHUNK_SIZE is None,
+ yield the whole contents of ITERABLE in one list."""
+
+ if chunk_size is None:
+ yield list(iterable)
+ else:
+ it = iter(iterable)
+ while True:
+ # If this call to it.next() raises StopIteration, then we have
+ # no more chunks to emit, so simply pass the exception through:
+ chunk = [it.next()]
+
+ # Now try filling the rest of the chunk:
+ try:
+ while len(chunk) < chunk_size:
+ chunk.append(it.next())
+ except StopIteration:
+ # The iterator was exhausted while filling chunk, but chunk
+ # contains at least one element. Yield it, then we're done.
+ yield chunk
+ break
+
+ # Yield the full chunk then continue with the next chunk:
+ yield chunk
+ del chunk
+
+
+class GitOutputOption(OutputOption):
+ """An OutputOption that outputs to a git-fast-import formatted file.
+
+ Members:
+
+ dump_filename -- (string) the name of the file to which the
+ git-fast-import commands for defining revisions will be
+ written.
+
+ author_transforms -- a map {cvsauthor : (fullname, email)} from
+ CVS author names to git full name and email address. All of
+ the contents are 8-bit strings encoded as UTF-8.
+
+ """
+
+ # The first mark number used for git-fast-import commit marks. This
+ # value needs to be large to avoid conflicts with blob marks.
+ _first_commit_mark = 1000000000
+
+ def __init__(
+ self, dump_filename, revision_writer,
+ max_merges=None, author_transforms=None,
+ ):
+ """Constructor.
+
+ DUMP_FILENAME is the name of the file to which the git-fast-import
+ commands for defining revisions should be written. (Please note
+ that depending on the style of revision writer, the actual file
+ contents might not be written to this file.)
+
+ REVISION_WRITER is a GitRevisionWriter that is used to output
+ either the content of revisions or a mark that was previously used
+ to label a blob.
+
+ MAX_MERGES can be set to an integer telling the maximum number of
+ parents that can be merged into a commit at once (aside from the
+ natural parent). If it is set to None, then there is no limit.
+
+ AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
+ CVS author names to git full name and email address. All of the
+ contents should either be Unicode strings or 8-bit strings encoded
+ as UTF-8.
+
+ """
+
+ self.dump_filename = dump_filename
+ self.revision_writer = revision_writer
+ self.max_merges = max_merges
+
+ def to_utf8(s):
+ if isinstance(s, unicode):
+ return s.encode('utf8')
+ else:
+ return s
+
+ self.author_transforms = {}
+ if author_transforms is not None:
+ for (cvsauthor, (name, email,)) in author_transforms.iteritems():
+ cvsauthor = to_utf8(cvsauthor)
+ name = to_utf8(name)
+ email = to_utf8(email)
+ self.author_transforms[cvsauthor] = (name, email,)
+
+ self._mirror = RepositoryMirror()
+
+ self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)
+
+ def register_artifacts(self, which_pass):
+ # These artifacts are needed for SymbolingsReader:
+ artifact_manager.register_temp_file_needed(
+ config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
+ )
+ artifact_manager.register_temp_file_needed(
+ config.SYMBOL_OFFSETS_DB, which_pass
+ )
+ self.revision_writer.register_artifacts(which_pass)
+ self._mirror.register_artifacts(which_pass)
+
+ def check(self):
+ if Ctx().cross_project_commits:
+ raise FatalError(
+ 'Git output is not supported with cross-project commits'
+ )
+ if Ctx().cross_branch_commits:
+ raise FatalError(
+ 'Git output is not supported with cross-branch commits'
+ )
+ if Ctx().username is None:
+ raise FatalError(
+ 'Git output requires a default commit username'
+ )
+
+ def check_symbols(self, symbol_map):
+ # FIXME: What constraints does git impose on symbols?
+ pass
+
+ def setup(self, svn_rev_count):
+ self._symbolings_reader = SymbolingsReader()
+ self.f = open(self.dump_filename, 'wb')
+
+ # The youngest revnum that has been committed so far:
+ self._youngest = 0
+
+ # A map {lod : [(revnum, mark)]} giving each of the revision
+ # numbers in which there was a commit to lod, and the mark active
+ # at the end of the revnum.
+ self._marks = {}
+
+ self._mirror.open()
+ self.revision_writer.start(self.f, self._mirror)
+
+ def _create_commit_mark(self, lod, revnum):
+ mark = self._mark_generator.gen_id()
+ self._set_lod_mark(lod, revnum, mark)
+ return mark
+
+ def _set_lod_mark(self, lod, revnum, mark):
+ """Record MARK as the status of LOD for REVNUM.
+
+ If there is already an entry for REVNUM, overwrite it. If not,
+ append a new entry to the self._marks list for LOD."""
+
+ assert revnum >= self._youngest
+ entry = (revnum, mark)
+ try:
+ modifications = self._marks[lod]
+ except KeyError:
+ # This LOD hasn't appeared before; create a new list and add the
+ # entry:
+ self._marks[lod] = [entry]
+ else:
+ # A record exists, so it necessarily has at least one element:
+ if modifications[-1][0] == revnum:
+ modifications[-1] = entry
+ else:
+ modifications.append(entry)
+ self._youngest = revnum
+
+ def _get_author(self, svn_commit):
+ """Return the author to be used for SVN_COMMIT.
+
+ Return the author in the form needed by git; that is, 'foo <bar>'."""
+
+ author = svn_commit.get_author()
+ (name, email,) = self.author_transforms.get(author, (author, author,))
+ return '%s <%s>' % (name, email,)
+
+ @staticmethod
+ def _get_log_msg(svn_commit):
+ return svn_commit.get_log_msg()
+
+ def process_initial_project_commit(self, svn_commit):
+ self._mirror.start_commit(svn_commit.revnum)
+ self._mirror.end_commit()
+
+ def process_primary_commit(self, svn_commit):
+ author = self._get_author(svn_commit)
+ log_msg = self._get_log_msg(svn_commit)
+
+ lods = set()
+ for cvs_rev in svn_commit.get_cvs_items():
+ lods.add(cvs_rev.lod)
+ if len(lods) != 1:
+ raise InternalError('Commit affects %d LODs' % (len(lods),))
+ lod = lods.pop()
+
+ self._mirror.start_commit(svn_commit.revnum)
+ if isinstance(lod, Trunk):
+ # FIXME: is this correct?:
+ self.f.write('commit refs/heads/master\n')
+ else:
+ self.f.write('commit refs/heads/%s\n' % (lod.name,))
+ self.f.write(
+ 'mark :%d\n'
+ % (self._create_commit_mark(lod, svn_commit.revnum),)
+ )
+ self.f.write(
+ 'committer %s %d +0000\n' % (author, svn_commit.date,)
+ )
+ self.f.write('data %d\n' % (len(log_msg),))
+ self.f.write('%s\n' % (log_msg,))
+ for cvs_rev in svn_commit.get_cvs_items():
+ self.revision_writer.process_revision(cvs_rev, post_commit=False)
+
+ self.f.write('\n')
+ self._mirror.end_commit()
+
+ def process_post_commit(self, svn_commit):
+ author = self._get_author(svn_commit)
+ log_msg = self._get_log_msg(svn_commit)
+
+ source_lods = set()
+ for cvs_rev in svn_commit.cvs_revs:
+ source_lods.add(cvs_rev.lod)
+ if len(source_lods) != 1:
+ raise InternalError('Commit is from %d LODs' % (len(source_lods),))
+ source_lod = source_lods.pop()
+
+ self._mirror.start_commit(svn_commit.revnum)
+ # FIXME: is this correct?:
+ self.f.write('commit refs/heads/master\n')
+ self.f.write(
+ 'mark :%d\n'
+ % (self._create_commit_mark(None, svn_commit.revnum),)
+ )
+ self.f.write(
+ 'committer %s %d +0000\n' % (author, svn_commit.date,)
+ )
+ self.f.write('data %d\n' % (len(log_msg),))
+ self.f.write('%s\n' % (log_msg,))
+ self.f.write(
+ 'merge :%d\n'
+ % (self._get_source_mark(source_lod, svn_commit.revnum),)
+ )
+ for cvs_rev in svn_commit.cvs_revs:
+ self.revision_writer.process_revision(cvs_rev, post_commit=True)
+
+ self.f.write('\n')
+ self._mirror.end_commit()
+
+ def _get_source_groups(self, svn_commit):
+ """Return groups of sources for SVN_COMMIT.
+
+ SVN_COMMIT is an instance of SVNSymbolCommit. Yield tuples
+ (source_lod, svn_revnum, cvs_symbols) where source_lod is the line
+ of development and svn_revnum is the revision that should serve as
+ a source, and cvs_symbols is a list of CVSSymbolItems that can be
+ copied from that source. The groups are returned in arbitrary
+ order."""
+
+ # Get a map {CVSSymbol : SVNRevisionRange}:
+ range_map = self._symbolings_reader.get_range_map(svn_commit)
+
+ # range_map, split up into one map per LOD; i.e., {LOD :
+ # {CVSSymbol : SVNRevisionRange}}:
+ lod_range_maps = {}
+
+ for (cvs_symbol, range) in range_map.iteritems():
+ lod_range_map = lod_range_maps.get(range.source_lod)
+ if lod_range_map is None:
+ lod_range_map = {}
+ lod_range_maps[range.source_lod] = lod_range_map
+ lod_range_map[cvs_symbol] = range
+
+ # Sort the sources so that the branch that serves most often as
+ # parent is processed first:
+ lod_ranges = lod_range_maps.items()
+ lod_ranges.sort(
+ lambda (lod1,lod_range_map1),(lod2,lod_range_map2):
+ -cmp(len(lod_range_map1), len(lod_range_map2)) or cmp(lod1, lod2)
+ )
+
+ for (lod, lod_range_map) in lod_ranges:
+ while lod_range_map:
+ revision_scores = RevisionScores(lod_range_map.values())
+ (source_lod, revnum, score) = revision_scores.get_best_revnum()
+ assert source_lod == lod
+ cvs_symbols = []
+ for (cvs_symbol, range) in lod_range_map.items():
+ if revnum in range:
+ cvs_symbols.append(cvs_symbol)
+ del lod_range_map[cvs_symbol]
+ yield (lod, revnum, cvs_symbols)
+
+ def _get_all_files(self, node):
+ """Generate all of the CVSFiles under NODE."""
+
+ for cvs_path in node:
+ subnode = node[cvs_path]
+ if subnode is None:
+ yield cvs_path
+ else:
+ for sub_cvs_path in self._get_all_files(subnode):
+ yield sub_cvs_path
+
+ def _is_simple_copy(self, svn_commit, source_groups):
+ """Return True iff SVN_COMMIT can be created as a simple copy.
+
+ SVN_COMMIT is an SVNTagCommit. Return True iff it can be created
+ as a simple copy from an existing revision (i.e., if the fixup
+ branch can be avoided for this tag creation)."""
+
+ # The first requirement is that there be exactly one source:
+ if len(source_groups) != 1:
+ return False
+
+ (source_lod, svn_revnum, cvs_symbols) = source_groups[0]
+
+ # The second requirement is that the destination LOD not already
+ # exist:
+ try:
+ self._mirror.get_current_lod_directory(svn_commit.symbol)
+ except KeyError:
+ # The LOD doesn't already exist. This is good.
+ pass
+ else:
+ # The LOD already exists. It cannot be created by a copy.
+ return False
+
+ # The third requirement is that the source LOD contains exactly
+ # the same files as we need to add to the symbol:
+ try:
+ source_node = self._mirror.get_old_lod_directory(source_lod, svn_revnum)
+ except KeyError:
+ raise InternalError('Source %r does not exist' % (source_lod,))
+ return (
+ set([cvs_symbol.cvs_file for cvs_symbol in cvs_symbols])
+ == set(self._get_all_files(source_node))
+ )
+
+ def _get_source_mark(self, source_lod, revnum):
+ """Return the mark active on SOURCE_LOD at the end of REVNUM."""
+
+ modifications = self._marks[source_lod]
+ i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
+ (revnum, mark) = modifications[i]
+ return mark
+
+ def _process_symbol_commit(
+ self, svn_commit, git_branch, source_groups, mark
+ ):
+ author = self._get_author(svn_commit)
+ log_msg = self._get_log_msg(svn_commit)
+
+ self.f.write('commit %s\n' % (git_branch,))
+ self.f.write('mark :%d\n' % (mark,))
+ self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
+ self.f.write('data %d\n' % (len(log_msg),))
+ self.f.write('%s\n' % (log_msg,))
+
+ for (source_lod, source_revnum, cvs_symbols,) in source_groups:
+ self.f.write(
+ 'merge :%d\n'
+ % (self._get_source_mark(source_lod, source_revnum),)
+ )
+
+ for (source_lod, source_revnum, cvs_symbols,) in source_groups:
+ for cvs_symbol in cvs_symbols:
+ self.revision_writer.branch_file(cvs_symbol)
+
+ self.f.write('\n')
+
+ def process_branch_commit(self, svn_commit):
+ self._mirror.start_commit(svn_commit.revnum)
+ source_groups = list(self._get_source_groups(svn_commit))
+ for groups in get_chunks(source_groups, self.max_merges):
+ self._process_symbol_commit(
+ svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
+ groups,
+ self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
+ )
+ self._mirror.end_commit()
+
+ def _set_symbol(self, symbol, mark):
+ if isinstance(symbol, Branch):
+ category = 'heads'
+ elif isinstance(symbol, Tag):
+ category = 'tags'
+ else:
+ raise InternalError()
+ self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
+ self.f.write('from :%d\n' % (mark,))
+
+ def process_tag_commit(self, svn_commit):
+ # FIXME: For now we create a fixup branch with the same name as
+ # the tag, then the tag. We never delete the fixup branch. Also,
+ # a fixup branch is created even if the tag could be created from
+ # a single source.
+ self._mirror.start_commit(svn_commit.revnum)
+
+ source_groups = list(self._get_source_groups(svn_commit))
+ if self._is_simple_copy(svn_commit, source_groups):
+ (source_lod, source_revnum, cvs_symbols) = source_groups[0]
+ Log().debug(
+ '%s will be created via a simple copy from %s:r%d'
+ % (svn_commit.symbol, source_lod, source_revnum,)
+ )
+ mark = self._get_source_mark(source_lod, source_revnum)
+ self._set_symbol(svn_commit.symbol, mark)
+ else:
+ Log().debug(
+ '%s will be created via a fixup branch' % (svn_commit.symbol,)
+ )
+
+ # Create the fixup branch (which might involve making more than
+ # one commit):
+ for groups in get_chunks(source_groups, self.max_merges):
+ mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)
+ self._process_symbol_commit(
+ svn_commit, FIXUP_BRANCH_NAME, groups, mark
+ )
+
+ # Store the mark of the last commit to the fixup branch as the
+ # value of the tag:
+ self._set_symbol(svn_commit.symbol, mark)
+ self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME,))
+ self.f.write('\n')
+
+ self._mirror.end_commit()
+
+ def cleanup(self):
+ self.revision_writer.finish()
+ self._mirror.close()
+ self.f.close()
+ del self.f
+ self._symbolings_reader.close()
+ del self._symbolings_reader
+
+
diff --git a/cvs2svn_lib/git_revision_recorder.py b/cvs2svn_lib/git_revision_recorder.py
new file mode 100644
index 0000000..604f8ac
--- /dev/null
+++ b/cvs2svn_lib/git_revision_recorder.py
@@ -0,0 +1,114 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Write file contents to a stream of git-fast-import blobs."""
+
+import itertools
+
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.cvs_item import CVSRevisionDelete
+from cvs2svn_lib.cvs_item import CVSSymbol
+from cvs2svn_lib.fulltext_revision_recorder import FulltextRevisionRecorder
+from cvs2svn_lib.key_generator import KeyGenerator
+
+
+class GitRevisionRecorder(FulltextRevisionRecorder):
+ """Output file revisions to git-fast-import."""
+
+ def __init__(self, blob_filename):
+ self.blob_filename = blob_filename
+
+ def start(self):
+ self.dump_file = open(self.blob_filename, 'wb')
+ self._mark_generator = KeyGenerator()
+
+ def start_file(self, cvs_file_items):
+ self._cvs_file_items = cvs_file_items
+
+ def _get_original_source(self, cvs_rev):
+ """Return the original source of the contents of CVS_REV.
+
+ Return the first non-delete CVSRevision with the same contents as
+ CVS_REV. 'First' here refers to deltatext order; i.e., the very
+ first revision is HEAD on trunk, then backwards to the root of a
+ branch, then out to the tip of a branch.
+
+ The candidates are all revisions along the CVS delta-dependency
+ chain until the next one that has a deltatext (inclusive). Of the
+ candidates, CVSRevisionDeletes are disqualified because, even
+ though CVS records their contents, it is impossible to extract
+ their fulltext using commands like 'cvs checkout -p'.
+
+ If there is no other CVSRevision that has the same content, return
+ CVS_REV itself."""
+
+ # Keep track of the "best" source CVSRevision found so far:
+ best_source_rev = None
+
+ for cvs_rev in itertools.chain(
+ [cvs_rev], self._cvs_file_items.iter_deltatext_ancestors(cvs_rev)
+ ):
+ if not isinstance(cvs_rev, CVSRevisionDelete):
+ best_source_rev = cvs_rev
+
+ if cvs_rev.deltatext_exists:
+ break
+
+ return best_source_rev
+
+ def record_fulltext(self, cvs_rev, log, fulltext):
+ """Write the fulltext to a blob if it is original and not a delete.
+
+ The reason we go to this trouble is to avoid writing the same file
+ contents multiple times for a string of revisions that don't have
+ deltatexts (as, for example, happens with dead revisions and
+ imported revisions)."""
+
+ if isinstance(cvs_rev, CVSRevisionDelete):
+ # There is no need to record a delete revision, and its token
+ # will never be needed:
+ return None
+
+ source = self._get_original_source(cvs_rev)
+
+ if source.id == cvs_rev.id:
+ # Revision is its own source; write it out:
+ mark = self._mark_generator.gen_id()
+ self.dump_file.write('blob\n')
+ self.dump_file.write('mark :%d\n' % (mark,))
+ self.dump_file.write('data %d\n' % (len(fulltext),))
+ self.dump_file.write(fulltext)
+ self.dump_file.write('\n')
+ return mark
+ else:
+ # Return as revision_recorder_token the CVSRevision.id of the
+ # original source revision:
+ return source.revision_recorder_token
+
+ def finish_file(self, cvs_file_items):
+ # Determine the original source of each CVSSymbol, and store it as
+ # the symbol's revision_recorder_token.
+ for cvs_item in cvs_file_items.values():
+ if isinstance(cvs_item, CVSSymbol):
+ cvs_source = cvs_item.get_cvs_revision_source(cvs_file_items)
+ cvs_item.revision_recorder_token = cvs_source.revision_recorder_token
+
+ del self._cvs_file_items
+
+ def finish(self):
+ self.dump_file.close()
+
+
diff --git a/cvs2svn_lib/git_run_options.py b/cvs2svn_lib/git_run_options.py
new file mode 100644
index 0000000..726b127
--- /dev/null
+++ b/cvs2svn_lib/git_run_options.py
@@ -0,0 +1,274 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module manages cvs2git run options."""
+
+
+import sys
+import datetime
+import codecs
+
+from cvs2svn_lib.version import VERSION
+from cvs2svn_lib.common import error_prefix
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.run_options import not_both
+from cvs2svn_lib.run_options import RunOptions
+from cvs2svn_lib.run_options import ContextOption
+from cvs2svn_lib.run_options import IncompatibleOption
+from cvs2svn_lib.run_options import authors
+from cvs2svn_lib.man_writer import ManWriter
+from cvs2svn_lib.project import Project
+from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
+from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
+from cvs2svn_lib.git_revision_recorder import GitRevisionRecorder
+from cvs2svn_lib.git_output_option import GitRevisionMarkWriter
+from cvs2svn_lib.git_output_option import GitOutputOption
+from cvs2svn_lib.revision_manager import NullRevisionRecorder
+from cvs2svn_lib.revision_manager import NullRevisionExcluder
+from cvs2svn_lib.fulltext_revision_recorder \
+ import SimpleFulltextRevisionRecorderAdapter
+
+
+short_desc = 'convert a cvs repository into a git repository'
+
+synopsis = """\
+.B cvs2git
+[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR
+.br
+.B cvs2git
+[\\fIOPTION\\fR]... \\fI--options=PATH\\fR
+"""
+
+long_desc = """\
+Create a new git repository based on the version history stored in a
+CVS repository. Each CVS commit will be mirrored in the git
+repository, including such information as date of commit and id of the
+committer.
+.P
+The output of this program are a "blobfile" and a "dumpfile", which
+together can be loaded into a git repository using "git fast-import".
+.P
+\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS
+repository that you want to convert. This path doesn't have to be the
+top level directory of a CVS repository; it can point at a project
+within a repository, in which case only that project will be
+converted. This path or one of its parent directories has to contain
+a subdirectory called CVSROOT (though the CVSROOT directory can be
+empty).
+.P
+It is not possible directly to convert a CVS repository to which you
+only have remote access, but the FAQ describes tools that may be used
+to create a local copy of a remote CVS repository.
+"""
+
+files = """\
+A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by
+\\fB--tmpdir\\fR) is used as scratch space for temporary data files.
+"""
+
+see_also = [
+ ('cvs', '1'),
+ ('git', '1'),
+ ('git-fast-import', '1'),
+ ]
+
+
+class GitRunOptions(RunOptions):
+ def __init__(self, progname, cmd_args, pass_manager):
+ Ctx().cross_project_commits = False
+ Ctx().cross_branch_commits = False
+ RunOptions.__init__(self, progname, cmd_args, pass_manager)
+
+ def _get_output_options_group(self):
+ group = RunOptions._get_output_options_group(self)
+
+ group.add_option(IncompatibleOption(
+ '--blobfile', type='string',
+ action='store',
+ help='path to which the "blob" data should be written',
+ man_help=(
+ 'Write the "blob" data (containing revision contents) to '
+ '\\fIpath\\fR.'
+ ),
+ metavar='PATH',
+ ))
+ group.add_option(IncompatibleOption(
+ '--dumpfile', type='string',
+ action='store',
+ help='path to which the revision data should be written',
+ man_help=(
+ 'Write the revision data (branches and commits) to \\fIpath\\fR.'
+ ),
+ metavar='PATH',
+ ))
+ group.add_option(ContextOption(
+ '--dry-run',
+ action='store_true',
+ help=(
+ 'do not create any output; just print what would happen.'
+ ),
+ man_help=(
+ 'Do not create any output; just print what would happen.'
+ ),
+ ))
+
+ return group
+
+ def _get_extraction_options_group(self):
+ group = RunOptions._get_extraction_options_group(self)
+
+ self.parser.set_default('use_cvs', False)
+ group.add_option(IncompatibleOption(
+ '--use-cvs',
+ action='store_true',
+ help=(
+ 'use CVS to extract revision contents (slower than '
+ '--use-rcs but more reliable) (default)'
+ ),
+ man_help=(
+ 'Use CVS to extract revision contents. This option is slower '
+ 'than \\fB--use-rcs\\fR but more reliable.'
+ ),
+ ))
+ self.parser.set_default('use_rcs', False)
+ group.add_option(IncompatibleOption(
+ '--use-rcs',
+ action='store_true',
+ help=(
+ 'use RCS to extract revision contents (faster than '
+ '--use-cvs but fails in some cases)'
+ ),
+ man_help=(
+ 'Use RCS \'co\' to extract revision contents. This option is '
+ 'faster than \\fB--use-cvs\\fR but fails in some cases.'
+ ),
+ ))
+
+ return group
+
+ def callback_manpage(self, option, opt_str, value, parser):
+ f = codecs.getwriter('utf_8')(sys.stdout)
+ ManWriter(
+ parser,
+ section='1',
+ date=datetime.date.today(),
+ source='Version %s' % (VERSION,),
+ manual='User Commands',
+ short_desc=short_desc,
+ synopsis=synopsis,
+ long_desc=long_desc,
+ files=files,
+ authors=authors,
+ see_also=see_also,
+ ).write_manpage(f)
+ sys.exit(0)
+
+ def process_io_options(self):
+ """Process input/output options.
+
+ Process options related to extracting data from the CVS repository
+ and writing to 'git fast-import'-formatted files."""
+
+ ctx = Ctx()
+ options = self.options
+
+ not_both(options.use_rcs, '--use-rcs',
+ options.use_cvs, '--use-cvs')
+
+ if options.use_rcs:
+ revision_reader = RCSRevisionReader(
+ co_executable=options.co_executable
+ )
+ else:
+ # --use-cvs is the default:
+ revision_reader = CVSRevisionReader(
+ cvs_executable=options.cvs_executable
+ )
+
+ if ctx.dry_run:
+ ctx.revision_recorder = NullRevisionRecorder()
+ else:
+ if not (options.blobfile and options.dumpfile):
+ raise FatalError("must pass '--blobfile' and '--dumpfile' options.")
+ ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter(
+ revision_reader,
+ GitRevisionRecorder(options.blobfile),
+ )
+
+ ctx.revision_excluder = NullRevisionExcluder()
+ ctx.revision_reader = None
+
+ ctx.output_option = GitOutputOption(
+ options.dumpfile,
+ GitRevisionMarkWriter(),
+ max_merges=None,
+ # Optional map from CVS author names to git author names:
+ author_transforms={}, # FIXME
+ )
+
+ def set_project(
+ self,
+ project_cvs_repos_path,
+ symbol_transforms=None,
+ symbol_strategy_rules=[],
+ ):
+ """Set the project to be converted.
+
+ If a project had already been set, overwrite it.
+
+ Most arguments are passed straight through to the Project
+ constructor. SYMBOL_STRATEGY_RULES is an iterable of
+ SymbolStrategyRules that will be applied to symbols in this
+ project."""
+
+ symbol_strategy_rules = list(symbol_strategy_rules)
+
+ project = Project(
+ 0,
+ project_cvs_repos_path,
+ symbol_transforms=symbol_transforms,
+ )
+
+ self.projects = [project]
+ self.project_symbol_strategy_rules = [symbol_strategy_rules]
+
+ def process_options(self):
+ # Consistency check for options and arguments.
+ if len(self.args) == 0:
+ self.usage()
+ sys.exit(1)
+
+ if len(self.args) > 1:
+ Log().error(error_prefix + ": must pass only one CVS repository.\n")
+ self.usage()
+ sys.exit(1)
+
+ cvsroot = self.args[0]
+
+ self.process_io_options()
+ self.process_symbol_strategy_options()
+ self.process_property_setter_options()
+
+ # Create the project:
+ self.set_project(
+ cvsroot,
+ symbol_transforms=self.options.symbol_transforms,
+ symbol_strategy_rules=self.options.symbol_strategy_rules,
+ )
+
+
diff --git a/cvs2svn_lib/key_generator.py b/cvs2svn_lib/key_generator.py
new file mode 100644
index 0000000..d580d6b
--- /dev/null
+++ b/cvs2svn_lib/key_generator.py
@@ -0,0 +1,45 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the KeyGenerator class."""
+
+
+class KeyGenerator:
+ """Generate a series of unique keys."""
+
+ def __init__(self, first_id=1):
+ """Initialize a KeyGenerator with the specified FIRST_ID.
+
+ FIRST_ID should be an int or long, and the generated keys will be
+ of the same type."""
+
+ self._key_base = first_id
+ self._last_id = None
+
+ def gen_id(self):
+ """Generate and return a previously-unused key, as an integer."""
+
+ self._last_id = self._key_base
+ self._key_base += 1
+
+ return self._last_id
+
+ def get_last_id(self):
+ """Return the last id that was generated, as an integer."""
+
+ return self._last_id
+
+
diff --git a/cvs2svn_lib/log.py b/cvs2svn_lib/log.py
new file mode 100644
index 0000000..798350c
--- /dev/null
+++ b/cvs2svn_lib/log.py
@@ -0,0 +1,174 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains a simple logging facility for cvs2svn."""
+
+
+import sys
+import time
+import threading
+
+
+class Log:
+ """A Simple logging facility.
+
+ If self.log_level is DEBUG or higher, each line will be timestamped
+ with the number of wall-clock seconds since the time when this
+ module was first imported.
+
+ If self.use_timestamps is True, each line will be timestamped with a
+ human-readable clock time.
+
+ The public methods of this class are thread-safe.
+
+ This class is a Borg; see
+ http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
+
+ # These constants represent the log levels that this class supports.
+ # The increase_verbosity() and decrease_verbosity() methods rely on
+ # these constants being consecutive integers:
+ ERROR = -2
+ WARN = -1
+ QUIET = 0
+ NORMAL = 1
+ VERBOSE = 2
+ DEBUG = 3
+
+ start_time = time.time()
+
+ __shared_state = {}
+
+ def __init__(self):
+ self.__dict__ = self.__shared_state
+ if self.__dict__:
+ return
+
+ self.log_level = Log.NORMAL
+
+ # Set this to True if you want to see timestamps on each line output.
+ self.use_timestamps = False
+
+ # The output file to use for errors:
+ self._err = sys.stderr
+
+ # The output file to use for lower-priority messages:
+ self._out = sys.stdout
+
+ # Lock to serialize writes to the log:
+ self.lock = threading.Lock()
+
+ def increase_verbosity(self):
+ self.lock.acquire()
+ try:
+ self.log_level = min(self.log_level + 1, Log.DEBUG)
+ finally:
+ self.lock.release()
+
+ def decrease_verbosity(self):
+ self.lock.acquire()
+ try:
+ self.log_level = max(self.log_level - 1, Log.ERROR)
+ finally:
+ self.lock.release()
+
+ def is_on(self, level):
+ """Return True iff messages at the specified LEVEL are currently on.
+
+ LEVEL should be one of the constants Log.WARN, Log.QUIET, etc."""
+
+ return self.log_level >= level
+
+ def _timestamp(self):
+ """Return a timestamp if needed, as a string with a trailing space."""
+
+ retval = []
+
+ if self.log_level >= Log.DEBUG:
+ retval.append('%f: ' % (time.time() - self.start_time,))
+
+ if self.use_timestamps:
+ retval.append(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))
+
+ return ''.join(retval)
+
+ def _write(self, out, *args):
+ """Write a message to OUT.
+
+ If there are multiple ARGS, they will be separated by spaces. If
+ there are multiple lines, they will be output one by one with the
+ same timestamp prefix."""
+
+ timestamp = self._timestamp()
+ s = ' '.join(map(str, args))
+ lines = s.split('\n')
+ if lines and not lines[-1]:
+ del lines[-1]
+
+ self.lock.acquire()
+ try:
+ for s in lines:
+ out.write('%s%s\n' % (timestamp, s,))
+ # Ensure that log output doesn't get out-of-order with respect to
+ # stderr output.
+ out.flush()
+ finally:
+ self.lock.release()
+
+ def write(self, *args):
+ """Write a message to SELF._out.
+
+ This is a public method to use for writing to the output log
+ unconditionally."""
+
+ self._write(self._out, *args)
+
+ def error(self, *args):
+ """Log a message at the ERROR level."""
+
+ if self.is_on(Log.ERROR):
+ self._write(self._err, *args)
+
+ def warn(self, *args):
+ """Log a message at the WARN level."""
+
+ if self.is_on(Log.WARN):
+ self._write(self._out, *args)
+
+ def quiet(self, *args):
+ """Log a message at the QUIET level."""
+
+ if self.is_on(Log.QUIET):
+ self._write(self._out, *args)
+
+ def normal(self, *args):
+ """Log a message at the NORMAL level."""
+
+ if self.is_on(Log.NORMAL):
+ self._write(self._out, *args)
+
+ def verbose(self, *args):
+ """Log a message at the VERBOSE level."""
+
+ if self.is_on(Log.VERBOSE):
+ self._write(self._out, *args)
+
+ def debug(self, *args):
+ """Log a message at the DEBUG level."""
+
+ if self.is_on(Log.DEBUG):
+ self._write(self._out, *args)
+
+
diff --git a/cvs2svn_lib/main.py b/cvs2svn_lib/main.py
new file mode 100644
index 0000000..492c49e
--- /dev/null
+++ b/cvs2svn_lib/main.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python2
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+import os
+import errno
+import gc
+
+try:
+ # Try to get access to a bunch of encodings for use with --encoding.
+ # See http://cjkpython.i18n.org/ for details.
+ import iconv_codec
+except ImportError:
+ pass
+
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.svn_run_options import SVNRunOptions
+from cvs2svn_lib.git_run_options import GitRunOptions
+from cvs2svn_lib.bzr_run_options import BzrRunOptions
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.pass_manager import PassManager
+from cvs2svn_lib.passes import passes
+
+
+def main(progname, run_options, pass_manager):
+ # Disable garbage collection, as we try not to create any circular
+ # data structures:
+ gc.disable()
+
+ # Convenience var, so we don't have to keep instantiating this Borg.
+ ctx = Ctx()
+
+ # Make sure the tmp directory exists. Note that we don't check if
+ # it's empty -- we want to be able to use, for example, "." to hold
+ # tempfiles. But if we *did* want to check if it were empty, we'd do
+ # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
+ if not os.path.exists(ctx.tmpdir):
+ erase_tmpdir = True
+ os.mkdir(ctx.tmpdir)
+ elif not os.path.isdir(ctx.tmpdir):
+ raise FatalError(
+ "cvs2svn tried to use '%s' for temporary files, but that path\n"
+ " exists and is not a directory. Please make it be a directory,\n"
+ " or specify some other directory for temporary files."
+ % (ctx.tmpdir,))
+ else:
+ erase_tmpdir = False
+
+ # But do lock the tmpdir, to avoid process clash.
+ try:
+ os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
+ except OSError, e:
+ if e.errno == errno.EACCES:
+ raise FatalError("Permission denied:"
+ + " No write access to directory '%s'." % ctx.tmpdir)
+ if e.errno == errno.EEXIST:
+ raise FatalError(
+ "cvs2svn is using directory '%s' for temporary files, but\n"
+ " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
+ " cvs2svn process is currently using '%s' as its temporary\n"
+ " workspace. If you are certain that is not the case,\n"
+ " then remove the '%s/cvs2svn.lock' subdirectory."
+ % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
+ raise
+
+ try:
+ if run_options.profiling:
+ import hotshot
+ prof = hotshot.Profile('cvs2svn.hotshot')
+ prof.runcall(pass_manager.run, run_options)
+ prof.close()
+ else:
+ pass_manager.run(run_options)
+ finally:
+ try:
+ os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
+ except:
+ pass
+
+ if erase_tmpdir:
+ try:
+ os.rmdir(ctx.tmpdir)
+ except:
+ pass
+
+
+def svn_main(progname, cmd_args):
+ pass_manager = PassManager(passes)
+ run_options = SVNRunOptions(progname, cmd_args, pass_manager)
+ main(progname, run_options, pass_manager)
+
+
+def git_main(progname, cmd_args):
+ pass_manager = PassManager(passes)
+ run_options = GitRunOptions(progname, cmd_args, pass_manager)
+ main(progname, run_options, pass_manager)
+
+
+def bzr_main(progname, cmd_args):
+ pass_manager = PassManager(passes)
+ run_options = BzrRunOptions(progname, cmd_args, pass_manager)
+ main(progname, run_options, pass_manager)
+
+
diff --git a/cvs2svn_lib/man_writer.py b/cvs2svn_lib/man_writer.py
new file mode 100644
index 0000000..3cca8c9
--- /dev/null
+++ b/cvs2svn_lib/man_writer.py
@@ -0,0 +1,197 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the ManWriter class for outputting manpages."""
+
+
+import datetime
+import optparse
+import re
+
+
+whitespace_re = re.compile(r'\s+')
+
+def wrap(s, width=70):
+ # Convert all whitespace substrings to single spaces:
+ s = whitespace_re.sub(' ', s)
+ s = s.strip()
+ retval = []
+ while s:
+ if len(s) <= width:
+ retval.append(s)
+ break
+ i = s.rfind(' ', 0, width + 1)
+ if i == -1:
+ # There were no spaces within the first width+1 characters; break
+ # at the next space after width:
+ i = s.find(' ', width + 1)
+ if i == -1:
+ # There were no spaces in s at all.
+ retval.append(s)
+ break
+
+ retval.append(s[:i].rstrip())
+ s = s[i+1:].lstrip()
+
+ for (i,line) in enumerate(retval):
+ if line.startswith('\'') or line.startswith('.'):
+ # These are roff control characters and have to be escaped:
+ retval[i] = '\\' + line
+
+ return '\n'.join(retval)
+
+
+class ManOption(optparse.Option):
+ """An optparse.Option that holds an explicit string for the man page."""
+
+ def __init__(self, *args, **kw):
+ self.man_help = kw.pop('man_help')
+ optparse.Option.__init__(self, *args, **kw)
+
+
+class ManWriter(object):
+ def __init__(
+ self,
+ parser,
+ section, date, source, manual,
+ short_desc, synopsis, long_desc, files, authors, see_also,
+ ):
+ self.parser = parser
+ self.section = section
+ self.date = date
+ self.source = source
+ self.manual = manual
+ self.short_desc = short_desc
+ self.synopsis = synopsis
+ self.long_desc = long_desc
+ self.files = files
+ self.authors = authors
+ self.see_also = see_also
+
+ def write_title(self, f):
+ f.write('.\\" Process this file with\n')
+ f.write(
+ '.\\" groff -man -Tascii %s.%s\n' % (
+ self.parser.get_prog_name(),
+ self.section,
+ )
+ )
+ f.write(
+ '.TH %s "%s" "%s" "%s" "%s"\n' % (
+ self.parser.get_prog_name().upper(),
+ self.section,
+ self.date.strftime('%b %d, %Y'),
+ self.source,
+ self.manual,
+ )
+ )
+
+ def write_name(self, f):
+ f.write('.SH "NAME"\n')
+ f.write(
+ '%s \- %s\n' % (
+ self.parser.get_prog_name(),
+ self.short_desc,
+ )
+ )
+
+ def write_synopsis(self, f):
+ f.write('.SH "SYNOPSIS"\n')
+ f.write(self.synopsis)
+
+ def write_description(self, f):
+ f.write('.SH "DESCRIPTION"\n')
+ f.write(self.long_desc)
+
+ def _get_option_strings(self, option):
+ """Return a list of option strings formatted with their metavariables.
+
+ This method is very similar to
+ optparse.HelpFormatter.format_option_strings().
+
+ """
+
+ if option.takes_value():
+ metavar = (option.metavar or option.dest).lower()
+ short_opts = [
+ '\\fB%s\\fR \\fI%s\\fR' % (opt, metavar)
+ for opt in option._short_opts
+ ]
+ long_opts = [
+ '\\fB%s\\fR=\\fI%s\\fR' % (opt, metavar)
+ for opt in option._long_opts
+ ]
+ else:
+ short_opts = [
+ '\\fB%s\\fR' % (opt,)
+ for opt in option._short_opts
+ ]
+ long_opts = [
+ '\\fB%s\\fR' % (opt,)
+ for opt in option._long_opts
+ ]
+
+ return short_opts + long_opts
+
+ def _write_option(self, f, option):
+ man_help = getattr(option, 'man_help', option.help)
+
+ if man_help is not optparse.SUPPRESS_HELP:
+ man_help = wrap(man_help)
+ f.write('.IP "%s"\n' % (', '.join(self._get_option_strings(option)),))
+ f.write('%s\n' % (man_help,))
+
+ def _write_container_help(self, f, container):
+ for option in container.option_list:
+ if option.help is not optparse.SUPPRESS_HELP:
+ self._write_option(f, option)
+
+ def write_options(self, f):
+ f.write('.SH "OPTIONS"\n')
+ if self.parser.option_list:
+ (self._write_container_help(f, self.parser))
+ for group in self.parser.option_groups:
+ f.write('.SH "%s"\n' % (group.title.upper(),))
+ if group.description:
+ f.write(self.format_description(group.description) + '\n')
+ self._write_container_help(f, group)
+
+ def write_files(self, f):
+ f.write('.SH "FILES"\n')
+ f.write(self.files)
+
+ def write_authors(self, f):
+ f.write('.SH "AUTHORS"\n')
+ f.write(self.authors)
+
+ def write_see_also(self, f):
+ f.write('.SH "SEE ALSO"\n')
+ f.write(', '.join([
+ '%s(%s)' % (name, section,)
+ for (name, section,) in self.see_also
+ ]) + '\n')
+
+ def write_manpage(self, f):
+ self.write_title(f)
+ self.write_name(f)
+ self.write_synopsis(f)
+ self.write_description(f)
+ self.write_options(f)
+ self.write_files(f)
+ self.write_authors(f)
+ self.write_see_also(f)
+
+
diff --git a/cvs2svn_lib/metadata.py b/cvs2svn_lib/metadata.py
new file mode 100644
index 0000000..6cd1337
--- /dev/null
+++ b/cvs2svn_lib/metadata.py
@@ -0,0 +1,26 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Represent CVSRevision metadata."""
+
+
+class Metadata(object):
+ def __init__(self, id, author, log_msg):
+ self.id = id
+ self.author = author
+ self.log_msg = log_msg
+
+
diff --git a/cvs2svn_lib/metadata_database.py b/cvs2svn_lib/metadata_database.py
new file mode 100644
index 0000000..de01920
--- /dev/null
+++ b/cvs2svn_lib/metadata_database.py
@@ -0,0 +1,102 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes to manage CVSRevision metadata."""
+
+
+try:
+ from hashlib import sha1
+except ImportError:
+ from sha import new as sha1
+
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.database import IndexedDatabase
+from cvs2svn_lib.key_generator import KeyGenerator
+from cvs2svn_lib.serializer import PrimedPickleSerializer
+from cvs2svn_lib.metadata import Metadata
+
+
+def MetadataDatabase(store_filename, index_table_filename, mode):
+ """A database to store Metadata instances that describe CVSRevisions.
+
+ This database manages a map
+
+ id -> Metadata instance
+
+ where id is a unique identifier for the metadata."""
+
+ return IndexedDatabase(
+ store_filename, index_table_filename,
+ mode, PrimedPickleSerializer((Metadata,)),
+ )
+
+
+class MetadataLogger:
+ """Store and generate IDs for the metadata associated with CVSRevisions.
+
+ We want CVSRevisions that might be able to be combined to have the
+ same metadata ID, so we want a one-to-one relationship id <->
+ metadata. We could simply construct a map {metadata : id}, but the
+ map would grow too large. Therefore, we generate a digest
+ containing the significant parts of the metadata, and construct a
+ map {digest : id}.
+
+ To get the ID for a new set of metadata, we first create the digest.
+ If there is already an ID registered for that digest, we simply
+ return it. If not, we generate a new ID, store the metadata in the
+ metadata database under that ID, record the mapping {digest : id},
+ and return the new id.
+
+ What metadata is included in the digest? The author, log_msg,
+ project_id (if Ctx().cross_project_commits is not set), and
+ branch_name (if Ctx().cross_branch_commits is not set)."""
+
+ def __init__(self, metadata_db):
+ self._metadata_db = metadata_db
+
+ # A map { digest : id }:
+ self._digest_to_id = {}
+
+ # A key_generator to generate keys for metadata that haven't been
+ # seen yet:
+ self.key_generator = KeyGenerator()
+
+ def store(self, project, branch_name, author, log_msg):
+ """Store the metadata and return its id.
+
+ Locate the record for a commit with the specified (PROJECT,
+ BRANCH_NAME, AUTHOR, LOG_MSG) and return its id. (Depending on
+ policy, not all of these items are necessarily used when creating
+ the unique id.) If there is no such record, create one and return
+ its newly-generated id."""
+
+ key = [author, log_msg]
+ if not Ctx().cross_project_commits:
+ key.append('%x' % project.id)
+ if not Ctx().cross_branch_commits:
+ key.append(branch_name or '')
+
+ digest = sha1('\0'.join(key)).digest()
+ try:
+ # See if it is already known:
+ return self._digest_to_id[digest]
+ except KeyError:
+ id = self.key_generator.gen_id()
+ self._digest_to_id[digest] = id
+ self._metadata_db[id] = Metadata(id, author, log_msg)
+ return id
+
+
diff --git a/cvs2svn_lib/openings_closings.py b/cvs2svn_lib/openings_closings.py
new file mode 100644
index 0000000..b1d4093
--- /dev/null
+++ b/cvs2svn_lib/openings_closings.py
@@ -0,0 +1,236 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes to keep track of symbol openings/closings."""
+
+
+import cPickle
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.svn_revision_range import SVNRevisionRange
+
+
# Constants used in SYMBOL_OPENINGS_CLOSINGS: each record written by
# SymbolingsLogger (and parsed back by SymbolingsReader) is tagged
# with one of these single-character type codes:
OPENING = 'O'
CLOSING = 'C'
+
+
class SymbolingsLogger:
  """Write symbol opening/closing records to SYMBOL_OPENINGS_CLOSINGS.

  An 'opening' is the beginning of the lifetime of the source
  (CVSRevision or CVSBranch) from which a given CVSSymbol sprouts; the
  'closing' is the SVN revision in which that source is deleted or
  overwritten.  For example, if branch BEE (branch number 1.2.2) of
  foo.c sprouts from revision 1.2, then the SVN revision committing
  1.2 is the opening of BEE for foo.c and the SVN revision committing
  1.3 is its closing -- even though many revisions (e.g., ones on
  other branches, perhaps even on BEE itself) may fall chronologically
  between 1.2 and 1.3, because 1.3 is the next revision *on the same
  line* as 1.2.

  The recorded data is used later to (1) determine the range of SVN
  revision numbers that can serve as the source of a copy of a
  particular file onto a branch/tag, and (2) minimize the number of
  copies and deletes per creation by choosing source revision numbers
  usable for as many files as possible.  E.g., if revisions 1.2 and
  1.3 of foo.c became SVN r17 and r30, branch BEE must copy foo.c from
  somewhere in 17 <= revnum < 30; if bar.c in the same directory has
  the range 24 <= revnum < 39 for BEE, then copying the whole
  directory from a revision in 24 <= revnum < 30 kills two birds with
  one stone."""

  def __init__(self):
    path = artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self.symbolings = open(path, 'w')

  def log_revision(self, cvs_rev, svn_revnum):
    """Log any openings and closings found in CVS_REV."""

    for (symbol_id, cvs_symbol_id) in cvs_rev.opened_symbols:
      self._log_opening(symbol_id, cvs_symbol_id, svn_revnum)

    for (symbol_id, cvs_symbol_id) in cvs_rev.closed_symbols:
      self._log_closing(symbol_id, cvs_symbol_id, svn_revnum)

  def log_branch_revision(self, cvs_branch, svn_revnum):
    """Log any openings found in CVS_BRANCH.

    (Only openings are recorded for branches.)"""

    for (symbol_id, cvs_symbol_id) in cvs_branch.opened_symbols:
      self._log_opening(symbol_id, cvs_symbol_id, svn_revnum)

  def _log(self, symbol_id, cvs_symbol_id, svn_revnum, type):
    """Append a single record to self.symbolings.

    The record states that SVN_REVNUM is the opening or closing (per
    TYPE, which must be the constant OPENING or CLOSING) of
    CVS_SYMBOL_ID for SYMBOL_ID."""

    record = '%x %d %s %x\n' % (symbol_id, svn_revnum, type, cvs_symbol_id)
    self.symbolings.write(record)

  def _log_opening(self, symbol_id, cvs_symbol_id, svn_revnum):
    """Record an opening; see _log() for the record format."""

    self._log(symbol_id, cvs_symbol_id, svn_revnum, OPENING)

  def _log_closing(self, symbol_id, cvs_symbol_id, svn_revnum):
    """Record a closing; see _log() for the record format."""

    self._log(symbol_id, cvs_symbol_id, svn_revnum, CLOSING)

  def close(self):
    """Close the output file; this logger must not be used afterwards."""

    self.symbolings.close()
    self.symbolings = None
+
+
class SymbolingsReader:
  """Provides an interface to retrieve symbol openings and closings.

  This class accesses the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and returning
  the correct opening and closing Subversion revision numbers for a
  given symbolic name and SVN revision number range."""

  def __init__(self):
    """Open the SYMBOL_OPENINGS_CLOSINGS_SORTED file for reading, and
    read the offsets database into memory."""

    self.symbolings = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    # The offsets_db is really small, and we need to read and write
    # from it a fair bit, so suck it into memory.  (open() is used
    # rather than the deprecated file() builtin.)
    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'rb')
    # A map from symbol_id to offset.  The values of this map are
    # incremented as the openings and closings for a symbol are
    # consumed.
    self.offsets = cPickle.load(offsets_db)
    offsets_db.close()

  def close(self):
    self.symbolings.close()
    del self.symbolings
    del self.offsets

  def _generate_lines(self, symbol):
    """Generate the lines for SYMBOL.

    SYMBOL is a TypedSymbol instance.  Yield the tuple (revnum, type,
    cvs_symbol_id) for all openings and closings for SYMBOL."""

    if symbol.id in self.offsets:
      # Set our read offset for self.symbolings to the offset for this
      # symbol:
      self.symbolings.seek(self.offsets[symbol.id])

      while True:
        line = self.symbolings.readline().rstrip()
        if not line:
          break
        # Locals renamed from 'id'/'type' to avoid shadowing builtins:
        (line_symbol_id, revnum, line_type, cvs_symbol_id) = line.split()
        line_symbol_id = int(line_symbol_id, 16)
        revnum = int(revnum)
        if line_symbol_id != symbol.id:
          # We have reached the records for the next symbol; stop:
          break
        cvs_symbol_id = int(cvs_symbol_id, 16)

        yield (revnum, line_type, cvs_symbol_id)

  def get_range_map(self, svn_symbol_commit):
    """Return the ranges of all CVSSymbols in SVN_SYMBOL_COMMIT.

    Return a map { CVSSymbol : SVNRevisionRange }.  Raise
    InternalError if the openings/closings on file are inconsistent
    (duplicate opening/closing, closing without opening, or a missing
    or not-yet-ready opening)."""

    # A map { cvs_symbol_id : CVSSymbol }:
    cvs_symbol_map = {}
    for cvs_symbol in svn_symbol_commit.get_cvs_items():
      cvs_symbol_map[cvs_symbol.id] = cvs_symbol

    range_map = {}

    for (revnum, entry_type, cvs_symbol_id) \
            in self._generate_lines(svn_symbol_commit.symbol):
      cvs_symbol = cvs_symbol_map.get(cvs_symbol_id)
      if cvs_symbol is None:
        # This CVSSymbol is not part of SVN_SYMBOL_COMMIT.
        continue
      # Renamed from 'range' to avoid shadowing the builtin range():
      revision_range = range_map.get(cvs_symbol)
      if entry_type == OPENING:
        if revision_range is not None:
          raise InternalError(
              'Multiple openings logged for %r' % (cvs_symbol,)
              )
        range_map[cvs_symbol] = SVNRevisionRange(
            cvs_symbol.source_lod, revnum
            )
      else:
        if revision_range is None:
          raise InternalError(
              'Closing precedes opening for %r' % (cvs_symbol,)
              )
        if revision_range.closing_revnum is not None:
          raise InternalError(
              'Multiple closings logged for %r' % (cvs_symbol,)
              )
        revision_range.add_closing(revnum)

    # Make sure that all CVSSymbols are accounted for, and adjust the
    # closings to be not later than svn_symbol_commit.revnum.
    for cvs_symbol in cvs_symbol_map.itervalues():
      try:
        revision_range = range_map[cvs_symbol]
      except KeyError:
        raise InternalError('No opening for %s' % (cvs_symbol,))

      if revision_range.opening_revnum >= svn_symbol_commit.revnum:
        raise InternalError(
            'Opening in r%d not ready for %s in r%d'
            % (revision_range.opening_revnum, cvs_symbol,
               svn_symbol_commit.revnum,)
            )

      if revision_range.closing_revnum is not None \
             and revision_range.closing_revnum > svn_symbol_commit.revnum:
        revision_range.closing_revnum = None

    return range_map
+
+
diff --git a/cvs2svn_lib/output_option.py b/cvs2svn_lib/output_option.py
new file mode 100644
index 0000000..70419e6
--- /dev/null
+++ b/cvs2svn_lib/output_option.py
@@ -0,0 +1,85 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes that hold the cvs2svn output options."""
+
+
class OutputOption:
  """Represents an output choice for a run of cvs2svn.

  This is an abstract interface: all methods except
  register_artifacts() (which defaults to a no-op) raise
  NotImplementedError and must be overridden by concrete output
  options."""

  def register_artifacts(self, which_pass):
    """Register artifacts that will be needed for this output option.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

    pass

  def check(self):
    """Check that the options stored in SELF are sensible.

    This might include checking the existence of a repository on
    disk, etc."""

    raise NotImplementedError()

  def check_symbols(self, symbol_map):
    """Check that the symbols in SYMBOL_MAP are OK for this output option.

    SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)},
    indicating how each symbol is planned to be converted.  Raise a
    FatalError if the symbol plan is not acceptable for this output
    option."""

    raise NotImplementedError()

  def setup(self, svn_rev_count):
    """Prepare this output option.

    SVN_REV_COUNT appears to be the number of SVN revisions that will
    be produced -- NOTE(review): confirm against callers."""

    raise NotImplementedError()

  def process_initial_project_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNInitialProjectCommit."""

    raise NotImplementedError()

  def process_primary_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNPrimaryCommit."""

    raise NotImplementedError()

  def process_post_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNPostCommit."""

    raise NotImplementedError()

  def process_branch_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNBranchCommit."""

    raise NotImplementedError()

  def process_tag_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNTagCommit."""

    raise NotImplementedError()

  def cleanup(self):
    """Perform any required cleanup related to this output option."""

    raise NotImplementedError()
+
+
diff --git a/cvs2svn_lib/pass_manager.py b/cvs2svn_lib/pass_manager.py
new file mode 100644
index 0000000..90fa2dc
--- /dev/null
+++ b/cvs2svn_lib/pass_manager.py
@@ -0,0 +1,215 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains tools to manage the passes of a conversion."""
+
+
+import time
+import gc
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.stats_keeper import StatsKeeper
+from cvs2svn_lib.stats_keeper import read_stats_keeper
+from cvs2svn_lib.artifact_manager import artifact_manager
+
+
class InvalidPassError(FatalError):
  """The user specified an invalid pass name or number.

  A hint about --help-passes is appended to the message
  automatically."""

  def __init__(self, msg):
    FatalError.__init__(
        self, msg + '\nUse --help-passes for more information.')
+
+
def check_for_garbage():
  """Report (at debug level) any objects found by the cyclic GC.

  The garbage collector is turned off elsewhere because the conversion
  is not supposed to create circular dependencies, making collection a
  waste of time.  As a sanity check, run one collection here and
  report anything unreachable that turned up anyway."""

  gc.set_debug(gc.DEBUG_SAVEALL)
  num_collected = gc.collect()
  if num_collected:
    if Log().is_on(Log.DEBUG):
      Log().debug(
          'INTERNAL: %d unreachable object(s) were garbage collected:'
          % (num_collected,)
          )
      for obj in gc.garbage:
        Log().debug('    %s' % (obj,))
    # Drop the objects saved by DEBUG_SAVEALL so they do not accumulate:
    del gc.garbage[:]
+
+
class Pass(object):
  """Base class for one step of the conversion.

  Concrete passes override register_artifacts() and run()."""

  def __init__(self):
    # Unless a subclass overrides it, a pass is named after its class:
    self.name = type(self).__name__

  def register_artifacts(self):
    """Register artifacts (created and needed) in artifact_manager."""

    raise NotImplementedError

  def _register_temp_file(self, basename):
    """Shorthand: register BASENAME as a temp file created by this pass."""

    artifact_manager.register_temp_file(basename, self)

  def _register_temp_file_needed(self, basename):
    """Shorthand: register BASENAME as a temp file read by this pass."""

    artifact_manager.register_temp_file_needed(basename, self)

  def run(self, run_options, stats_keeper):
    """Carry out this step of the conversion.

    RUN_OPTIONS is an instance of RunOptions.  STATS_KEEPER is an
    instance of StatsKeeper."""

    raise NotImplementedError
+
+
class PassManager:
  """Manage a list of passes that can be executed separately or all at once.

  Passes are numbered starting with 1."""

  def __init__(self, passes):
    """Construct a PassManager with the specified PASSES.

    Internally, passes are numbered starting with 1.  So PASSES[0] is
    considered to be pass number 1."""

    self.passes = passes
    self.num_passes = len(self.passes)

  def get_pass_number(self, pass_name, default=None):
    """Return the number of the pass indicated by PASS_NAME.

    PASS_NAME should be a string containing the name or number of a
    pass.  If a number, it should be in the range 1 <= value <=
    self.num_passes.  Return an integer in the same range.  If
    PASS_NAME is the empty string and DEFAULT is specified, return
    DEFAULT.  Raise InvalidPassError if PASS_NAME cannot be converted
    into a valid pass number."""

    if not pass_name and default is not None:
      assert 1 <= default <= self.num_passes
      return default

    try:
      # Does pass_name look like an integer?
      pass_number = int(pass_name)
      if not 1 <= pass_number <= self.num_passes:
        raise InvalidPassError(
            'illegal value (%d) for pass number.  Must be 1 through %d or\n'
            'the name of a known pass.'
            % (pass_number,self.num_passes,))
      return pass_number
    except ValueError:
      # Is pass_name the name of one of the passes?
      for (i, the_pass) in enumerate(self.passes):
        if the_pass.name == pass_name:
          return i + 1
      raise InvalidPassError('Unknown pass name (%r).' % (pass_name,))

  def run(self, run_options):
    """Run the specified passes, one after another.

    RUN_OPTIONS will be passed to the Passes' run() methods.
    RUN_OPTIONS.start_pass is the number of the first pass that should
    be run.  RUN_OPTIONS.end_pass is the number of the last pass that
    should be run.  It must be that 1 <= RUN_OPTIONS.start_pass <=
    RUN_OPTIONS.end_pass <= self.num_passes."""

    # Convert start_pass and end_pass into the indices of the passes
    # to execute, using the Python index range convention (i.e., first
    # pass executed and first pass *after* the ones that should be
    # executed).
    index_start = run_options.start_pass - 1
    index_end = run_options.end_pass

    # Inform the artifact manager when artifacts are created and used:
    for (i, the_pass) in enumerate(self.passes):
      the_pass.register_artifacts()
      # Each pass creates a new version of the statistics file:
      artifact_manager.register_temp_file(
          config.STATISTICS_FILE % (i + 1,), the_pass
          )
      if i != 0:
        # Each pass subsequent to the first reads the statistics file
        # from the preceding pass.  ((i + 1) - 1 is written this way
        # to emphasize "current pass number minus one":
        artifact_manager.register_temp_file_needed(
            config.STATISTICS_FILE % (i + 1 - 1,), the_pass
            )

    # Tell the artifact manager about passes that are being skipped this run:
    for the_pass in self.passes[0:index_start]:
      artifact_manager.pass_skipped(the_pass)

    start_time = time.time()
    for i in range(index_start, index_end):
      the_pass = self.passes[i]
      Log().quiet('----- pass %d (%s) -----' % (i + 1, the_pass.name,))
      artifact_manager.pass_started(the_pass)

      # The first pass starts with fresh statistics; each later pass
      # resumes from the statistics archived by the preceding pass:
      if i == 0:
        stats_keeper = StatsKeeper()
      else:
        stats_keeper = read_stats_keeper(
            artifact_manager.get_temp_file(
                config.STATISTICS_FILE % (i + 1 - 1,)
                )
            )

      the_pass.run(run_options, stats_keeper)
      end_time = time.time()
      stats_keeper.log_duration_for_pass(
          end_time - start_time, i + 1, the_pass.name
          )
      Log().normal(stats_keeper.single_pass_timing(i + 1))
      stats_keeper.archive(
          artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1,))
          )
      # The end of this pass marks the start of the next pass's timing
      # window:
      start_time = end_time
      Ctx().clean()
      # Allow the artifact manager to clean up artifacts that are no
      # longer needed:
      artifact_manager.pass_done(the_pass, Ctx().skip_cleanup)

      check_for_garbage()

    # Tell the artifact manager about passes that are being deferred:
    for the_pass in self.passes[index_end:]:
      artifact_manager.pass_deferred(the_pass)

    # stats_keeper here is the one left over from the final executed pass:
    Log().quiet(stats_keeper)
    Log().normal(stats_keeper.timings())

    # Consistency check:
    artifact_manager.check_clean()

  def help_passes(self):
    """Output (to sys.stdout) the indices and names of available passes."""

    print 'PASSES:'
    for (i, the_pass) in enumerate(self.passes):
      print '%5d : %s' % (i + 1, the_pass.name,)
+
+
diff --git a/cvs2svn_lib/passes.py b/cvs2svn_lib/passes.py
new file mode 100644
index 0000000..af14692
--- /dev/null
+++ b/cvs2svn_lib/passes.py
@@ -0,0 +1,1837 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module defines the passes that make up a conversion."""
+
+
+import sys
+import os
+import shutil
+import cPickle
+
+from cvs2svn_lib import config
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.common import FatalException
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.common import DB_OPEN_WRITE
+from cvs2svn_lib.common import Timestamper
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.pass_manager import Pass
+from cvs2svn_lib.serializer import PrimedPickleSerializer
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.cvs_file_database import CVSFileDatabase
+from cvs2svn_lib.metadata_database import MetadataDatabase
+from cvs2svn_lib.project import read_projects
+from cvs2svn_lib.project import write_projects
+from cvs2svn_lib.symbol import LineOfDevelopment
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.symbol import Symbol
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+from cvs2svn_lib.symbol import ExcludedSymbol
+from cvs2svn_lib.symbol_database import SymbolDatabase
+from cvs2svn_lib.symbol_database import create_symbol_database
+from cvs2svn_lib.symbol_statistics import SymbolPlanError
+from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
+from cvs2svn_lib.symbol_statistics import SymbolStatistics
+from cvs2svn_lib.cvs_item import CVSRevision
+from cvs2svn_lib.cvs_item import CVSSymbol
+from cvs2svn_lib.cvs_item_database import OldCVSItemStore
+from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
+from cvs2svn_lib.cvs_item_database import cvs_item_primer
+from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
+from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
+from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
+from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
+from cvs2svn_lib.key_generator import KeyGenerator
+from cvs2svn_lib.changeset import RevisionChangeset
+from cvs2svn_lib.changeset import OrderedChangeset
+from cvs2svn_lib.changeset import SymbolChangeset
+from cvs2svn_lib.changeset import BranchChangeset
+from cvs2svn_lib.changeset import create_symbol_changeset
+from cvs2svn_lib.changeset_graph import ChangesetGraph
+from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
+from cvs2svn_lib.changeset_database import ChangesetDatabase
+from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
+from cvs2svn_lib.svn_commit import SVNRevisionCommit
+from cvs2svn_lib.openings_closings import SymbolingsLogger
+from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
+from cvs2svn_lib.persistence_manager import PersistenceManager
+from cvs2svn_lib.collect_data import CollectData
+from cvs2svn_lib.process import call_command
+from cvs2svn_lib.check_dependencies_pass \
+ import CheckItemStoreDependenciesPass
+from cvs2svn_lib.check_dependencies_pass \
+ import CheckIndexedItemStoreDependenciesPass
+
+
def sort_file(infilename, outfilename, options=None):
  """Sort file INFILENAME, storing the results to OUTFILENAME.

  OPTIONS is an optional list of strings that are passed as additional
  options to the sort command.  Raise FatalError if the output file is
  missing or its size differs from the input's."""

  # Use a None sentinel instead of a mutable default argument so that
  # a shared list is never reused across calls:
  if options is None:
    options = []

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
  # it to 'C'
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'

  # The -T option to sort has a nice side effect.  The Win32 sort is
  # case insensitive and cannot be used, and since it does not
  # understand the -T option and dies if we try to use it, there is no
  # risk that we use that sort by accident.
  command = [
      Ctx().sort_executable,
      '-T', Ctx().tmpdir
      ] + options + [
      infilename
      ]

  # Open the output file explicitly so that it can be closed reliably
  # (it was previously opened inline and the handle leaked):
  outfile = open(outfilename, 'w')
  try:
    # Under Windows, the subprocess module uses the Win32
    # CreateProcess, which always looks in the Windows system32
    # directory before it looks in the directories listed in the PATH
    # environment variable.  Since the Windows sort.exe is in the
    # system32 directory it will always be chosen.  A simple
    # workaround is to launch the sort in a shell.  When the shell
    # (cmd.exe) searches it only examines the directories in the PATH
    # so putting the directory with GNU sort ahead of the Windows
    # system32 directory will cause GNU sort to be chosen.
    call_command(
        command, stdout=outfile, shell=(sys.platform=='win32')
        )
  finally:
    outfile.close()
    # Restore the caller's locale setting:
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp

  # On some versions of Windows, os.system() does not return an error
  # if the command fails.  So add little consistency tests here that
  # the output file was created and has the right size:

  if not os.path.exists(outfilename):
    raise FatalError('Sort output file missing: %r' % (outfilename,))

  if os.path.getsize(outfilename) != os.path.getsize(infilename):
    raise FatalError(
        'Sort input and output file sizes differ:\n'
        '    %r (%d bytes)\n'
        '    %r (%d bytes)' % (
            infilename, os.path.getsize(infilename),
            outfilename, os.path.getsize(outfilename),
            )
        )
+
+
class CollectRevsPass(Pass):
  """Examine the CVS repository and collect revision data.

  This pass was formerly known as pass1."""

  def register_artifacts(self):
    for basename in (
        config.PROJECTS,
        config.SYMBOL_STATISTICS,
        config.METADATA_INDEX_TABLE,
        config.METADATA_STORE,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_STORE,
        ):
      self._register_temp_file(basename)
    Ctx().revision_recorder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Log().quiet("Examining all CVS ',v' files...")
    Ctx()._projects = {}
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_NEW)
    collector = CollectData(Ctx().revision_recorder, stats_keeper)
    for project in run_options.projects:
      collector.process_project(project)
    run_options.projects = None

    fatal_errors = collector.close()

    if fatal_errors:
      raise FatalException(
          "Pass 1 complete.\n"
          + "=" * 75 + "\n"
          + "Error summary:\n"
          + "\n".join(fatal_errors) + "\n"
          + "Exited due to fatal error(s)."
          )

    Ctx()._cvs_file_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
    Log().quiet("Done")
+
+
class CleanMetadataPass(Pass):
  """Clean up CVS revision metadata and write it to a new database."""

  def register_artifacts(self):
    self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_STORE)

  def _get_clean_author(self, author):
    """Return AUTHOR, converted appropriately to UTF8.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_author_decoder."""

    try:
      return self._authors[author]
    except KeyError:
      pass

    try:
      clean_author = Ctx().cvs_author_decoder(author)
    except UnicodeError:
      # Cache the failure (mapping the author to itself) so that the
      # warning is emitted only once per author; later calls return
      # the raw name from the cache above without raising again:
      self._authors[author] = author
      raise UnicodeError('Problem decoding author \'%s\'' % (author,))

    try:
      clean_author = clean_author.encode('utf8')
    except UnicodeError:
      # As above, cache the failure to suppress repeated warnings:
      self._authors[author] = author
      raise UnicodeError('Problem encoding author \'%s\'' % (author,))

    self._authors[author] = clean_author
    return clean_author

  def _get_clean_log_msg(self, log_msg):
    """Return LOG_MSG, converted appropriately to UTF8.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_log_decoder.  (Unlike authors, log messages are not
    cached.)"""

    try:
      clean_log_msg = Ctx().cvs_log_decoder(log_msg)
    except UnicodeError:
      raise UnicodeError(
          'Problem decoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

    try:
      return clean_log_msg.encode('utf8')
    except UnicodeError:
      raise UnicodeError(
          'Problem encoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

  def _clean_metadata(self, metadata):
    """Clean up METADATA by overwriting its members as necessary.

    Conversion failures are logged as warnings and recorded in
    self.warnings rather than aborting immediately, so that all
    problems are reported in a single run."""

    try:
      metadata.author = self._get_clean_author(metadata.author)
    except UnicodeError, e:
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

    try:
      metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
    except UnicodeError, e:
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

  def run(self, run_options, stats_keeper):
    """Convert all metadata records to UTF8, writing them to the clean
    database; raise FatalError at the end if any record could not be
    converted."""

    Log().quiet("Converting metadata to UTF8...")
    metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_STORE),
        artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
        DB_OPEN_READ,
        )
    metadata_clean_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_NEW,
        )

    self.warnings = False

    # A map {author : clean_author} for those known (to avoid
    # repeating warnings):
    self._authors = {}

    for id in metadata_db.iterkeys():
      metadata = metadata_db[id]

      # Record the original author name because it might be needed for
      # expanding CVS keywords:
      metadata.original_author = metadata.author

      self._clean_metadata(metadata)

      metadata_clean_db[id] = metadata

    if self.warnings:
      raise FatalError(
          'There were warnings converting author names and/or log messages\n'
          'to Unicode (see messages above).  Please restart this pass\n'
          'with one or more \'--encoding\' parameters or with\n'
          '\'--fallback-encoding\'.'
          )

    metadata_clean_db.close()
    metadata_db.close()
    Log().quiet("Done")
+
+
+class CollateSymbolsPass(Pass):
+ """Divide symbols into branches, tags, and excludes."""
+
  # Map {symbol class : keyword used for that conversion in the
  # symbol-info file written by log_symbol_summary()}.  NOTE(review):
  # plain Symbol presumably means "undecided", hence the '.'
  # placeholder -- confirm against the symbol-hints file format.
  conversion_names = {
    Trunk : 'trunk',
    Branch : 'branch',
    Tag : 'tag',
    ExcludedSymbol : 'exclude',
    Symbol : '.',
    }
+
  def register_artifacts(self):
    # This pass creates the symbol database; it needs the projects
    # file and symbol statistics created by CollectRevsPass:
    self._register_temp_file(config.SYMBOL_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_STATISTICS)
+
+ def get_symbol(self, run_options, stats):
+ """Use StrategyRules to decide what to do with a symbol.
+
+ STATS is an instance of symbol_statistics._Stats describing an
+ instance of Symbol or Trunk. To determine how the symbol is to be
+ converted, consult the StrategyRules in the project's
+ symbol_strategy_rules. Each rule is allowed a chance to change
+ the way the symbol will be converted. If the symbol is not a
+ Trunk or TypedSymbol after all rules have run, raise
+ IndeterminateSymbolException."""
+
+ symbol = stats.lod
+ rules = run_options.project_symbol_strategy_rules[symbol.project.id]
+ for rule in rules:
+ symbol = rule.get_symbol(symbol, stats)
+ assert symbol is not None
+
+ stats.check_valid(symbol)
+
+ return symbol
+
  def log_symbol_summary(self, stats, symbol):
    """Write a one-symbol summary to the symbol-info file.

    STATS describes the symbol as found in CVS; SYMBOL is the chosen
    conversion (Trunk or TypedSymbol).  A no-op unless the user
    requested a symbol-info file (see get_symbols())."""

    if not self.symbol_info_file:
      return

    # Trunk has no real name; use the '.trunk.' placeholder.  '.' is
    # used as a placeholder for "no preferred parent":
    if isinstance(symbol, Trunk):
      name = '.trunk.'
      preferred_parent_name = '.'
    else:
      name = stats.lod.name
      if symbol.preferred_parent_id is None:
        preferred_parent_name = '.'
      else:
        preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
        if isinstance(preferred_parent, Trunk):
          preferred_parent_name = '.trunk.'
        else:
          preferred_parent_name = preferred_parent.name

    if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
      symbol_path = symbol.base_path
    else:
      symbol_path = '.'

    self.symbol_info_file.write(
        '%-5d %-30s %-10s %s %s\n' % (
            stats.lod.project.id,
            name,
            self.conversion_names[symbol.__class__],
            symbol_path,
            preferred_parent_name,
            )
        )
    self.symbol_info_file.write('    # %s\n' % (stats,))
    parent_counts = stats.possible_parents.items()
    if parent_counts:
      self.symbol_info_file.write('    # Possible parents:\n')
      # Sort by descending count, breaking ties by parent (ascending):
      parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
      for (pp, count) in parent_counts:
        if isinstance(pp, Trunk):
          self.symbol_info_file.write(
              '    #     .trunk. : %d\n' % (count,)
              )
        else:
          self.symbol_info_file.write(
              '    #     %s : %d\n' % (pp.name, count,)
              )
+
  def get_symbols(self, run_options):
    """Return a map telling how to convert symbols.

    The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
    indicating how each symbol should be converted.  Trunk objects in
    SYMBOL_STATS are passed through unchanged.  One object is included
    in the return value for each line of development described in
    SYMBOL_STATS.

    Raise FatalError if there was an error."""

    errors = []
    mismatches = []

    if Ctx().symbol_info_filename is not None:
      self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
      self.symbol_info_file.write(
          '# Columns: project_id symbol_name conversion symbol_path '
          'preferred_parent_name\n'
          )
    else:
      self.symbol_info_file = None

    # Initialize each symbol strategy rule a single time, even if it
    # is used in more than one project.  First define a map from
    # object id to symbol strategy rule:
    rules = {}
    for rule_list in run_options.project_symbol_strategy_rules:
      for rule in rule_list:
        rules[id(rule)] = rule

    for rule in rules.itervalues():
      rule.start(self.symbol_stats)

    retval = {}

    # Collect problems instead of failing fast, so that all
    # problematic symbols can be reported together below:
    for stats in self.symbol_stats:
      try:
        symbol = self.get_symbol(run_options, stats)
      except IndeterminateSymbolException, e:
        self.log_symbol_summary(stats, stats.lod)
        mismatches.append(e.stats)
      except SymbolPlanError, e:
        self.log_symbol_summary(stats, stats.lod)
        errors.append(e)
      else:
        self.log_symbol_summary(stats, symbol)
        retval[stats.lod] = symbol

    for rule in rules.itervalues():
      rule.finish()

    if self.symbol_info_file:
      self.symbol_info_file.close()

    del self.symbol_info_file

    if errors or mismatches:
      s = ['Problems determining how symbols should be converted:\n']
      for e in errors:
        s.append('%s\n' % (e,))
      if mismatches:
        s.append(
            'It is not clear how the following symbols '
            'should be converted.\n'
            'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
            'and/or\n'
            '--symbol-default to resolve the ambiguity.\n'
            )
      for stats in mismatches:
        s.append('    %s\n' % (stats,))
      raise FatalError(''.join(s))
    else:
      return retval
+
  def run(self, run_options, stats_keeper):
    """Decide how symbols are converted and write the symbol database.

    Loads the projects and symbol statistics from earlier passes,
    computes the symbol conversion map, validates it, and stores it
    via create_symbol_database()."""

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    self.symbol_stats = SymbolStatistics(
        artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
        )

    symbol_map = self.get_symbols(run_options)

    # Check the symbols for consistency and bail out if there were errors:
    self.symbol_stats.check_consistency(symbol_map)

    # Check that the symbols all have SVN paths set and that the paths
    # are disjoint:
    Ctx().output_option.check_symbols(symbol_map)

    # Tell the statistics object which symbols ended up excluded:
    for symbol in symbol_map.itervalues():
      if isinstance(symbol, ExcludedSymbol):
        self.symbol_stats.exclude_symbol(symbol)

    create_symbol_database(symbol_map.values())

    # Release the statistics now that the database has been written:
    del self.symbol_stats

    Log().quiet("Done")
+
+
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    """Register the temp files written and read by this pass."""

    # Outputs:
    self._register_temp_file(config.SUMMARY_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_SUMMARY_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE)
    # Inputs from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    # The revision excluder may have artifacts of its own:
    Ctx().revision_excluder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    """Filter excluded symbols out of every CVS file and write summaries."""

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    # Pickle the serializer itself so later passes can deserialize the
    # summary datafiles written below:
    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, f, -1)
    f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    revision_excluder = Ctx().revision_excluder

    Log().quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_excluder.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      Log().verbose(cvs_file_items.cvs_file.filename)
      # NOTE: the order of these mutations matters; they progressively
      # transform the file's items before consistency is re-checked:
      cvs_file_items.filter_excluded_symbols(revision_excluder)
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        # Revisions and symbols go to separate summary databases:
        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_excluder.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
+
+
class SortRevisionSummaryPass(Pass):
  """Sort the CVS revision summary datafile written by FilterSymbolsPass."""

  def register_artifacts(self):
    """Declare the sorted datafile produced and the unsorted one consumed."""

    self._register_temp_file(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    """Sort the revision summary file into its sorted counterpart."""

    Log().quiet("Sorting CVS revision summaries...")
    unsorted_filename = artifact_manager.get_temp_file(
        config.CVS_REVS_SUMMARY_DATAFILE)
    sorted_filename = artifact_manager.get_temp_file(
        config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    sort_file(unsorted_filename, sorted_filename)
    Log().quiet("Done")
+
+
class SortSymbolSummaryPass(Pass):
  """Sort the CVS symbol summary datafile written by FilterSymbolsPass."""

  def register_artifacts(self):
    """Declare the sorted datafile produced and the unsorted one consumed."""

    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    """Sort the symbol summary file into its sorted counterpart."""

    Log().quiet("Sorting CVS symbol summaries...")
    unsorted_filename = artifact_manager.get_temp_file(
        config.CVS_SYMBOLS_SUMMARY_DATAFILE)
    sorted_filename = artifact_manager.get_temp_file(
        config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)
    sort_file(unsorted_filename, sorted_filename)
    Log().quiet("Done")
+
+
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    """Register the temp files written and read by this pass."""

    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.SUMMARY_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    constitute a changeset."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    # Group consecutive revisions that share a metadata_id; also break
    # the group whenever the time gap to the previous revision exceeds
    # COMMIT_THRESHOLD:
    for cvs_rev in db:
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset. First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    constitute a changeset."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    # Group consecutive CVSSymbols that refer to the same symbol id:
    for cvs_symbol in db:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset. First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    """Order CVSRevisions by timestamp, path, revision number, then id.

    Python 2 comparator; the revision strings are compared numerically
    component by component (so '1.10' sorts after '1.9')."""

    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        or cmp(a.id, b.id))

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items. Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated. Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS. Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well. First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other.
    dependencies = []
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if dependencies:
      # Sort the changeset_items in a defined order (chronological to the
      # extent that the timestamps are correct and unique).
      changeset_items.sort(self.compare_items)
      indexes = {}
      for (i, changeset_item) in enumerate(changeset_items):
        indexes[changeset_item.id] = i
      # How many internal dependencies would be broken by breaking the
      # Changeset after a particular index?  Mark the interval
      # [min_index, max_index) of each dependency with +1/-1:
      breaks = [0] * len(changeset_items)
      for (pred, succ,) in dependencies:
        pred_index = indexes[pred]
        succ_index = indexes[succ]
        breaks[min(pred_index, succ_index)] += 1
        breaks[max(pred_index, succ_index)] -= 1
      best_i = None
      best_count = -1
      best_time = 0
      # Prefix-sum converts the +1/-1 markers into the number of
      # dependencies spanning each potential break point:
      for i in range(1, len(breaks)):
        breaks[i] += breaks[i - 1]
      # Choose the break point that severs the most dependencies; ties
      # are decided in favor of the smallest timestamp gap between the
      # neighboring items:
      for i in range(0, len(breaks) - 1):
        if breaks[i] > best_count:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
        elif breaks[i] == best_count \
             and (changeset_items[i + 1].timestamp
                  - changeset_items[i].timestamp) < best_time:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
      # Split the (sorted) list just after the chosen index:
      return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
    else:
      return [changeset_items]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset. Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(changesets_to_split.pop())
      if len(changesets) == 1:
        # No split was needed; this fragment is dependency-free:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split:
        changesets.reverse()
        changesets_to_split.extend(changesets)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    # Revision changesets first, each fully split:
    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    # Then symbol changesets (no internal-dependency splitting needed):
    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    """Build the preliminary changesets and the item->changeset table."""

    Log().quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    # Load the serializer pickled by FilterSymbolsPass:
    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(f)
    f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if Log().is_on(Log.DEBUG):
        Log().debug(repr(changeset))
      changeset_db.store(changeset)
      # Record each member item and its owning changeset:
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    del self.cvs_item_serializer

    Log().quiet("Done")
+
+
class ProcessedChangesetLogger:
  """Collect consumed changeset ids and emit them in batched debug lines."""

  def __init__(self):
    # Ids recorded since the last flush (only populated in debug mode):
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    """Remember CHANGESET_ID for the next flush(), if debugging is on."""

    if Log().is_on(Log.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    """Emit one debug line listing the recorded ids, then forget them."""

    pending = self.processed_changeset_ids
    if pending:
      formatted = ', '.join(['%x' % id for id in pending])
      Log().debug('Consumed changeset ids %s' % (formatted,))

      del pending[:]
+
+
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    """Register the temp files written and read by this pass."""

    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)

  def get_source_changesets(self):
    """Generate all changesets stored by the previous pass."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    # Snapshot the key list up front so the database can be closed
    # after iteration:
    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()
    del old_changeset_db

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle. Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    # Choose the cheapest candidate link (a changeset with its two
    # neighbors in the cycle) according to ChangesetGraphLink ordering:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    # Replace the broken changeset with its fragments in the graph:
    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    """Consume the revision changeset graph, breaking cycles as found."""

    Log().quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Start the new item->changeset table as a copy of the old one;
    # it is then updated in place as changesets are split:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    # Copy all changesets to the new store, but only RevisionChangesets
    # enter the dependency graph.  Track the largest id seen so that
    # split changesets can be assigned fresh ids:
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
+
+
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    """Register the temp files written and read by this pass."""

    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    changeset_ids = changeset_db.keys()

    for changeset_id in changeset_ids:
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate the changesets to be stored by this pass.

    Non-revision changesets are passed through unchanged; the
    RevisionChangesets are re-emitted as OrderedChangesets in
    topological (commit) order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ,
        )

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    # Only RevisionChangesets enter the graph to be sorted; everything
    # else is yielded through unchanged:
    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)
      else:
        yield changeset

    changeset_ids = []

    # Sentry:
    changeset_ids.append(None)

    for (changeset, time_range) in changeset_graph.consume_graph():
      changeset_ids.append(changeset.id)

    # Sentry:
    changeset_ids.append(None)

    # Wrap each changeset in an OrderedChangeset carrying its ordinal
    # and the ids of its neighbors; the sentries make the first
    # predecessor and last successor None:
    for i in range(1, len(changeset_ids) - 1):
      changeset = changeset_db[changeset_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          changeset_ids[i - 1], changeset_ids[i + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    """Write the topologically sorted changesets to a new database."""

    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
+
+
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    """Register the temp files written and read by this pass."""

    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate all changesets stored by the previous pass."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    # Snapshot the key list so the database can be closed afterwards:
    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle. Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    # Choose the cheapest candidate link (a changeset with its two
    # neighbors in the cycle) according to ChangesetGraphLink ordering:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    # Replace the broken changeset with its fragments in the graph:
    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    """Consume the symbol changeset graph, breaking cycles as found."""

    Log().quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Start the new item->changeset table as a copy of the previous
    # one; it is updated in place as changesets are split:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    # Copy all changesets to the new store, but only SymbolChangesets
    # enter the dependency graph.  Track the largest id seen so that
    # split changesets can be assigned fresh ids:
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
+
+
+class BreakAllChangesetCyclesPass(Pass):
+ """Break up any dependency cycles that are closed by SymbolChangesets."""
+
  def register_artifacts(self):
    """Register the temp files written and read by this pass."""

    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
+
+ def get_source_changesets(self):
+ old_changeset_db = ChangesetDatabase(
+ artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
+ artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
+ DB_OPEN_READ)
+
+ changeset_ids = old_changeset_db.keys()
+
+ for changeset_id in changeset_ids:
+ yield old_changeset_db[changeset_id]
+
+ old_changeset_db.close()
+
  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde. Split it into non-retrograde changesets."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      # Latest ordered changeset that this item depends on:
      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      # Earliest ordered changeset that depends on this item:
      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      # Each individual item must itself be non-retrograde:
      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Partition the items: those whose predecessors reach at or past the
    # earliest successor go to the "late" half, the rest to the "early"
    # half:
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    # A retrograde changeset necessarily yields a non-trivial partition:
    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    # The late half might still be retrograde; recurse on it:
    self._split_if_retrograde(late_changeset.id)
+
  def _split_if_retrograde(self, changeset_id):
    """Split the changeset if it is retrograde; return True iff it was split.

    The changeset is considered retrograde when its latest ordered
    predecessor does not come before its earliest ordered successor
    (ordinals are only known for OrderedChangesets)."""

    node = self.changeset_graph[changeset_id]
    # Ordinals of the ordered changesets this node depends on:
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    # Ordinals of the ordered changesets that depend on this node:
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
       and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False
+
  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Choose the cheapest breakable link (a changeset together with its
    # two neighbors) according to ChangesetGraphLink ordering:
    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    # Replace the broken changeset with its fragments in the graph:
    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)
+
  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    . Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle. Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])
+
+ def run(self, run_options, stats_keeper):
+ Log().quiet("Breaking CVSSymbol dependency loops...")
+
+ Ctx()._projects = read_projects(
+ artifact_manager.get_temp_file(config.PROJECTS)
+ )
+ Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
+ Ctx()._symbol_db = SymbolDatabase()
+ Ctx()._cvs_items_db = IndexedCVSItemStore(
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
+ DB_OPEN_READ)
+
+ shutil.copyfile(
+ artifact_manager.get_temp_file(
+ config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
+ artifact_manager.get_temp_file(
+ config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
+ self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
+ artifact_manager.get_temp_file(
+ config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
+ DB_OPEN_WRITE)
+
+ self.changeset_db = ChangesetDatabase(
+ artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
+ artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
+ DB_OPEN_NEW)
+
+ self.changeset_graph = ChangesetGraph(
+ self.changeset_db, self.cvs_item_to_changeset_id
+ )
+
+ # A map {changeset_id : ordinal} for OrderedChangesets:
+ self.ordinals = {}
+ # A map {ordinal : changeset_id}:
+ ordered_changeset_map = {}
+ # A list of all BranchChangeset ids:
+ branch_changeset_ids = []
+ max_changeset_id = 0
+ for changeset in self.get_source_changesets():
+ self.changeset_db.store(changeset)
+ self.changeset_graph.add_changeset(changeset)
+ if isinstance(changeset, OrderedChangeset):
+ ordered_changeset_map[changeset.ordinal] = changeset.id
+ self.ordinals[changeset.id] = changeset.ordinal
+ elif isinstance(changeset, BranchChangeset):
+ branch_changeset_ids.append(changeset.id)
+ max_changeset_id = max(max_changeset_id, changeset.id)
+
+ # An array of ordered_changeset ids, indexed by ordinal:
+ ordered_changesets = []
+ for ordinal in range(len(ordered_changeset_map)):
+ id = ordered_changeset_map[ordinal]
+ ordered_changesets.append(id)
+
+ ordered_changeset_ids = set(ordered_changeset_map.values())
+ del ordered_changeset_map
+
+ self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
+
+ # First we scan through all BranchChangesets looking for
+ # changesets that are individually "retrograde" and splitting
+ # those up:
+ for changeset_id in branch_changeset_ids:
+ self._split_if_retrograde(changeset_id)
+
+ del self.ordinals
+
+ next_ordered_changeset = 0
+
+ self.processed_changeset_logger = ProcessedChangesetLogger()
+
+ while self.changeset_graph:
+ # Consume any nodes that don't have predecessors:
+ for (changeset, time_range) \
+ in self.changeset_graph.consume_nopred_nodes():
+ self.processed_changeset_logger.log(changeset.id)
+ if changeset.id in ordered_changeset_ids:
+ next_ordered_changeset += 1
+ ordered_changeset_ids.remove(changeset.id)
+
+ self.processed_changeset_logger.flush()
+
+ if not self.changeset_graph:
+ break
+
+ # Now work on the next ordered changeset that has not yet been
+ # processed. BreakSymbolChangesetCyclesPass has broken any
+ # cycles involving only SymbolChangesets, so the presence of a
+ # cycle implies that there is at least one ordered changeset
+ # left in the graph:
+ assert next_ordered_changeset < len(ordered_changesets)
+
+ id = ordered_changesets[next_ordered_changeset]
+ path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
+ if path:
+ if Log().is_on(Log.DEBUG):
+ Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
+ self.break_segment(path)
+ else:
+ # There were no ordered changesets among the reachable
+ # predecessors, so do generic cycle-breaking:
+ if Log().is_on(Log.DEBUG):
+ Log().debug(
+ 'Breaking generic cycle found from %s'
+ % (self.changeset_db[id],)
+ )
+ self.break_cycle(self.changeset_graph.find_cycle(id))
+
+ del self.processed_changeset_logger
+ self.changeset_graph.close()
+ self.changeset_graph = None
+ self.cvs_item_to_changeset_id = None
+ self.changeset_db = None
+
+ Log().quiet("Done")
+
+
+class TopologicalSortPass(Pass):
+ """Sort changesets into commit order."""
+
+ def register_artifacts(self):
+ self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
+ self._register_temp_file_needed(config.PROJECTS)
+ self._register_temp_file_needed(config.SYMBOL_DB)
+ self._register_temp_file_needed(config.CVS_FILES_DB)
+ self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
+ self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
+ self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
+ self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
+ self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
+
+ def get_source_changesets(self, changeset_db):
+ for changeset_id in changeset_db.keys():
+ yield changeset_db[changeset_id]
+
+ def get_changesets(self):
+ """Generate (changeset, timestamp) pairs in commit order."""
+
+ changeset_db = ChangesetDatabase(
+ artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
+ artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
+ DB_OPEN_READ)
+
+ changeset_graph = ChangesetGraph(
+ changeset_db,
+ CVSItemToChangesetTable(
+ artifact_manager.get_temp_file(
+ config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
+ ),
+ DB_OPEN_READ,
+ ),
+ )
+ symbol_changeset_ids = set()
+
+ for changeset in self.get_source_changesets(changeset_db):
+ changeset_graph.add_changeset(changeset)
+ if isinstance(changeset, SymbolChangeset):
+ symbol_changeset_ids.add(changeset.id)
+
+ # Ensure a monotonically-increasing timestamp series by keeping
+ # track of the previous timestamp and ensuring that the following
+ # one is larger.
+ timestamper = Timestamper()
+
+ for (changeset, time_range) in changeset_graph.consume_graph():
+ timestamp = timestamper.get(
+ time_range.t_max, changeset.id in symbol_changeset_ids
+ )
+ yield (changeset, timestamp)
+
+ changeset_graph.close()
+
+ def run(self, run_options, stats_keeper):
+ Log().quiet("Generating CVSRevisions in commit order...")
+
+ Ctx()._projects = read_projects(
+ artifact_manager.get_temp_file(config.PROJECTS)
+ )
+ Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
+ Ctx()._symbol_db = SymbolDatabase()
+ Ctx()._cvs_items_db = IndexedCVSItemStore(
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
+ DB_OPEN_READ)
+
+ sorted_changesets = open(
+ artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
+ 'w')
+
+ for (changeset, timestamp) in self.get_changesets():
+ sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))
+
+ sorted_changesets.close()
+
+ Ctx()._cvs_items_db.close()
+ Ctx()._symbol_db.close()
+ Ctx()._cvs_file_db.close()
+
+ Log().quiet("Done")
+
+
+class CreateRevsPass(Pass):
+ """Generate the SVNCommit <-> CVSRevision mapping databases.
+
+ SVNCommitCreator also calls SymbolingsLogger to register
+ CVSRevisions that represent an opening or closing for a path on a
+ branch or tag. See SymbolingsLogger for more details.
+
+ This pass was formerly known as pass5."""
+
+ def register_artifacts(self):
+ self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
+ self._register_temp_file(config.SVN_COMMITS_STORE)
+ self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
+ self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
+ self._register_temp_file_needed(config.PROJECTS)
+ self._register_temp_file_needed(config.CVS_FILES_DB)
+ self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
+ self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
+ self._register_temp_file_needed(config.SYMBOL_DB)
+ self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
+ self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
+ self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)
+
+ def get_changesets(self):
+ """Generate (changeset,timestamp,) tuples in commit order."""
+
+ changeset_db = ChangesetDatabase(
+ artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
+ artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
+ DB_OPEN_READ)
+
+ for line in file(
+ artifact_manager.get_temp_file(
+ config.CHANGESETS_SORTED_DATAFILE)):
+ [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
+ yield (changeset_db[changeset_id], timestamp)
+
+ changeset_db.close()
+
+ def get_svn_commits(self, creator):
+ """Generate the SVNCommits, in order."""
+
+ for (changeset, timestamp) in self.get_changesets():
+ for svn_commit in creator.process_changeset(changeset, timestamp):
+ yield svn_commit
+
+ def log_svn_commit(self, svn_commit):
+ """Output information about SVN_COMMIT."""
+
+ Log().normal(
+ 'Creating Subversion r%d (%s)'
+ % (svn_commit.revnum, svn_commit.get_description(),)
+ )
+
+ if isinstance(svn_commit, SVNRevisionCommit):
+ for cvs_rev in svn_commit.cvs_revs:
+ Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))
+
+ def run(self, run_options, stats_keeper):
+ Log().quiet("Mapping CVS revisions to Subversion commits...")
+
+ Ctx()._projects = read_projects(
+ artifact_manager.get_temp_file(config.PROJECTS)
+ )
+ Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
+ Ctx()._symbol_db = SymbolDatabase()
+ Ctx()._cvs_items_db = IndexedCVSItemStore(
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
+ DB_OPEN_READ)
+
+ Ctx()._symbolings_logger = SymbolingsLogger()
+
+ persistence_manager = PersistenceManager(DB_OPEN_NEW)
+
+ creator = SVNCommitCreator()
+ for svn_commit in self.get_svn_commits(creator):
+ self.log_svn_commit(svn_commit)
+ persistence_manager.put_svn_commit(svn_commit)
+
+ stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
+ del creator
+
+ persistence_manager.close()
+ Ctx()._symbolings_logger.close()
+ Ctx()._cvs_items_db.close()
+ Ctx()._symbol_db.close()
+ Ctx()._cvs_file_db.close()
+
+ Log().quiet("Done")
+
+
+class SortSymbolsPass(Pass):
+ """This pass was formerly known as pass6."""
+
+ def register_artifacts(self):
+ self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
+ self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)
+
+ def run(self, run_options, stats_keeper):
+ Log().quiet("Sorting symbolic name source revisions...")
+
+ sort_file(
+ artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
+ artifact_manager.get_temp_file(
+ config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
+ options=['-k', '1,1', '-k', '2,2n', '-k', '3'],
+ )
+ Log().quiet("Done")
+
+
+class IndexSymbolsPass(Pass):
+ """This pass was formerly known as pass7."""
+
+ def register_artifacts(self):
+ self._register_temp_file(config.SYMBOL_OFFSETS_DB)
+ self._register_temp_file_needed(config.PROJECTS)
+ self._register_temp_file_needed(config.SYMBOL_DB)
+ self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
+
+ def generate_offsets_for_symbolings(self):
+ """This function iterates through all the lines in
+ SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
+ SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
+ where SYMBOLIC_NAME is first encountered. This will allow us to
+ seek to the various offsets in the file and sequentially read only
+ the openings and closings that we need."""
+
+ offsets = {}
+
+ f = open(
+ artifact_manager.get_temp_file(
+ config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
+ 'r')
+ old_id = None
+ while True:
+ fpos = f.tell()
+ line = f.readline()
+ if not line:
+ break
+ id, svn_revnum, ignored = line.split(" ", 2)
+ id = int(id, 16)
+ if id != old_id:
+ Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
+ old_id = id
+ offsets[id] = fpos
+
+ f.close()
+
+ offsets_db = file(
+ artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
+ cPickle.dump(offsets, offsets_db, -1)
+ offsets_db.close()
+
+ def run(self, run_options, stats_keeper):
+ Log().quiet("Determining offsets for all symbolic names...")
+ Ctx()._projects = read_projects(
+ artifact_manager.get_temp_file(config.PROJECTS)
+ )
+ Ctx()._symbol_db = SymbolDatabase()
+ self.generate_offsets_for_symbolings()
+ Ctx()._symbol_db.close()
+ Log().quiet("Done.")
+
+
+class OutputPass(Pass):
+ """This pass was formerly known as pass8."""
+
+ def register_artifacts(self):
+ self._register_temp_file_needed(config.PROJECTS)
+ self._register_temp_file_needed(config.CVS_FILES_DB)
+ self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
+ self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
+ self._register_temp_file_needed(config.SYMBOL_DB)
+ self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
+ self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
+ self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
+ self._register_temp_file_needed(config.SVN_COMMITS_STORE)
+ self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
+ Ctx().output_option.register_artifacts(self)
+
+ def get_svn_commits(self):
+ """Generate the SVNCommits in commit order."""
+
+ persistence_manager = PersistenceManager(DB_OPEN_READ)
+
+ svn_revnum = 1 # The first non-trivial commit
+
+ # Peek at the first revision to find the date to use to initialize
+ # the repository:
+ svn_commit = persistence_manager.get_svn_commit(svn_revnum)
+
+ while svn_commit:
+ yield svn_commit
+ svn_revnum += 1
+ svn_commit = persistence_manager.get_svn_commit(svn_revnum)
+
+ persistence_manager.close()
+
+ def run(self, run_options, stats_keeper):
+ Ctx()._projects = read_projects(
+ artifact_manager.get_temp_file(config.PROJECTS)
+ )
+ Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
+ Ctx()._metadata_db = MetadataDatabase(
+ artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
+ artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
+ DB_OPEN_READ,
+ )
+ Ctx()._cvs_items_db = IndexedCVSItemStore(
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
+ artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
+ DB_OPEN_READ)
+ Ctx()._symbol_db = SymbolDatabase()
+
+ Ctx().output_option.setup(stats_keeper.svn_rev_count())
+
+ for svn_commit in self.get_svn_commits():
+ svn_commit.output(Ctx().output_option)
+
+ Ctx().output_option.cleanup()
+
+ Ctx()._symbol_db.close()
+ Ctx()._cvs_items_db.close()
+ Ctx()._metadata_db.close()
+ Ctx()._cvs_file_db.close()
+
+
+# The list of passes constituting a run of cvs2svn:
+passes = [
+ CollectRevsPass(),
+ CleanMetadataPass(),
+ CollateSymbolsPass(),
+ #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
+ FilterSymbolsPass(),
+ SortRevisionSummaryPass(),
+ SortSymbolSummaryPass(),
+ InitializeChangesetsPass(),
+ #CheckIndexedItemStoreDependenciesPass(
+ # config.CVS_ITEMS_SORTED_STORE,
+ # config.CVS_ITEMS_SORTED_INDEX_TABLE),
+ BreakRevisionChangesetCyclesPass(),
+ RevisionTopologicalSortPass(),
+ BreakSymbolChangesetCyclesPass(),
+ BreakAllChangesetCyclesPass(),
+ TopologicalSortPass(),
+ CreateRevsPass(),
+ SortSymbolsPass(),
+ IndexSymbolsPass(),
+ OutputPass(),
+ ]
+
+
diff --git a/cvs2svn_lib/persistence_manager.py b/cvs2svn_lib/persistence_manager.py
new file mode 100644
index 0000000..8a622ab
--- /dev/null
+++ b/cvs2svn_lib/persistence_manager.py
@@ -0,0 +1,106 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains class PersistenceManager."""
+
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.common import SVN_INVALID_REVNUM
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.record_table import SignedIntegerPacker
+from cvs2svn_lib.record_table import RecordTable
+from cvs2svn_lib.serializer import PrimedPickleSerializer
+from cvs2svn_lib.database import IndexedDatabase
+from cvs2svn_lib.svn_commit import SVNRevisionCommit
+from cvs2svn_lib.svn_commit import SVNInitialProjectCommit
+from cvs2svn_lib.svn_commit import SVNPrimaryCommit
+from cvs2svn_lib.svn_commit import SVNBranchCommit
+from cvs2svn_lib.svn_commit import SVNTagCommit
+from cvs2svn_lib.svn_commit import SVNPostCommit
+
+
+class PersistenceManager:
+ """The PersistenceManager allows us to effectively store SVNCommits
+ to disk and retrieve them later using only their subversion revision
+ number as the key. It also returns the subversion revision number
+ for a given CVSRevision's unique key.
+
+ All information pertinent to each SVNCommit is stored in a series of
+ on-disk databases so that SVNCommits can be retrieved on-demand.
+
+ MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
+ In 'new' mode, PersistenceManager will initialize a new set of on-disk
+ databases and be fully-featured.
+ In 'read' mode, PersistenceManager will open existing on-disk databases
+ and the set_* methods will be unavailable."""
+
+ def __init__(self, mode):
+ self.mode = mode
+ if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
+ raise RuntimeError, "Invalid 'mode' argument to PersistenceManager"
+ primer = (
+ SVNInitialProjectCommit,
+ SVNPrimaryCommit,
+ SVNPostCommit,
+ SVNBranchCommit,
+ SVNTagCommit,
+ )
+ serializer = PrimedPickleSerializer(primer)
+ self.svn_commit_db = IndexedDatabase(
+ artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
+ artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
+ mode, serializer)
+ self.cvs2svn_db = RecordTable(
+ artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
+ mode, SignedIntegerPacker(SVN_INVALID_REVNUM))
+
+ def get_svn_revnum(self, cvs_rev_id):
+ """Return the Subversion revision number in which CVS_REV_ID was
+ committed, or SVN_INVALID_REVNUM if there is no mapping for
+ CVS_REV_ID."""
+
+ return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM)
+
+ def get_svn_commit(self, svn_revnum):
+ """Return an SVNCommit that corresponds to SVN_REVNUM.
+
+ If no SVNCommit exists for revnum SVN_REVNUM, then return None."""
+
+ return self.svn_commit_db.get(svn_revnum, None)
+
+ def put_svn_commit(self, svn_commit):
+ """Record the bidirectional mapping between SVN_REVNUM and
+ CVS_REVS and record associated attributes."""
+
+ if self.mode == DB_OPEN_READ:
+ raise RuntimeError, \
+ 'Write operation attempted on read-only PersistenceManager'
+
+ self.svn_commit_db[svn_commit.revnum] = svn_commit
+
+ if isinstance(svn_commit, SVNRevisionCommit):
+ for cvs_rev in svn_commit.cvs_revs:
+ self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum
+
+ def close(self):
+ self.cvs2svn_db.close()
+ self.cvs2svn_db = None
+ self.svn_commit_db.close()
+ self.svn_commit_db = None
+
+
diff --git a/cvs2svn_lib/process.py b/cvs2svn_lib/process.py
new file mode 100644
index 0000000..56469ce
--- /dev/null
+++ b/cvs2svn_lib/process.py
@@ -0,0 +1,116 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains generic utilities used by cvs2svn."""
+
+
+import subprocess
+
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import CommandError
+
+
+def call_command(command, **kw):
+ """Call the specified command, checking that it exits successfully.
+
+ Raise a FatalError if the command cannot be executed, or if it exits
+ with a non-zero exit code. Pass KW as keyword arguments to
+ subprocess.call()."""
+
+ try:
+ retcode = subprocess.call(command, **kw)
+ if retcode < 0:
+ raise FatalError(
+ 'Command terminated by signal %d: "%s"'
+ % (-retcode, ' '.join(command),)
+ )
+ elif retcode > 0:
+ raise FatalError(
+ 'Command failed with return code %d: "%s"'
+ % (retcode, ' '.join(command),)
+ )
+ except OSError, e:
+ raise FatalError(
+ 'Command execution failed (%s): "%s"'
+ % (e, ' '.join(command),)
+ )
+
+
+class CommandFailedException(Exception):
+ """Exception raised if check_command_runs() fails."""
+
+ pass
+
+
+def check_command_runs(cmd, cmdname):
+ """Check whether the command CMD can be executed without errors.
+
+ CMD is a list or string, as accepted by subprocess.Popen(). CMDNAME
+ is the name of the command as it should be included in exception
+ error messages.
+
+ This function checks three things: (1) the command can be run
+ without throwing an OSError; (2) it exits with status=0; (3) it
+ doesn't output anything to stderr. If any of these conditions is
+ not met, raise a CommandFailedException describing the problem."""
+
+ try:
+ pipe = subprocess.Popen(
+ cmd,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+ except OSError, e:
+ raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
+ pipe.stdin.close()
+ pipe.stdout.read()
+ errmsg = pipe.stderr.read()
+ status = pipe.wait()
+ if status or errmsg:
+ msg = 'error executing %s: status %s' % (cmdname, status,)
+ if errmsg:
+ msg += ', error output:\n%s' % (errmsg,)
+ raise CommandFailedException(msg)
+
+
+class PipeStream(object):
+ """A file-like object from which revision contents can be read."""
+
+ def __init__(self, pipe_command):
+ self._pipe_command_str = ' '.join(pipe_command)
+ self.pipe = subprocess.Popen(
+ pipe_command,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+ self.pipe.stdin.close()
+
+ def read(self, size=None):
+ if size is None:
+ return self.pipe.stdout.read()
+ else:
+ return self.pipe.stdout.read(size)
+
+ def close(self):
+ self.pipe.stdout.close()
+ error_output = self.pipe.stderr.read()
+ exit_status = self.pipe.wait()
+ if exit_status:
+ raise CommandError(self._pipe_command_str, exit_status, error_output)
+
+
diff --git a/cvs2svn_lib/project.py b/cvs2svn_lib/project.py
new file mode 100644
index 0000000..0fe92df
--- /dev/null
+++ b/cvs2svn_lib/project.py
@@ -0,0 +1,219 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains database facilities used by cvs2svn."""
+
+
+import re
+import os
+import cPickle
+
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import IllegalSVNPathError
+from cvs2svn_lib.common import normalize_svn_path
+from cvs2svn_lib.common import verify_paths_disjoint
+from cvs2svn_lib.symbol_transform import CompoundSymbolTransform
+
+
+class FileInAndOutOfAtticException(Exception):
+ def __init__(self, non_attic_path, attic_path):
+ Exception.__init__(
+ self,
+ "A CVS repository cannot contain both %s and %s"
+ % (non_attic_path, attic_path))
+
+ self.non_attic_path = non_attic_path
+ self.attic_path = attic_path
+
+
+def normalize_ttb_path(opt, path, allow_empty=False):
+ try:
+ return normalize_svn_path(path, allow_empty)
+ except IllegalSVNPathError, e:
+ raise FatalError('Problem with %s: %s' % (opt, e,))
+
+
+class Project(object):
+ """A project within a CVS repository."""
+
+ def __init__(
+ self, id, project_cvs_repos_path,
+ initial_directories=[],
+ symbol_transforms=None,
+ ):
+ """Create a new Project record.
+
+ ID is a unique id for this project. PROJECT_CVS_REPOS_PATH is the
+ main CVS directory for this project (within the filesystem).
+
+ INITIAL_DIRECTORIES is an iterable of all SVN directories that
+ should be created when the project is first created. Normally,
+ this should include the trunk, branches, and tags directory.
+
+ SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances
+ which will be used to transform any symbol names within this
+ project."""
+
+ self.id = id
+
+ self.project_cvs_repos_path = os.path.normpath(project_cvs_repos_path)
+ if not os.path.isdir(self.project_cvs_repos_path):
+ raise FatalError("The specified CVS repository path '%s' is not an "
+ "existing directory." % self.project_cvs_repos_path)
+
+ self.cvs_repository_root, self.cvs_module = \
+ self.determine_repository_root(
+ os.path.abspath(self.project_cvs_repos_path))
+
+ # A regexp matching project_cvs_repos_path plus an optional separator:
+ self.project_prefix_re = re.compile(
+ r'^' + re.escape(self.project_cvs_repos_path)
+ + r'(' + re.escape(os.sep) + r'|$)')
+
+ # The SVN directories to add when the project is first created:
+ self._initial_directories = []
+
+ for path in initial_directories:
+ try:
+ path = normalize_svn_path(path, False)
+ except IllegalSVNPathError, e:
+ raise FatalError(
+ 'Initial directory %r is not a legal SVN path: %s'
+ % (path, e,)
+ )
+ self._initial_directories.append(path)
+
+ verify_paths_disjoint(*self._initial_directories)
+
+ # A list of transformation rules (regexp, replacement) applied to
+ # symbol names in this project.
+ if symbol_transforms is None:
+ symbol_transforms = []
+
+ self.symbol_transform = CompoundSymbolTransform(symbol_transforms)
+
+ # The ID of the Trunk instance for this Project. This member is
+ # filled in during CollectRevsPass.
+ self.trunk_id = None
+
+ # The ID of the CVSDirectory representing the root directory of
+ # this project. This member is filled in during CollectRevsPass.
+ self.root_cvs_directory_id = None
+
+ def __eq__(self, other):
+ return self.id == other.id
+
+ def __cmp__(self, other):
+ return cmp(self.cvs_module, other.cvs_module) \
+ or cmp(self.id, other.id)
+
+ def __hash__(self):
+ return self.id
+
+ @staticmethod
+ def determine_repository_root(path):
+ """Ascend above the specified PATH if necessary to find the
+ cvs_repository_root (a directory containing a CVSROOT directory)
+ and the cvs_module (the path of the conversion root within the cvs
+ repository). Return the root path and the module path of this
+ project relative to the root.
+
+    NB: cvs_module must be separated by '/', *not* by os.sep."""
+
+ def is_cvs_repository_root(path):
+ return os.path.isdir(os.path.join(path, 'CVSROOT'))
+
+ original_path = path
+ cvs_module = ''
+ while not is_cvs_repository_root(path):
+ # Step up one directory:
+ prev_path = path
+ path, module_component = os.path.split(path)
+ if path == prev_path:
+ # Hit the root (of the drive, on Windows) without finding a
+ # CVSROOT dir.
+ raise FatalError(
+ "the path '%s' is not a CVS repository, nor a path "
+ "within a CVS repository. A CVS repository contains "
+ "a CVSROOT directory within its root directory."
+ % (original_path,))
+
+ cvs_module = module_component + "/" + cvs_module
+
+ return path, cvs_module
+
+ def transform_symbol(self, cvs_file, symbol_name, revision):
+ """Transform the symbol SYMBOL_NAME.
+
+ SYMBOL_NAME refers to revision number REVISION in CVS_FILE.
+ REVISION is the CVS revision number as a string, with zeros
+ removed (e.g., '1.7' or '1.7.2'). Use the renaming rules
+ specified with --symbol-transform to possibly rename the symbol.
+ Return the transformed symbol name, the original name if it should
+ not be transformed, or None if the symbol should be omitted from
+ the conversion."""
+
+ return self.symbol_transform.transform(cvs_file, symbol_name, revision)
+
+ def get_trunk(self):
+ """Return the Trunk instance for this project.
+
+ This method can only be called after self.trunk_id has been
+ initialized in CollectRevsPass."""
+
+ return Ctx()._symbol_db.get_symbol(self.trunk_id)
+
+ def get_root_cvs_directory(self):
+ """Return the root CVSDirectory instance for this project.
+
+ This method can only be called after self.root_cvs_directory_id
+ has been initialized in CollectRevsPass."""
+
+ return Ctx()._cvs_file_db.get_file(self.root_cvs_directory_id)
+
+ def get_initial_directories(self):
+ """Generate the project's initial SVN directories.
+
+ Yield as strings the SVN paths of directories that should be
+ created when the project is first created."""
+
+ # Yield the path of the Trunk symbol for this project (which might
+ # differ from the one passed to the --trunk option because of
+ # SymbolStrategyRules). The trunk path might be '' during a
+ # trunk-only conversion, but that is OK because DumpfileDelegate
+ # considers that directory to exist already and will therefore
+ # ignore it:
+ yield self.get_trunk().base_path
+
+ for path in self._initial_directories:
+ yield path
+
+ def __str__(self):
+ return self.project_cvs_repos_path
+
+
+def read_projects(filename):
+ retval = {}
+ for project in cPickle.load(open(filename, 'rb')):
+ retval[project.id] = project
+ return retval
+
+
+def write_projects(filename):
+ cPickle.dump(Ctx()._projects.values(), open(filename, 'wb'), -1)
+
+
diff --git a/cvs2svn_lib/property_setters.py b/cvs2svn_lib/property_setters.py
new file mode 100644
index 0000000..7cf379e
--- /dev/null
+++ b/cvs2svn_lib/property_setters.py
@@ -0,0 +1,385 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes to set Subversion properties on files."""
+
+
+import os
+import re
+import fnmatch
+import ConfigParser
+from cStringIO import StringIO
+
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.log import Log
+
+
class SVNPropertySetter:
  """Abstract base for objects that set properties on an SVNCommitItem."""

  def set_properties(self, s_item):
    """Set any properties that can be determined for S_ITEM.

    S_ITEM is an SVNCommitItem; implementations modify
    S_ITEM.svn_props in place."""

    raise NotImplementedError()
+
+
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number in the cvs2svn:cvs-rev property."""

  propname = 'cvs2svn:cvs-rev'

  def set_properties(self, s_item):
    # Never override a property that has already been set:
    if self.propname not in s_item.svn_props:
      s_item.svn_props[self.propname] = s_item.cvs_rev.rev
      s_item.svn_props_changed = True
+
+
class ExecutablePropertySetter(SVNPropertySetter):
  """Set svn:executable when cvs_rev.cvs_file.executable is true."""

  propname = 'svn:executable'

  def set_properties(self, s_item):
    # Leave an already-set property alone; otherwise mark the file
    # executable iff CVS says it was:
    if (self.propname not in s_item.svn_props
        and s_item.cvs_rev.cvs_file.executable):
      s_item.svn_props[self.propname] = '*'
+
+
class CVSBinaryFileEOLStyleSetter(SVNPropertySetter):
  """Force svn:eol-style to None for files checked in with CVS '-kb'."""

  propname = 'svn:eol-style'

  def set_properties(self, s_item):
    # Only act when the property is still undecided and the CVS mode
    # marks the file binary:
    if (self.propname not in s_item.svn_props
        and s_item.cvs_rev.cvs_file.mode == 'b'):
      s_item.svn_props[self.propname] = None
+
+
class MimeMapper(SVNPropertySetter):
  """Set svn:mime-type from a mime.types-style mapping file.

  Each non-comment line of the mapping file maps a MIME type to one
  or more extensions, e.g.:

      text/plain c h cpp

  Lookups use the file's extension; a file with no extension (or one
  ending in a period) is looked up by its basename instead, which
  allows mappings for files such as README or Makefile."""

  propname = 'svn:mime-type'

  def __init__(self, mime_types_file):
    """Load the mappings from MIME_TYPES_FILE.

    Ambiguous mappings (one extension bound to two different types)
    are reported via Log().error(); the last one read wins."""

    # Map {extension_or_basename : mime_type}:
    self.mappings = { }

    # Fixes vs. the original: use open() instead of the Python-2-only
    # file() builtin, close the handle instead of leaking it, and do
    # not shadow the builtin 'type':
    f = open(mime_types_file)
    try:
      for line in f:
        if line.startswith("#"):
          continue

        # format of a line is something like
        # text/plain c h cpp
        fields = line.split()
        if len(fields) < 2:
          continue
        mime_type = fields.pop(0)
        for ext in fields:
          if ext in self.mappings and self.mappings[ext] != mime_type:
            Log().error(
                "%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                % (warning_prefix, ext, self.mappings[ext], mime_type)
                )
          self.mappings[ext] = mime_type
    finally:
      f.close()

  def set_properties(self, s_item):
    """Set svn:mime-type on S_ITEM if a mapping matches and it is unset."""

    if self.propname in s_item.svn_props:
      return

    basename, extension = os.path.splitext(s_item.cvs_rev.cvs_file.basename)

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props[self.propname] = mime_type
+
+
class AutoPropsPropertySetter(SVNPropertySetter):
  """Set arbitrary svn properties based on an auto-props configuration.

  This class supports case-sensitive or case-insensitive pattern
  matching.  The command-line default is case-insensitive behavior,
  consistent with Subversion (see
  http://subversion.tigris.org/issues/show_bug.cgi?id=2036).

  As a special extension to Subversion's auto-props handling, if a
  property name is preceded by a '!' then that property is forced to
  be left unset.

  If a property specified in auto-props has already been set to a
  different value, print a warning and leave the old property value
  unchanged.

  Python's treatment of whitespaces in the ConfigParser module is
  buggy and inconsistent.  Usually spaces are preserved, but if there
  is at least one semicolon in the value, and the *first* semicolon is
  preceded by a space, then that is treated as the start of a comment
  and the rest of the line is silently discarded."""

  # A property name: no whitespace, '!', or '=' characters.
  property_name_pattern = r'(?P<name>[^\!\=\s]+)'
  # '!name' -- force the property to be left unset:
  property_unset_re = re.compile(
      r'^\!\s*' + property_name_pattern + r'$'
      )
  # 'name = value':
  property_set_re = re.compile(
      r'^' + property_name_pattern + r'\s*\=\s*(?P<value>.*)$'
      )
  # bare 'name' -- set the property to the empty string:
  property_novalue_re = re.compile(
      r'^' + property_name_pattern + r'$'
      )

  # A token wrapped entirely in matching single or double quotes:
  quoted_re = re.compile(
      r'^([\'\"]).*\1$'
      )
  # A space followed by a semicolon; see the ConfigParser caveat in
  # the class docstring:
  comment_re = re.compile(r'\s;')

  class Pattern:
    """Describes the properties to be set for files matching a pattern."""

    def __init__(self, pattern, propdict):
      # A glob-like pattern:
      self.pattern = pattern
      # A dictionary of properties that should be set:
      self.propdict = propdict

    def match(self, basename):
      """Does the file with the specified basename match pattern?"""

      return fnmatch.fnmatch(basename, self.pattern)

  def __init__(self, configfilename, ignore_case=True):
    """Read the auto-props rules from CONFIGFILENAME.

    If IGNORE_CASE, match patterns case-insensitively (ConfigParser's
    default optionxform already lower-cases option names)."""

    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = self.squash_case
    else:
      # Keep option (pattern) names exactly as written in the file:
      config.optionxform = self.preserve_case
      self.transform_case = self.preserve_case

    configtext = open(configfilename).read()
    # Warn about the ConfigParser space-semicolon comment quirk
    # described in the class docstring:
    if self.comment_re.search(configtext):
      Log().warn(
          '%s: Please be aware that a space followed by a\n'
          'semicolon is sometimes treated as a comment in configuration\n'
          'files.  This pattern was seen in\n'
          '    %s\n'
          'Please make sure that you have not inadvertently commented\n'
          'out part of an important line.'
          % (warning_prefix, configfilename,)
          )

    config.readfp(StringIO(configtext), configfilename)
    self.patterns = []
    # Process sections (and patterns within them) in sorted order so
    # the rule ordering is deterministic:
    sections = config.sections()
    sections.sort()
    for section in sections:
      if self.transform_case(section) == 'auto-props':
        patterns = config.options(section)
        patterns.sort()
        for pattern in patterns:
          value = config.get(section, pattern)
          if value:
            self._add_pattern(pattern, value)

  def squash_case(self, s):
    """Normalize S to lower case (case-insensitive matching)."""

    return s.lower()

  def preserve_case(self, s):
    """Return S unchanged (case-sensitive matching)."""

    return s

  def _add_pattern(self, pattern, props):
    """Parse PROPS, the auto-props value for PATTERN, into a Pattern.

    PROPS is a ';'-separated list of clauses, each 'name = value',
    bare 'name' (empty string), or '!name' (force unset)."""

    propdict = {}
    if self.quoted_re.match(pattern):
      Log().warn(
          '%s: Quoting is not supported in auto-props; please verify rule\n'
          'for %r.  (Using pattern including quotation marks.)\n'
          % (warning_prefix, pattern,)
          )
    for prop in props.split(';'):
      prop = prop.strip()
      # '!name' -- leave the property unset:
      m = self.property_unset_re.match(prop)
      if m:
        name = m.group('name')
        Log().debug(
            'auto-props: For %r, leaving %r unset.' % (pattern, name,)
            )
        propdict[name] = None
        continue

      # 'name = value':
      m = self.property_set_re.match(prop)
      if m:
        name = m.group('name')
        value = m.group('value')
        if self.quoted_re.match(value):
          Log().warn(
              '%s: Quoting is not supported in auto-props; please verify\n'
              'rule %r for pattern %r.  (Using value\n'
              'including quotation marks.)\n'
              % (warning_prefix, prop, pattern,)
              )
        Log().debug(
            'auto-props: For %r, setting %r to %r.' % (pattern, name, value,)
            )
        propdict[name] = value
        continue

      # bare 'name' -- set the property to the empty string:
      m = self.property_novalue_re.match(prop)
      if m:
        name = m.group('name')
        Log().debug(
            'auto-props: For %r, setting %r to the empty string'
            % (pattern, name,)
            )
        propdict[name] = ''
        continue

      Log().warn(
          '%s: in auto-props line for %r, value %r cannot be parsed (ignored)'
          % (warning_prefix, pattern, prop,)
          )

    self.patterns.append(self.Pattern(self.transform_case(pattern), propdict))

  def get_propdict(self, cvs_file):
    """Return the merged property dictionary applicable to CVS_FILE.

    Merge the propdicts of all patterns that match CVS_FILE's
    basename; contradictory values are reported and the first one
    seen wins."""

    basename = self.transform_case(cvs_file.basename)
    propdict = {}
    for pattern in self.patterns:
      if pattern.match(basename):
        for (key,value) in pattern.propdict.items():
          if key in propdict:
            if propdict[key] != value:
              Log().warn(
                  "Contradictory values set for property '%s' for file %s."
                  % (key, cvs_file,))
          else:
            propdict[key] = value

    return propdict

  def set_properties(self, s_item):
    """Apply the matching auto-props values to S_ITEM.svn_props.

    Properties that are already set win; a conflicting auto-props
    value is reported and ignored."""

    propdict = self.get_propdict(s_item.cvs_rev.cvs_file)
    for (k,v) in propdict.items():
      if k in s_item.svn_props:
        if s_item.svn_props[k] != v:
          Log().warn(
              "Property '%s' already set to %r for file %s; "
              "auto-props value (%r) ignored."
              % (k, s_item.svn_props[k], s_item.cvs_rev.cvs_path, v,))
      else:
        s_item.svn_props[k] = v
+
+
class CVSBinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Default binary files' svn:mime-type to 'application/octet-stream'.

  Applies only when svn:mime-type has not already been determined."""

  propname = 'svn:mime-type'

  def set_properties(self, s_item):
    if (self.propname not in s_item.svn_props
        and s_item.cvs_rev.cvs_file.mode == 'b'):
      s_item.svn_props[self.propname] = 'application/octet-stream'
+
+
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive svn:eol-style from an already-known svn:mime-type.

  If svn:mime-type is known but svn:eol-style is not, use 'native'
  for 'text/*' types and force the property to remain unset for all
  other types.  See also issue #39."""

  propname = 'svn:eol-style'

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return

    mime_type = s_item.svn_props.get('svn:mime-type', None)
    if mime_type is None:
      return

    if mime_type.startswith("text/"):
      s_item.svn_props[self.propname] = 'native'
    else:
      s_item.svn_props[self.propname] = None
+
+
class DefaultEOLStyleSetter(SVNPropertySetter):
  """Supply a default svn:eol-style for files that still lack one."""

  propname = 'svn:eol-style'

  def __init__(self, value):
    """VALUE is the eol-style used as the default."""

    self.value = value

  def set_properties(self, s_item):
    if self.propname not in s_item.svn_props:
      s_item.svn_props[self.propname] = self.value
+
+
class SVNBinaryFileKeywordsPropertySetter(SVNPropertySetter):
  """Turn off svn:keywords for files whose svn:eol-style is binary."""

  propname = 'svn:keywords'

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return

    # A false svn:eol-style (missing or None) marks the file binary:
    if not s_item.svn_props.get('svn:eol-style'):
      s_item.svn_props[self.propname] = None
+
+
class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords, based on the file's CVS mode, if not yet set.

  See issue #2."""

  propname = 'svn:keywords'

  def __init__(self, value):
    """VALUE is used for svn:keywords whenever the property is set."""

    self.value = value

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return

    # CVS modes None, 'kv', and 'kvl' all imply keyword expansion:
    if s_item.cvs_rev.cvs_file.mode in (None, 'kv', 'kvl'):
      s_item.svn_props[self.propname] = self.value
+
+
diff --git a/cvs2svn_lib/rcs_revision_manager.py b/cvs2svn_lib/rcs_revision_manager.py
new file mode 100644
index 0000000..1c2dfcf
--- /dev/null
+++ b/cvs2svn_lib/rcs_revision_manager.py
@@ -0,0 +1,51 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Access the CVS repository via RCS's 'co' command."""
+
+
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.process import check_command_runs
+from cvs2svn_lib.process import PipeStream
+from cvs2svn_lib.process import CommandFailedException
+from cvs2svn_lib.revision_manager import RevisionReader
+
+
class RCSRevisionReader(RevisionReader):
  """A RevisionReader that reads the contents via RCS."""

  def __init__(self, co_executable):
    """CO_EXECUTABLE is the path/name of the RCS 'co' program.

    Raise FatalError if 'co -V' cannot be run successfully, since
    that means revision contents cannot be retrieved later."""

    self.co_executable = co_executable
    try:
      # Verify up front that 'co' is runnable at all:
      check_command_runs([self.co_executable, '-V'], self.co_executable)
    except CommandFailedException, e:
      raise FatalError('%s\n'
                       'Please check that co is installed and in your PATH\n'
                       '(it is a part of the RCS software).' % (e,))

  def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
    """Return a stream from which CVS_REV's contents can be read.

    If SUPPRESS_KEYWORD_SUBSTITUTION, pass '-kk' to 'co' so that RCS
    keywords are not expanded in the output."""

    pipe_cmd = [
        self.co_executable,
        '-q',                      # quiet: suppress diagnostics
        '-x,v',                    # only look for ,v files
        '-p%s' % (cvs_rev.rev,)    # print the requested revision to stdout
        ]
    if suppress_keyword_substitution:
      pipe_cmd.append('-kk')
    pipe_cmd.append(cvs_rev.cvs_file.filename)
    return PipeStream(pipe_cmd)
+
+
diff --git a/cvs2svn_lib/rcs_stream.py b/cvs2svn_lib/rcs_stream.py
new file mode 100644
index 0000000..b893819
--- /dev/null
+++ b/cvs2svn_lib/rcs_stream.py
@@ -0,0 +1,149 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module processes RCS diffs (deltas)."""
+
+
+import re
+
def msplit(s):
  """Split S into a list of lines.

  Only LF counts as a line separator (unlike str.splitlines(True),
  which would also split at CR).  The separators are kept as part of
  the lines; a trailing fragment with no final newline is returned
  as-is, and an empty S yields an empty list."""

  # Fix: the original bound the result to a local named 're', which
  # shadowed the imported 're' module inside this function.
  lines = [chunk + "\n" for chunk in s.split("\n")]
  # split() never produces a trailing separator, so the last element
  # received a spurious newline; strip it, and drop the element
  # entirely if it is now empty.
  lines[-1] = lines[-1][:-1]
  if not lines[-1]:
    del lines[-1]
  return lines
+
+
class MalformedDeltaException(Exception):
  """Raised when a malformed RCS delta is encountered."""
+
class RCSStream:
  """This class represents a single file object to which RCS deltas can be
  applied in various ways."""

  # An ed-style command line: 'a' (add) or 'd' (delete), the start
  # line (1-based, in old-text coordinates), and a count.
  ad_command = re.compile(r'^([ad])(\d+)\s(\d+)\n$')
  # An 'a' command alone; used to look ahead for substitutions.
  a_command = re.compile(r'^a(\d+)\s(\d+)\n$')

  def __init__(self, text):
    """Instantiate and initialize the file content with TEXT."""

    self._texts = msplit(text)

  def get_text(self):
    """Return the current file content."""

    return "".join(self._texts)

  def apply_diff(self, diff):
    """Apply the RCS diff DIFF to the current file content.

    DIFF is a sequence of ed-style 'a'/'d' commands whose line
    numbers refer to the *old* text.  Raise MalformedDeltaException
    if DIFF is not well formed."""

    # Lines of the new text, accumulated as commands are consumed:
    ntexts = []
    # Offset into self._texts of the first line not yet copied over:
    ooff = 0
    diffs = msplit(diff)
    i = 0
    while i < len(diffs):
      admatch = self.ad_command.match(diffs[i])
      if not admatch:
        raise MalformedDeltaException('Bad ed command')
      i += 1
      sl = int(admatch.group(2))  # start line from the command
      cn = int(admatch.group(3))  # number of lines affected
      if admatch.group(1) == 'd': # "d" - Delete command
        # Deletion start lines are 1-based; convert to 0-based:
        sl -= 1
        if sl < ooff:
          raise MalformedDeltaException('Deletion before last edit')
        if sl > len(self._texts):
          raise MalformedDeltaException('Deletion past file end')
        if sl + cn > len(self._texts):
          raise MalformedDeltaException('Deletion beyond file end')
        # Copy the untouched lines, then skip over the deleted ones:
        ntexts += self._texts[ooff:sl]
        ooff = sl + cn
      else: # "a" - Add command
        if sl < ooff: # Also catches same place
          raise MalformedDeltaException('Insertion before last edit')
        if sl > len(self._texts):
          raise MalformedDeltaException('Insertion past file end')
        # Copy the untouched lines, then the cn inserted lines that
        # follow the command within the delta itself:
        ntexts += self._texts[ooff:sl] + diffs[i:i + cn]
        ooff = sl
        i += cn
    self._texts = ntexts + self._texts[ooff:]

  def invert_diff(self, diff):
    """Apply the RCS diff DIFF to the current file content and simultaneously
    generate an RCS diff suitable for reverting the change."""

    ntexts = []
    ooff = 0
    diffs = msplit(diff)
    # The inverse delta being accumulated:
    ndiffs = []
    # Running difference between new-text and old-text line numbers;
    # used to express inverse commands in new-text coordinates:
    adjust = 0
    i = 0
    while i < len(diffs):
      admatch = self.ad_command.match(diffs[i])
      if not admatch:
        raise MalformedDeltaException('Bad ed command')
      i += 1
      sl = int(admatch.group(2))  # start line from the command
      cn = int(admatch.group(3))  # number of lines affected
      if admatch.group(1) == 'd': # "d" - Delete command
        # Deletion start lines are 1-based; convert to 0-based:
        sl -= 1
        if sl < ooff:
          raise MalformedDeltaException('Deletion before last edit')
        if sl > len(self._texts):
          raise MalformedDeltaException('Deletion past file end')
        if sl + cn > len(self._texts):
          raise MalformedDeltaException('Deletion beyond file end')
        # Handle substitution explicitly, as add must come after del
        # (last add may end in no newline, so no command can follow).
        if i < len(diffs):
          amatch = self.a_command.match(diffs[i])
        else:
          amatch = None
        if amatch and int(amatch.group(1)) == sl + cn:
          # A delete immediately followed by an add at the same spot
          # is a substitution; invert it as a single d+a pair:
          cn2 = int(amatch.group(2))
          i += 1
          ndiffs += ["d%d %d\na%d %d\n" % \
                     (sl + 1 + adjust, cn2, sl + adjust + cn2, cn)] + \
                    self._texts[sl:sl + cn]
          ntexts += self._texts[ooff:sl] + diffs[i:i + cn2]
          adjust += cn2 - cn
          i += cn2
        else:
          # A plain delete inverts to an add of the removed lines:
          ndiffs += ["a%d %d\n" % (sl + adjust, cn)] + \
                    self._texts[sl:sl + cn]
          ntexts += self._texts[ooff:sl]
          adjust -= cn
        ooff = sl + cn
      else: # "a" - Add command
        if sl < ooff: # Also catches same place
          raise MalformedDeltaException('Insertion before last edit')
        if sl > len(self._texts):
          raise MalformedDeltaException('Insertion past file end')
        # An add inverts to a delete of the inserted lines:
        ndiffs += ["d%d %d\n" % (sl + 1 + adjust, cn)]
        ntexts += self._texts[ooff:sl] + diffs[i:i + cn]
        ooff = sl
        adjust += cn
        i += cn
    self._texts = ntexts + self._texts[ooff:]
    return "".join(ndiffs)
+
diff --git a/cvs2svn_lib/record_table.py b/cvs2svn_lib/record_table.py
new file mode 100644
index 0000000..41ab84a
--- /dev/null
+++ b/cvs2svn_lib/record_table.py
@@ -0,0 +1,399 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Classes to manage Databases of fixed-length records.
+
+The databases map small, non-negative integers to fixed-size records.
+The records are written in index order to a disk file. Gaps in the
+index sequence leave gaps in the data file, so for best space
+efficiency the indexes of existing records should be approximately
+continuous.
+
+To use a RecordTable, you need a class derived from Packer which can
+serialize/deserialize your records into fixed-size strings. Deriving
+classes have to specify how to pack records into strings and unpack
+strings into records by overwriting the pack() and unpack() methods
+respectively.
+
+Note that these classes keep track of gaps in the records that have
+been written by filling them with packer.empty_value. If a record is
+read which contains packer.empty_value, then a KeyError is raised."""
+
+
+import os
+import types
+import struct
+import mmap
+
+from cvs2svn_lib.common import DB_OPEN_READ
+from cvs2svn_lib.common import DB_OPEN_WRITE
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.log import Log
+
+
+# A unique value that can be used to stand for "unset" without
+# preventing the use of None.
+_unset = object()
+
+
class Packer(object):
  """Serialize/deserialize records to/from fixed-length strings.

  Subclasses must override pack() and unpack().  empty_value is the
  padding stored for record slots that were never set; reading it
  back is reported to callers as a missing record."""

  def __init__(self, record_len, empty_value=None):
    """RECORD_LEN is the fixed record size in bytes.

    If EMPTY_VALUE is None, use RECORD_LEN NUL bytes; otherwise it
    must be a string of exactly RECORD_LEN bytes."""

    self.record_len = record_len
    if empty_value is None:
      self.empty_value = '\0' * self.record_len
    else:
      # isinstance replaces the Python-2-only 'type(x) is
      # types.StringType' check (and also accepts str subclasses):
      assert isinstance(empty_value, str)
      assert len(empty_value) == self.record_len
      self.empty_value = empty_value

  def pack(self, v):
    """Pack record V into a string of length self.record_len."""

    raise NotImplementedError()

  def unpack(self, s):
    """Unpack string S into a record."""

    raise NotImplementedError()
+
+
class StructPacker(Packer):
  """A Packer that (de)serializes via a struct format string."""

  def __init__(self, format, empty_value=_unset):
    """FORMAT is a struct format describing exactly one value.

    EMPTY_VALUE, if given, is an *unpacked* record; it is packed here
    before being passed on to the Packer constructor."""

    self.format = format
    if empty_value is _unset:
      packed_empty = None
    else:
      packed_empty = self.pack(empty_value)

    Packer.__init__(
        self, struct.calcsize(self.format), empty_value=packed_empty)

  def pack(self, v):
    return struct.pack(self.format, v)

  def unpack(self, v):
    return struct.unpack(self.format, v)[0]
+
+
class UnsignedIntegerPacker(StructPacker):
  """Pack records that are native-size unsigned integers."""

  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=I', empty_value=empty_value)
+
+
class SignedIntegerPacker(StructPacker):
  """Pack records that are native-size signed integers."""

  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=i', empty_value=empty_value)
+
+
class FileOffsetPacker(Packer):
  """A packer suitable for file offsets.

  Only the 5 least significant bytes of each offset are stored --
  enough bits for 1 TiB.  Without large file support only the lowest
  31 bits can be nonzero anyway, limiting offsets to 2 GiB."""

  # Offsets are serialized as 8-byte little-endian unsigned longs...
  INDEX_FORMAT = '<Q'
  # ...of which only this many (low-order) bytes are retained:
  INDEX_FORMAT_LEN = 5

  PAD = '\0' * (struct.calcsize(INDEX_FORMAT) - INDEX_FORMAT_LEN)

  def __init__(self):
    Packer.__init__(self, self.INDEX_FORMAT_LEN)

  def pack(self, v):
    full = struct.pack(self.INDEX_FORMAT, v)
    return full[:self.INDEX_FORMAT_LEN]

  def unpack(self, s):
    (offset,) = struct.unpack(self.INDEX_FORMAT, s + self.PAD)
    return offset
+
+
class RecordTableAccessError(RuntimeError):
  """A record table opened read-only was written to or deleted from."""
+
+
class AbstractRecordTable:
  """Base class for a disk-backed map from small integers to records.

  Records are fixed-length; PACKER converts between records and their
  packed string form.  A packed value equal to packer.empty_value
  marks an index that has never been set.  Subclasses maintain
  self._limit, the index just beyond the last record ever written."""

  def __init__(self, filename, mode, packer):
    # FILENAME is the backing file, MODE one of the DB_OPEN_*
    # constants, and PACKER a Packer for the record type.
    self.filename = filename
    self.mode = mode
    self.packer = packer
    # Simplify and speed access to this oft-needed quantity:
    self._record_len = self.packer.record_len

  def __str__(self):
    """E.g. 'RecordTable('foo.dat')', for log messages."""
    return '%s(%r)' % (self.__class__.__name__, self.filename,)

  def _set_packed_record(self, i, s):
    """Set the value for index I to the packed value S."""

    raise NotImplementedError()

  def __setitem__(self, i, v):
    self._set_packed_record(i, self.packer.pack(v))

  def _get_packed_record(self, i):
    """Return the packed record for index I.

    Raise KeyError if it is not present."""

    raise NotImplementedError()

  def __getitem__(self, i):
    """Return the item for index I.

    Raise KeyError if that item has never been set (or if it was set
    to self.packer.empty_value)."""

    s = self._get_packed_record(i)

    # empty_value marks a slot that was never set (or was deleted):
    if s == self.packer.empty_value:
      raise KeyError(i)

    return self.packer.unpack(s)

  def get_many(self, indexes, default=None):
    """Yield (index, item) tuples for INDEXES in arbitrary order.

    Yield (index, default) for indexes for which no item is defined."""

    indexes = list(indexes)
    # Sort the indexes to reduce disk seeking:
    indexes.sort()
    for i in indexes:
      yield (i, self.get(i, default))

  def get(self, i, default=None):
    """Return the item for index I, or DEFAULT if it is not defined."""

    try:
      return self[i]
    except KeyError:
      return default

  def __delitem__(self, i):
    """Delete the item for index I.

    Raise KeyError if that item has never been set (or if it was set
    to self.packer.empty_value)."""

    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()

    # Check that the value was set (otherwise raise KeyError):
    self[i]
    # Deletion is simply overwriting with the "never set" marker:
    self._set_packed_record(i, self.packer.empty_value)

  def iterkeys(self):
    """Yield the keys in the map in key order."""

    for i in xrange(0, self._limit):
      try:
        self[i]
        yield i
      except KeyError:
        # Gap in the index sequence; skip it.
        pass

  def itervalues(self):
    """Yield the values in the map in key order.

    Skip over values that haven't been defined."""

    for i in xrange(0, self._limit):
      try:
        yield self[i]
      except KeyError:
        pass
+
+
class RecordTable(AbstractRecordTable):
  """An AbstractRecordTable backed by an ordinary file plus a cache.

  Records are buffered in an in-memory read/write cache; when the
  cache fills up it is written out in one sorted sweep and cleared."""

  # The approximate amount of memory that should be used for the cache
  # for each instance of this class:
  CACHE_MEMORY = 4 * 1024 * 1024

  # Empirically, each entry in the cache table has an overhead of
  # about 96 bytes on a 32-bit computer.
  CACHE_OVERHEAD_PER_ENTRY = 96

  def __init__(self, filename, mode, packer, cache_memory=CACHE_MEMORY):
    """Open FILENAME in MODE (one of the DB_OPEN_* constants).

    PACKER serializes the records; CACHE_MEMORY is the approximate
    cache budget in bytes.  Raise RuntimeError for an invalid MODE."""

    AbstractRecordTable.__init__(self, filename, mode, packer)
    if self.mode == DB_OPEN_NEW:
      self.f = open(self.filename, 'wb+')
    elif self.mode == DB_OPEN_WRITE:
      self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_READ:
      self.f = open(self.filename, 'rb')
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)
    self.cache_memory = cache_memory

    # Number of items that can be stored in the write cache.
    self._max_memory_cache = (
        self.cache_memory
        / (self.CACHE_OVERHEAD_PER_ENTRY + self._record_len))

    # Read and write cache; a map {i : (dirty, s)}, where i is an
    # index, dirty indicates whether the value has to be written to
    # disk, and s is the packed value for the index.  Up to
    # self._max_memory_cache items can be stored here.  When the cache
    # fills up, it is written to disk in one go and then cleared.
    self._cache = {}

    # The index just beyond the last record ever written:
    self._limit = os.path.getsize(self.filename) // self._record_len

    # The index just beyond the last record ever written to disk:
    self._limit_written = self._limit

  def flush(self):
    """Write all dirty cache entries to disk and clear the cache."""

    Log().debug('Flushing cache for %s' % (self,))

    pairs = [(i, s) for (i, (dirty, s)) in self._cache.items() if dirty]

    if pairs:
      # Write in index order so the file is visited mostly sequentially:
      pairs.sort()
      old_i = None
      f = self.f
      for (i, s) in pairs:
        if i == old_i:
          # No seeking needed
          pass
        elif i <= self._limit_written:
          # Just jump there:
          f.seek(i * self._record_len)
        else:
          # Jump to the end of the file then write _empty_values until
          # we reach the correct location:
          f.seek(self._limit_written * self._record_len)
          while self._limit_written < i:
            f.write(self.packer.empty_value)
            self._limit_written += 1
        f.write(s)
        # The file position is now just past record i, so a following
        # consecutive record needs no seek:
        old_i = i + 1
        self._limit_written = max(self._limit_written, old_i)

      self.f.flush()

    self._cache.clear()

  def _set_packed_record(self, i, s):
    """Cache packed value S for index I; flush if the cache is full."""

    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()
    if i < 0:
      raise KeyError()
    # Mark the entry dirty so flush() will write it out:
    self._cache[i] = (True, s)
    if len(self._cache) >= self._max_memory_cache:
      self.flush()
    self._limit = max(self._limit, i + 1)

  def _get_packed_record(self, i):
    """Return the packed record for index I, caching it as clean.

    Raise KeyError if I was never written to the cache or to disk."""

    try:
      return self._cache[i][1]
    except KeyError:
      if not 0 <= i < self._limit_written:
        raise KeyError(i)
      self.f.seek(i * self._record_len)
      s = self.f.read(self._record_len)
      # Cache as clean -- this value is already on disk:
      self._cache[i] = (False, s)
      if len(self._cache) >= self._max_memory_cache:
        self.flush()

      return s

  def close(self):
    """Flush pending writes and release the underlying file."""

    self.flush()
    self._cache = None
    self.f.close()
    self.f = None
+
+
class MmapRecordTable(AbstractRecordTable):
  """An AbstractRecordTable backed by a memory-mapped file."""

  # Grow the backing file in chunks of this many bytes:
  GROWTH_INCREMENT = 65536

  def __init__(self, filename, mode, packer):
    """Open FILENAME in MODE (one of the DB_OPEN_* constants).

    PACKER serializes the records.  Raise RuntimeError for an invalid
    MODE."""

    AbstractRecordTable.__init__(self, filename, mode, packer)
    if self.mode == DB_OPEN_NEW:
      self.python_file = open(self.filename, 'wb+')
      # mmap cannot map an empty file, so pre-fill one growth chunk:
      self.python_file.write('\0' * self.GROWTH_INCREMENT)
      self.python_file.flush()
      self._filesize = self.GROWTH_INCREMENT
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_WRITE
          )

      # The index just beyond the last record ever written:
      self._limit = 0
    elif self.mode == DB_OPEN_WRITE:
      self.python_file = open(self.filename, 'rb+')
      self._filesize = os.path.getsize(self.filename)
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_WRITE
          )

      # The index just beyond the last record ever written:
      self._limit = os.path.getsize(self.filename) // self._record_len
    elif self.mode == DB_OPEN_READ:
      self.python_file = open(self.filename, 'rb')
      self._filesize = os.path.getsize(self.filename)
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_READ
          )

      # The index just beyond the last record ever written:
      self._limit = os.path.getsize(self.filename) // self._record_len
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

  def flush(self):
    """Sync the mapped pages to disk."""

    self.f.flush()

  def _set_packed_record(self, i, s):
    """Store packed value S at index I, growing the file if needed."""

    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()
    if i < 0:
      raise KeyError()
    if i >= self._limit:
      # This write extends the range of valid indices.  First check
      # whether the file has to be enlarged:
      new_size = (i + 1) * self._record_len
      if new_size > self._filesize:
        # Round the new size up to a multiple of GROWTH_INCREMENT:
        self._filesize = (
            (new_size + self.GROWTH_INCREMENT - 1)
            // self.GROWTH_INCREMENT
            * self.GROWTH_INCREMENT
            )
        self.f.resize(self._filesize)
      if i > self._limit:
        # Pad up to the new record with empty_value:
        self.f[self._limit * self._record_len:i * self._record_len] = \
            self.packer.empty_value * (i - self._limit)
      self._limit = i + 1

    self.f[i * self._record_len:(i + 1) * self._record_len] = s

  def _get_packed_record(self, i):
    """Return the packed record at index I; KeyError if out of range."""

    if not 0 <= i < self._limit:
      raise KeyError(i)
    return self.f[i * self._record_len:(i + 1) * self._record_len]

  def close(self):
    """Flush the map, then unmap it and close the underlying file."""

    self.flush()
    self.f.close()
    self.python_file.close()
+
+
diff --git a/cvs2svn_lib/repository_delegate.py b/cvs2svn_lib/repository_delegate.py
new file mode 100644
index 0000000..53c9b65
--- /dev/null
+++ b/cvs2svn_lib/repository_delegate.py
@@ -0,0 +1,98 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains class RepositoryDelegate."""
+
+
+import os
+import subprocess
+
+from cvs2svn_lib.common import CommandError
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.config import DUMPFILE
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate
+
+
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting.

  Each commit is buffered in a temporary dumpfile and then piped
  into an 'svnadmin load' subprocess targeting TARGET."""

  def __init__(self, revision_reader, target):
    # TARGET is the path of the Subversion repository to load into.
    self.target = target

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.  But
    # since we delete it ourselves, we don't want to use
    # artifact_manager.
    DumpfileDelegate.__init__(
        self, revision_reader, Ctx().get_temp_filename(DUMPFILE)
        )

    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = subprocess.Popen(
        [Ctx().svnadmin_executable, 'load', '-q', self.target],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        )
    # svnadmin's stdout is never read ('-q' keeps it quiet anyway):
    self.loader_pipe.stdout.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.stdin)
    except IOError:
      # The pipe broke, so svnadmin must have died; report whatever
      # it wrote to stderr:
      raise FatalError(
          'svnadmin failed with the following output while '
          'loading the dumpfile:\n%s'
          % (self.loader_pipe.stderr.read(),)
          )

  def start_commit(self, revnum, revprops):
    """Start a new commit."""

    DumpfileDelegate.start_commit(self, revnum, revprops)

  def end_commit(self):
    """Feed the revision stored in the dumpfile to the svnadmin load pipe."""

    DumpfileDelegate.end_commit(self)

    # Replay the buffered dump data into svnadmin's stdin, then
    # truncate the buffer file for reuse by the next commit:
    self.dumpfile.seek(0)
    while True:
      data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
      if not data:
        break
      try:
        self.loader_pipe.stdin.write(data)
      except IOError:
        raise FatalError("svnadmin failed with the following output "
                         "while loading the dumpfile:\n"
                         + self.loader_pipe.stderr.read())
    self.dumpfile.seek(0)
    self.dumpfile.truncate()

  def finish(self):
    """Clean up."""

    self.dumpfile.close()
    # Closing stdin signals EOF to 'svnadmin load':
    self.loader_pipe.stdin.close()
    # NOTE(review): stderr is only drained here (and on IOError); if
    # svnadmin produced enough stderr output to fill the pipe buffer
    # earlier, both processes could deadlock -- confirm acceptable.
    error_output = self.loader_pipe.stderr.read()
    exit_status = self.loader_pipe.wait()
    del self.loader_pipe
    if exit_status:
      raise CommandError('svnadmin load', exit_status, error_output)
    os.remove(self.dumpfile_path)
+
+
diff --git a/cvs2svn_lib/repository_mirror.py b/cvs2svn_lib/repository_mirror.py
new file mode 100644
index 0000000..72e2ba1
--- /dev/null
+++ b/cvs2svn_lib/repository_mirror.py
@@ -0,0 +1,897 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the RepositoryMirror class and supporting classes.
+
+RepositoryMirror represents the skeleton of a versioned file tree with
+multiple lines of development ('LODs'). It records the presence or
+absence of files and directories, but not their contents. Given three
+values (revnum, lod, cvs_path), it can tell you whether the specified
+CVSPath existed on the specified LOD in the given revision number.
+The file trees corresponding to the most recent revision can be
+modified.
+
+The individual file trees are stored using immutable tree structures.
+Each directory node is represented as a MirrorDirectory instance,
+which is basically a map {cvs_path : node_id}, where cvs_path is a
+CVSPath within the directory, and node_id is an integer ID that
+uniquely identifies another directory node if that node is a
+CVSDirectory, or None if that node is a CVSFile. If a directory node
+is to be modified, then first a new node is created with a copy of the
+original node's contents, then the copy is modified. A reference to
+the copy also has to be stored in the parent node, meaning that the
+parent node needs to be modified, and so on recursively to the root
+node of the file tree. This data structure allows cheap deep copies,
+which is useful for tagging and branching.
+
+The class must also be able to find the root directory node
+corresponding to a particular (revnum, lod). This is done by keeping
+an LODHistory instance for each LOD, which can determine the root
+directory node ID for that LOD for any revnum. It does so by
+recording changes to the root directory node ID only for revisions in
+which it changed. Thus it stores two arrays, revnums (a list of the
+revision numbers when the ID changed), and ids (a list of the
+corresponding IDs). To find the ID for a particular revnum, first a
+binary search is done in the revnums array to find the index of the
+last change preceding revnum, then the corresponding ID is read from
+the ids array. Since most revisions change only one LOD, this allows
+storage of the history of potentially tens of thousands of LODs over
+hundreds of thousands of revisions in an amount of space that scales
+as O(numberOfLODs + numberOfRevisions), rather than O(numberOfLODs *
+numberOfRevisions) as would be needed if the information were stored
+in the equivalent of a 2D array.
+
+The internal operation of these classes is somewhat intricate, but the
+interface attempts to hide the complexity, enforce the usage rules,
+and allow efficient access. The most important facts to remember are
+(1) that a directory node can be used for multiple purposes (for
+multiple branches and for multiple revisions on a single branch), (2)
+that only a node that has been created within the current revision is
+allowed to be mutated, and (3) that the current revision can include
+nodes carried over from prior revisions, which are immutable.
+
+This leads to a bewildering variety of MirrorDirectory classes. The
+most important distinction is between OldMirrorDirectories and
+CurrentMirrorDirectories. A single node can be represented multiple
+ways in memory at the same time, depending on whether it was looked up
+as part of the current revision or part of an old revision:
+
+ MirrorDirectory -- the base class for all MirrorDirectory nodes.
+ This class allows lookup of subnodes and iteration over
+ subnodes.
+
+ OldMirrorDirectory -- a MirrorDirectory that was looked up for an
+ old revision. These instances are immutable, as only the
+ current revision is allowed to be modified.
+
+ CurrentMirrorDirectory -- a MirrorDirectory that was looked up for
+ the current revision. Such an instance is always logically
+ mutable, though mutating it might require the node to be
+ copied first. Such an instance might represent a node that
+ has already been copied during this revision and can therefore
+ be modified freely (such nodes implement
+ _WritableMirrorDirectoryMixin), or it might represent a node
+ that was carried over from an old revision and hasn't been
+ copied yet (such nodes implement
+ _ReadOnlyMirrorDirectoryMixin). If the latter, then the node
+ copies itself (and bubbles up the change) before allowing
+ itself to be modified. But the distinction is managed
+ internally; client classes should not have to worry about it.
+
+ CurrentMirrorLODDirectory -- A CurrentMirrorDirectory representing
+ the root directory of a line of development in the current
+ revision. This class has two concrete subclasses,
+ _CurrentMirrorReadOnlyLODDirectory and
+ _CurrentMirrorWritableLODDirectory, depending on whether the
+ node has already been copied during this revision.
+
+
+ CurrentMirrorSubdirectory -- A CurrentMirrorDirectory representing
+ a subdirectory within a line of development's directory tree
+ in the current revision. This class has two concrete
+ subclasses, _CurrentMirrorReadOnlySubdirectory and
+ _CurrentMirrorWritableSubdirectory, depending on whether the
+ node has already been copied during this revision.
+
+ DeletedCurrentMirrorDirectory -- a MirrorDirectory that has been
+ deleted. Such an instance is disabled so that it cannot
+ accidentally be used.
+
+While a revision is being processed, RepositoryMirror._new_nodes holds
+every writable CurrentMirrorDirectory instance (i.e., every node that
+has been created in the revision). Since these nodes are mutable, it
+is important that there be exactly one instance associated with each
+node; otherwise there would be problems keeping the instances
+synchronized. These are written to the database by
+RepositoryMirror.end_commit().
+
+OldMirrorDirectory and read-only CurrentMirrorDirectory instances are
+*not* cached; they are recreated whenever they are referenced. There
+might be multiple instances referring to the same node. A read-only
+CurrentMirrorDirectory instance is mutated in place into a writable
+CurrentMirrorDirectory instance if it needs to be modified.
+
+FIXME: The rules for when a MirrorDirectory instance can continue to
+be used vs. when it has to be read again (because it has been modified
+indirectly and therefore copied) are confusing and error-prone.
+Probably the semantics should be changed.
+
+"""
+
+
+import bisect
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import DB_OPEN_NEW
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.cvs_file import CVSFile
+from cvs2svn_lib.cvs_file import CVSDirectory
+from cvs2svn_lib.key_generator import KeyGenerator
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.serializer import MarshalSerializer
+from cvs2svn_lib.database import IndexedDatabase
+
+
class RepositoryMirrorError(Exception):
  """Base class for errors raised by the RepositoryMirror."""
+
+
class LODExistsError(RepositoryMirrorError):
  """Raised when creating an LOD that is already present.

  An attempt was made to add an LOD to the repository mirror, but the
  LOD already exists in the youngest revision of the repository."""
+
+
class PathExistsError(RepositoryMirrorError):
  """Raised when creating a path that is already present.

  An attempt was made to add a path to the repository mirror, but the
  path already exists in the youngest revision of the repository."""
+
+
class DeletedNodeReusedError(RepositoryMirrorError):
  """Raised when a MirrorDirectory that was already deleted is used again."""
+
+
class CopyFromCurrentNodeError(RepositoryMirrorError):
  """Raised when copying a CurrentMirrorDirectory to the current revision.

  Only nodes from old revisions may be copied in."""
+
+
class MirrorDirectory(object):
  """Represent a node within the RepositoryMirror.

  Instances of this class act like a map {CVSPath : MirrorDirectory},
  where CVSPath is an item within this directory (i.e., a file or
  subdirectory within this directory).  The value is either another
  MirrorDirectory instance (for directories) or None (for files)."""

  def __init__(self, repo, id, entries):
    # The RepositoryMirror containing this directory:
    self.repo = repo

    # The id of this node:
    self.id = id

    # The entries within this directory, stored as a map {CVSPath :
    # node_id}.  The node_ids are integers for CVSDirectories, None
    # for CVSFiles:
    self._entries = entries

  def __getitem__(self, cvs_path):
    """Return the MirrorDirectory associated with the specified subnode.

    Return a MirrorDirectory instance if the subnode is a
    CVSDirectory; None if it is a CVSFile.  Raise KeyError if the
    specified subnode does not exist.  Subclasses must override this
    method."""

    raise NotImplementedError()

  def __len__(self):
    """Return the number of CVSPaths within this node."""

    return len(self._entries)

  def __contains__(self, cvs_path):
    """Return True iff CVS_PATH is contained in this node."""

    return cvs_path in self._entries

  def __iter__(self):
    """Iterate over the CVSPaths within this node."""

    return iter(self._entries)

  def _format_entries(self):
    """Format the entries map for output in subclasses' __repr__() methods."""

    def format_item(key, value):
      if value is None:
        return str(key)
      else:
        return '%s -> %x' % (key, value,)

    # Use sorted() rather than copying the items list and sorting it
    # in place: the behavior is identical, there is no throwaway
    # mutable copy, and it also works where dict.items() is a view
    # rather than a list:
    return '{%s}' % (
        ', '.join(
            [format_item(*item) for item in sorted(self._entries.items())]
            ),
        )

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s<%x>' % (self.__class__.__name__, self.id,)
+
+
class OldMirrorDirectory(MirrorDirectory):
  """Represent a historical directory within the RepositoryMirror."""

  def __getitem__(self, cvs_path):
    node_id = self._entries[cvs_path]
    if node_id is None:
      # A None entry marks a leaf (CVSFile) node.
      return None
    # Materialize the subdirectory from the on-disk node database:
    return OldMirrorDirectory(self.repo, node_id, self.repo._node_db[node_id])

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s(%s)' % (self, self._format_entries(),)
+
+
class CurrentMirrorDirectory(MirrorDirectory):
  """Represent a directory that currently exists in the RepositoryMirror."""

  def __init__(self, repo, id, lod, cvs_path, entries):
    MirrorDirectory.__init__(self, repo, id, entries)
    # The LineOfDevelopment to which this directory belongs:
    self.lod = lod
    # The CVSPath of this directory itself:
    self.cvs_path = cvs_path

  def __getitem__(self, cvs_path):
    """Return the subnode at CVS_PATH (None if it is a CVSFile).

    If the subnode was already (re)created during the current revision
    it is returned writable from repo._new_nodes; otherwise a
    read-only view is materialized from the on-disk node database.
    Raise KeyError if the subnode does not exist."""

    id = self._entries[cvs_path]
    if id is None:
      # This represents a leaf node.
      return None
    else:
      try:
        return self.repo._new_nodes[id]
      except KeyError:
        return _CurrentMirrorReadOnlySubdirectory(
            self.repo, id, self.lod, cvs_path, self,
            self.repo._node_db[id]
            )

  def __setitem__(self, cvs_path, node):
    """Create or overwrite a subnode of this node.

    CVS_PATH is the path of the subnode.  NODE will be the new value
    of the node; for CVSDirectories it should be a MirrorDirectory
    instance; for CVSFiles it should be None.

    Raise DeletedNodeReusedError if NODE has already been deleted, and
    CopyFromCurrentNodeError if NODE was created during the current
    revision (only nodes from old revisions may be copied in)."""

    if isinstance(node, DeletedCurrentMirrorDirectory):
      raise DeletedNodeReusedError(
          '%r has already been deleted and should not be reused' % (node,)
          )
    elif isinstance(node, CurrentMirrorDirectory):
      raise CopyFromCurrentNodeError(
          '%r was created in the current node and cannot be copied' % (node,)
          )
    else:
      self._set_entry(cvs_path, node)

  def __delitem__(self, cvs_path):
    """Remove the subnode of this node at CVS_PATH.

    If the node does not exist, then raise a KeyError."""

    # Look the subnode up first: if it is writable (created during
    # this revision), it must also be disabled against further use.
    node = self[cvs_path]
    self._del_entry(cvs_path)
    if isinstance(node, _WritableMirrorDirectoryMixin):
      node._mark_deleted()

  def mkdir(self, cvs_directory):
    """Create an empty subdirectory of this node at CVS_DIRECTORY.

    Return the CurrentMirrorDirectory that was created.  Raise
    PathExistsError if CVS_DIRECTORY already exists within this
    node."""

    assert isinstance(cvs_directory, CVSDirectory)
    if cvs_directory in self:
      raise PathExistsError(
          'Attempt to create directory \'%s\' in %s in repository mirror '
          'when it already exists.'
          % (cvs_directory, self.lod,)
          )

    # The new node is born writable, so it is registered in
    # repo._new_nodes:
    new_node = _CurrentMirrorWritableSubdirectory(
        self.repo, self.repo._key_generator.gen_id(), self.lod, cvs_directory,
        self, {}
        )
    self._set_entry(cvs_directory, new_node)
    self.repo._new_nodes[new_node.id] = new_node
    return new_node

  def add_file(self, cvs_file):
    """Create a file within this node at CVS_FILE.

    Raise PathExistsError if CVS_FILE already exists within this
    node."""

    assert isinstance(cvs_file, CVSFile)
    if cvs_file in self:
      raise PathExistsError(
          'Attempt to create file \'%s\' in %s in repository mirror '
          'when it already exists.'
          % (cvs_file, self.lod,)
          )

    # Files are leaves; they are recorded with no node id:
    self._set_entry(cvs_file, None)

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s(%r, %r, %s)' % (
        self, self.lod, self.cvs_path, self._format_entries(),
        )
+
+
class DeletedCurrentMirrorDirectory(object):
  """A MirrorDirectory that has been deleted.

  When a _WritableMirrorDirectoryMixin instance is deleted, it is
  morphed into this (method-free) class, so any later accidental use
  of the object fails loudly instead of silently mutating state."""
+
+
class _WritableMirrorDirectoryMixin:
  """Mixin for MirrorDirectories that are already writable.

  A MirrorDirectory is writable if it has already been recreated
  during the current revision."""

  def _set_entry(self, cvs_path, node):
    """Create or overwrite a subnode of this node, with no checks.

    NODE is None for a CVSFile; for a CVSDirectory only its id is
    recorded."""

    if node is None:
      self._entries[cvs_path] = None
    else:
      self._entries[cvs_path] = node.id

  def _del_entry(self, cvs_path):
    """Remove the subnode of this node at CVS_PATH, with no checks."""

    del self._entries[cvs_path]

  def _mark_deleted(self):
    """Mark this object and any writable descendants as being deleted."""

    # Morph this instance into DeletedCurrentMirrorDirectory so any
    # further use fails loudly:
    self.__class__ = DeletedCurrentMirrorDirectory

    for (cvs_path, id) in self._entries.iteritems():
      if id in self.repo._new_nodes:
        # BUG FIX: this used to read 'node = self[cvs_path]', but at
        # this point self.__class__ has already been reassigned to
        # DeletedCurrentMirrorDirectory, which defines no __getitem__,
        # so the subscript raised TypeError whenever a writable
        # directory contained writable subdirectories.  Fetch the node
        # directly from _new_nodes instead -- which is exactly what
        # CurrentMirrorDirectory.__getitem__ returns for an id that is
        # present in _new_nodes:
        node = self.repo._new_nodes[id]
        if isinstance(node, _WritableMirrorDirectoryMixin):
          # Mark deleted and recurse:
          node._mark_deleted()
+
+
class _ReadOnlyMirrorDirectoryMixin:
  """Mixin for a CurrentMirrorDirectory that hasn't yet been made writable."""

  def _make_writable(self):
    # Concrete subclasses reassign self.__class__ to their writable
    # counterpart (copy-on-write).
    raise NotImplementedError()

  def _set_entry(self, cvs_path, node):
    """Create or overwrite a subnode of this node, with no checks."""

    # _make_writable() reassigns self.__class__ to a writable variant,
    # so the second call below dispatches to
    # _WritableMirrorDirectoryMixin._set_entry() -- this is not
    # infinite recursion.
    self._make_writable()
    self._set_entry(cvs_path, node)

  def _del_entry(self, cvs_path):
    """Remove the subnode of this node at CVS_PATH, with no checks."""

    # See _set_entry() for why this is not infinite recursion.
    self._make_writable()
    self._del_entry(cvs_path)
+
+
class CurrentMirrorLODDirectory(CurrentMirrorDirectory):
  """Represent an LOD's main directory in the mirror's current version."""

  def __init__(self, repo, id, lod, entries):
    # The root directory of an LOD is the project's root CVS
    # directory:
    CurrentMirrorDirectory.__init__(
        self, repo, id, lod, lod.project.get_root_cvs_directory(), entries
        )

  def delete(self):
    """Remove the directory represented by this object.

    Record in the LOD's history that the LOD ceases to exist in the
    current revision, then mark this node deleted."""

    lod_history = self.repo._get_lod_history(self.lod)
    assert lod_history.exists()
    # A None id records that the LOD no longer exists:
    lod_history.update(self.repo._youngest, None)
    self._mark_deleted()
+
+
class _CurrentMirrorReadOnlyLODDirectory(
    CurrentMirrorLODDirectory, _ReadOnlyMirrorDirectoryMixin
    ):
  """An LOD's main directory that has not yet been copied this revision."""

  def _make_writable(self):
    """Copy-on-write: morph this node into its writable variant."""

    self.__class__ = _CurrentMirrorWritableLODDirectory
    # Create a new ID:
    self.id = self.repo._key_generator.gen_id()
    self.repo._new_nodes[self.id] = self
    # Record the LOD's new root node id for the current revision:
    self.repo._get_lod_history(self.lod).update(self.repo._youngest, self.id)
    # Copy the entries map so mutations do not leak into the old
    # (shared, immutable) node:
    self._entries = self._entries.copy()
+
class _CurrentMirrorWritableLODDirectory(
    CurrentMirrorLODDirectory, _WritableMirrorDirectoryMixin
    ):
  """An LOD's main directory that has already been copied this revision."""

  pass
+
+
class CurrentMirrorSubdirectory(CurrentMirrorDirectory):
  """Represent a subdirectory in the mirror's current version."""

  def __init__(self, repo, id, lod, cvs_path, parent_mirror_dir, entries):
    CurrentMirrorDirectory.__init__(self, repo, id, lod, cvs_path, entries)
    # The CurrentMirrorDirectory containing this one; needed so that
    # deletions and copy-on-write can bubble up to the parent:
    self.parent_mirror_dir = parent_mirror_dir

  def delete(self):
    """Remove the directory represented by this object."""

    # Delegate to the parent's __delitem__, which unlinks this entry
    # and (if this node is writable) marks it deleted:
    del self.parent_mirror_dir[self.cvs_path]
+
+
class _CurrentMirrorReadOnlySubdirectory(
    CurrentMirrorSubdirectory, _ReadOnlyMirrorDirectoryMixin
    ):
  """A subdirectory that has not yet been copied during this revision."""

  def _make_writable(self):
    """Copy-on-write: morph this node into its writable variant."""

    self.__class__ = _CurrentMirrorWritableSubdirectory
    # Create a new ID:
    self.id = self.repo._key_generator.gen_id()
    self.repo._new_nodes[self.id] = self
    # Record the new id in the parent; the parent copies itself on
    # write, so the change bubbles up towards the LOD root:
    self.parent_mirror_dir._set_entry(self.cvs_path, self)
    # Copy the entries map so mutations do not leak into the old
    # (shared, immutable) node:
    self._entries = self._entries.copy()
+
+
class _CurrentMirrorWritableSubdirectory(
    CurrentMirrorSubdirectory, _WritableMirrorDirectoryMixin
    ):
  """A subdirectory that has already been copied during this revision."""

  pass
+
+
class LODHistory(object):
  """The history of root nodes for a line of development.

  Members:

    _mirror -- (RepositoryMirror) the RepositoryMirror that manages
        this LODHistory.

    lod -- (LineOfDevelopment) the LOD described by this LODHistory.

    revnums -- (list of int) the revision numbers at which the root
        node id changed, in increasing order.

    ids -- (list of (int or None)) the root node id in effect starting
        at the corresponding entry of REVNUMS, or None if the LOD did
        not exist starting at that revision.

  The id in effect at an arbitrary revision number is found by a
  binary search within REVNUMS for the last change at or before that
  revision, then reading the matching entry of IDS.

  Index zero of both lists holds a sentinel recording the initial
  state of affairs, namely that the LOD does not exist in revision
  r0."""

  __slots__ = ['_mirror', 'lod', 'revnums', 'ids']

  def __init__(self, mirror, lod):
    self._mirror = mirror
    self.lod = lod
    # The r0 sentinel: this LOD starts out nonexistent.
    self.revnums = [0]
    self.ids = [None]

  def get_id(self, revnum):
    """Get the ID of the root path for this LOD in REVNUM.

    Raise KeyError if this LOD didn't exist in REVNUM."""

    # Index of the last recorded change at or before REVNUM:
    i = bisect.bisect_right(self.revnums, revnum) - 1
    node_id = self.ids[i]
    if node_id is None:
      raise KeyError()
    return node_id

  def get_current_id(self):
    """Get the ID of the root path for this LOD in the current revision.

    Raise KeyError if this LOD doesn't currently exist."""

    node_id = self.ids[-1]
    if node_id is None:
      raise KeyError()
    return node_id

  def exists(self):
    """Return True iff LOD exists in the current revision."""

    return self.ids[-1] is not None

  def update(self, revnum, id):
    """Indicate that the root node of this LOD changed to ID at REVNUM.

    REVNUM must be no older than the revision of the last recorded
    change; if it is equal, the last change is overwritten, otherwise
    a new change is appended.

    ID can be a node ID, or it can be None to indicate that this LOD
    ceased to exist in REVNUM."""

    last_revnum = self.revnums[-1]
    if revnum < last_revnum:
      raise KeyError()

    if revnum > last_revnum:
      # A change in a strictly newer revision; append it.
      self.revnums.append(revnum)
      self.ids.append(id)
      return

    # REVNUM equals the last recorded revision, so this overwrites an
    # entry made earlier during the same revision.  A None -> None
    # replacement and replacing one id freshly created in this
    # revision with another are both forbidden:
    old_id = self.ids[-1]
    if old_id is None and id is None:
      raise InternalError(
          'ID changed from None -> None for %s, r%d' % (self.lod, revnum,)
          )
    if (old_id is not None and id is not None
        and old_id in self._mirror._new_nodes):
      raise InternalError(
          'ID changed from %x -> %x for %s, r%d'
          % (old_id, id, self.lod, revnum,)
          )
    self.ids[-1] = id
+
+
class _NodeDatabase(object):
  """A database storing all of the directory nodes.

  The nodes are written in groups every time write_new_nodes() is
  called.  To the database is written a dictionary {node_id :
  [(cvs_path.id, node_id),...]}, where the keys are the node_ids of
  the new nodes.  When a node is read, its whole group is read and
  cached under the assumption that the other nodes in the group are
  likely to be needed soon.  The cache is retained across revisions
  and cleared when _cache_max_size is exceeded.

  The dictionaries for nodes that have been read from the database
  during the current revision are cached by node_id in the _cache
  member variable.  The corresponding dictionaries are *not* copied
  when read.  To avoid cross-talk between distinct MirrorDirectory
  instances that have the same node_id, users of these dictionaries
  have to copy them before modification."""

  # How many entries should be allowed in the cache for each
  # CVSDirectory in the repository.  (This number is very roughly the
  # number of complete lines of development that can be stored in the
  # cache at one time.)
  CACHE_SIZE_MULTIPLIER = 5

  # But the cache will never be limited to less than this number:
  MIN_CACHE_LIMIT = 5000

  def __init__(self):
    self.cvs_file_db = Ctx()._cvs_file_db
    self.db = IndexedDatabase(
        artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW, serializer=MarshalSerializer(),
        )

    # A list of the maximum node_id stored by each call to
    # write_new_nodes():
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # The number of directories in the repository:
    num_dirs = len([
        cvs_path
        for cvs_path in self.cvs_file_db.itervalues()
        if isinstance(cvs_path, CVSDirectory)
        ])

    # Scale the cache bound with the size of the repository, but never
    # below the fixed minimum:
    self._cache_max_size = max(
        int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
        self.MIN_CACHE_LIMIT,
        )

  def _load(self, items):
    """Convert stored (cvs_path_id, node_id) pairs back to a node map.

    ITEMS is a list of pairs as written by _dump(); the result is the
    corresponding {CVSPath : node_id} map."""

    retval = {}
    for (id, value) in items:
      retval[self.cvs_file_db.get_file(id)] = value
    return retval

  def _dump(self, node):
    """Convert NODE, a {CVSPath : node_id} map, to its storable form.

    The result is a list of (cvs_path.id, node_id) pairs."""

    return [
        (cvs_path.id, value)
        for (cvs_path, value) in node.iteritems()
        ]

  def _determine_index(self, id):
    """Return the index of the record holding the node with ID."""

    # _max_node_ids holds, in increasing order, the highest id written
    # by each write_new_nodes() call, so a binary search locates the
    # group (database record) containing ID:
    return bisect.bisect_left(self._max_node_ids, id)

  def __getitem__(self, id):
    try:
      items = self._cache[id]
    except KeyError:
      # Cache miss: load and cache the whole group containing ID,
      # since its neighbors are likely to be needed soon.
      index = self._determine_index(id)
      for (node_id, items) in self.db[index].items():
        self._cache[node_id] = self._load(items)
      items = self._cache[id]

    return items

  def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory instances."""

    if len(self._cache) > self._cache_max_size:
      # The size of the cache has exceeded the threshold.  Discard the
      # old cache values (but still store the new nodes into the
      # cache):
      Log().debug('Clearing node cache')
      self._cache.clear()

    data = {}
    max_node_id = 0
    for node in nodes:
      max_node_id = max(max_node_id, node.id)
      data[node.id] = self._dump(node._entries)
      self._cache[node.id] = node._entries

    self.db[len(self._max_node_ids)] = data

    if max_node_id == 0:
      # Rewrite last value:
      # (No nodes were written this time; repeating the previous
      # maximum keeps _determine_index() consistent.)
      self._max_node_ids.append(self._max_node_ids[-1])
    else:
      self._max_node_ids.append(max_node_id)

  def close(self):
    """Release the cache and close the underlying database."""

    self._cache.clear()
    self.db.close()
    self.db = None
+
+
class RepositoryMirror:
  """Mirror a repository and its history.

  Mirror a repository as it is constructed, one revision at a time.
  For each LineOfDevelopment we store a skeleton of the directory
  structure within that LOD for each revnum in which it changed.

  For each LOD that has been seen so far, an LODHistory instance is
  stored in self._lod_histories.  An LODHistory keeps track of each
  revnum in which files were added to or deleted from that LOD, as
  well as the node id of the root of the node tree describing the LOD
  contents at that revision.

  The LOD trees themselves are stored in the _node_db database, which
  maps node ids to nodes.  A node is a map from CVSPath to ids of the
  corresponding subnodes.  The _node_db is stored on disk and each
  access is expensive.

  The _node_db database only holds the nodes for old revisions.  The
  revision that is being constructed is kept in memory in the
  _new_nodes map, which is cheap to access.

  You must invoke start_commit() before each commit and end_commit()
  afterwards."""

  def register_artifacts(self, which_pass):
    """Register the artifacts that will be needed for this object."""

    artifact_manager.register_temp_file(
        config.MIRROR_NODES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(
        config.MIRROR_NODES_STORE, which_pass
        )

  def open(self):
    """Set up the RepositoryMirror and prepare it for commits."""

    # Source of unique node ids:
    self._key_generator = KeyGenerator()

    # A map from LOD to LODHistory instance for all LODs that have
    # been referenced so far:
    self._lod_histories = {}

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track file existence, not file contents.)
    self._node_db = _NodeDatabase()

    # Start at revision 0 without a root node.
    self._youngest = 0

  def start_commit(self, revnum):
    """Start a new commit."""

    # Revisions must be fed in strictly increasing order:
    assert revnum > self._youngest
    self._youngest = revnum

    # A map {node_id : _WritableMirrorDirectoryMixin}.
    self._new_nodes = {}

  def end_commit(self):
    """Called at the end of each commit.

    This method copies the newly created nodes to the on-disk nodes
    db."""

    # Copy the new nodes to the _node_db.  Nodes that were created and
    # then deleted again within this same revision are skipped:
    self._node_db.write_new_nodes([
        node
        for node in self._new_nodes.values()
        if not isinstance(node, DeletedCurrentMirrorDirectory)
        ])

    del self._new_nodes

  def _get_lod_history(self, lod):
    """Return the LODHistory instance describing LOD.

    Create a new (empty) LODHistory if it doesn't yet exist."""

    try:
      return self._lod_histories[lod]
    except KeyError:
      lod_history = LODHistory(self, lod)
      self._lod_histories[lod] = lod_history
      return lod_history

  def get_old_lod_directory(self, lod, revnum):
    """Return the directory for the root path of LOD at revision REVNUM.

    Return an instance of MirrorDirectory if the path exists;
    otherwise, raise KeyError."""

    lod_history = self._get_lod_history(lod)
    id = lod_history.get_id(revnum)
    return OldMirrorDirectory(self, id, self._node_db[id])

  def get_old_path(self, cvs_path, lod, revnum):
    """Return the node for CVS_PATH from LOD at REVNUM.

    If CVS_PATH is a CVSDirectory, then return an instance of
    OldMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the specified LOD and REVNUM, raise
    KeyError."""

    node = self.get_old_lod_directory(lod, revnum)

    # Walk downward from the LOD root, one path component at a time
    # (the first ancestry entry is the root itself, hence [1:]):
    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]

    return node

  def get_current_lod_directory(self, lod):
    """Return the directory for the root path of LOD in the current revision.

    Return an instance of CurrentMirrorDirectory.  Raise KeyError if
    the path doesn't already exist."""

    lod_history = self._get_lod_history(lod)
    id = lod_history.get_current_id()
    try:
      # Prefer the writable node if the root was already (re)created
      # during the current revision:
      return self._new_nodes[id]
    except KeyError:
      return _CurrentMirrorReadOnlyLODDirectory(
          self, id, lod, self._node_db[id]
          )

  def get_current_path(self, cvs_path, lod):
    """Return the node for CVS_PATH from LOD in the current revision.

    If CVS_PATH is a CVSDirectory, then return an instance of
    CurrentMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the current revision of the
    specified LOD, raise KeyError."""

    node = self.get_current_lod_directory(lod)

    # Walk downward from the LOD root, one path component at a time
    # (the first ancestry entry is the root itself, hence [1:]):
    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]

    return node

  def add_lod(self, lod):
    """Create a new LOD in this repository.

    Return the CurrentMirrorDirectory that was created.  If the LOD
    already exists, raise LODExistsError."""

    lod_history = self._get_lod_history(lod)
    if lod_history.exists():
      raise LODExistsError(
          'Attempt to create %s in repository mirror when it already exists.'
          % (lod,)
          )
    new_node = _CurrentMirrorWritableLODDirectory(
        self, self._key_generator.gen_id(), lod, {}
        )
    lod_history.update(self._youngest, new_node.id)
    self._new_nodes[new_node.id] = new_node
    return new_node

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.

    In the youngest revision of the repository, the destination LOD
    *must not* already exist.

    Return the new node at DEST_LOD, as a CurrentMirrorDirectory."""

    # Get the node of our src_path
    src_node = self.get_old_lod_directory(src_lod, src_revnum)

    dest_lod_history = self._get_lod_history(dest_lod)
    if dest_lod_history.exists():
      raise LODExistsError(
          'Attempt to copy to %s in repository mirror when it already exists.'
          % (dest_lod,)
          )

    # Because written nodes are immutable, the destination can simply
    # share the source's root node id -- a cheap O(1) copy:
    dest_lod_history.update(self._youngest, src_node.id)

    # Return src_node, except packaged up as a CurrentMirrorDirectory:
    return self.get_current_lod_directory(dest_lod)

  def close(self):
    """Free resources and close databases."""

    self._lod_histories = None
    self._node_db.close()
    self._node_db = None
+
+
diff --git a/cvs2svn_lib/revision_manager.py b/cvs2svn_lib/revision_manager.py
new file mode 100644
index 0000000..8af7c74
--- /dev/null
+++ b/cvs2svn_lib/revision_manager.py
@@ -0,0 +1,189 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module describes the interface to the CVS repository."""
+
+
class RevisionRecorder(object):
  """An object that can record text and deltas from CVS files.

  (Derives from object so that it is a new-style class, consistent
  with RevisionReader in this module.)"""

  def __init__(self):
    """Initialize the RevisionRecorder.

    Please note that a RevisionRecorder is instantiated in every
    program run, even if the data-collection pass will not be
    executed. (This is to allow it to register the artifacts that it
    produces.) Therefore, the __init__() method should not do much,
    and more substantial preparation for use (like actually creating
    the artifacts) should be done in start()."""

    pass

  def register_artifacts(self, which_pass):
    """Register artifacts that will be needed during data recording.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

    pass

  def start(self):
    """Data will soon start being collected.

    Any non-idempotent initialization should be done here."""

    pass

  def start_file(self, cvs_file_items):
    """Prepare to receive data for the file with the specified CVS_FILE_ITEMS.

    CVS_FILE_ITEMS is an instance of CVSFileItems describing the file
    dependency topology right after the file tree was parsed out of
    the RCS file. (I.e., it reflects the original CVS dependency
    structure.) Please note that the CVSFileItems instance will be
    changed later."""

    pass

  def record_text(self, cvs_rev, log, text):
    """Record information about a revision and optionally return a token.

    CVS_REV is a CVSRevision instance describing a revision that has
    log message LOG and text TEXT (as retrieved from the RCS file).
    (TEXT is full text for the HEAD revision, and deltas for other
    revisions.)

    Subclasses must override this method; the base implementation
    raises NotImplementedError."""

    raise NotImplementedError()

  def finish_file(self, cvs_file_items):
    """The current file is finished; finish and clean up.

    CVS_FILE_ITEMS is a CVSFileItems instance describing the file's
    items at the end of processing of the RCS file in CollectRevsPass.
    It may be modified relative to the CVS_FILE_ITEMS instance passed
    to the corresponding start_file() call (revisions might be
    deleted, topology changed, etc)."""

    pass

  def finish(self):
    """All recording is done; clean up."""

    pass
+
+
class NullRevisionRecorder(RevisionRecorder):
  """RevisionRecorder implementation that records nothing."""

  def record_text(self, cvs_rev, log, text):
    """Discard the revision data; produce no token."""

    return None
+
+
class RevisionExcluder(object):
  """An interface for informing a RevisionReader about excluded revisions.

  Currently, revisions can be excluded via the --exclude option and
  various fixups for CVS peculiarities. This interface can be used to
  inform the associated RevisionReader about CVSItems that are being
  excluded. (The recorder might use that information to free some
  temporary data or adjust its expectations about which revisions will
  later be read.)

  (Derives from object so that it is a new-style class, consistent
  with RevisionReader in this module.)"""

  def __init__(self):
    """Initialize the RevisionExcluder.

    Please note that a RevisionExcluder is instantiated in every
    program run, even if the branch-exclusion pass will not be
    executed. (This is to allow its register_artifacts() method to be
    called.) Therefore, the __init__() method should not do much, and
    more substantial preparation for use (like actually creating the
    artifacts) should be done in start()."""

    pass

  def register_artifacts(self, which_pass):
    """Register artifacts that will be needed during branch exclusion.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

    pass

  def start(self):
    """Prepare to handle branch exclusions."""

    pass

  def process_file(self, cvs_file_items):
    """Called for files whose trees were modified in FilterSymbolsPass.

    This callback is called once for each CVSFile whose topology was
    modified in FilterSymbolsPass.

    Subclasses must override this method; the base implementation
    raises NotImplementedError."""

    raise NotImplementedError()

  def finish(self):
    """Called after all branch exclusions for all files are done."""

    pass
+
+
class NullRevisionExcluder(RevisionExcluder):
  """RevisionExcluder implementation that ignores all exclusions."""

  def process_file(self, cvs_file_items):
    """Do nothing with the excluded file's items."""

    pass
+
+
class RevisionReader(object):
  """Interface for objects that can retrieve the contents of CVSRevisions."""

  def register_artifacts(self, which_pass):
    """Register any artifacts needed for reading revision contents.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

  def start(self):
    """Prepare for calls to get_content_stream."""

  def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
    """Return a file-like object from which the contents of CVS_REV
    can be read.

    CVS_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION is
    True, then suppress the substitution of RCS/CVS keywords in the
    output.

    Subclasses must override this method; the base implementation
    raises NotImplementedError."""

    raise NotImplementedError

  def finish(self):
    """Inform the reader that all calls to get_content_stream are done.

    start() may be called again at a later point."""
+
+
diff --git a/cvs2svn_lib/run_options.py b/cvs2svn_lib/run_options.py
new file mode 100644
index 0000000..27d2ea6
--- /dev/null
+++ b/cvs2svn_lib/run_options.py
@@ -0,0 +1,1035 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes to set common cvs2xxx run options."""
+
+import sys
+import re
+import optparse
+from optparse import OptionGroup
+import time
+
+from cvs2svn_lib.version import VERSION
+from cvs2svn_lib import config
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.common import error_prefix
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import CVSTextDecoder
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.man_writer import ManOption
+from cvs2svn_lib.pass_manager import InvalidPassError
+from cvs2svn_lib.symbol_strategy import AllBranchRule
+from cvs2svn_lib.symbol_strategy import AllTagRule
+from cvs2svn_lib.symbol_strategy import BranchIfCommitsRule
+from cvs2svn_lib.symbol_strategy import ExcludeRegexpStrategyRule
+from cvs2svn_lib.symbol_strategy import ForceBranchRegexpStrategyRule
+from cvs2svn_lib.symbol_strategy import ForceTagRegexpStrategyRule
+from cvs2svn_lib.symbol_strategy import ExcludeTrivialImportBranchRule
+from cvs2svn_lib.symbol_strategy import HeuristicStrategyRule
+from cvs2svn_lib.symbol_strategy import UnambiguousUsageRule
+from cvs2svn_lib.symbol_strategy import HeuristicPreferredParentRule
+from cvs2svn_lib.symbol_strategy import SymbolHintsFileRule
+from cvs2svn_lib.symbol_transform import ReplaceSubstringsSymbolTransform
+from cvs2svn_lib.symbol_transform import RegexpSymbolTransform
+from cvs2svn_lib.symbol_transform import NormalizePathsSymbolTransform
+from cvs2svn_lib.property_setters import AutoPropsPropertySetter
+from cvs2svn_lib.property_setters import CVSBinaryFileDefaultMimeTypeSetter
+from cvs2svn_lib.property_setters import CVSBinaryFileEOLStyleSetter
+from cvs2svn_lib.property_setters import CVSRevisionNumberSetter
+from cvs2svn_lib.property_setters import DefaultEOLStyleSetter
+from cvs2svn_lib.property_setters import EOLStyleFromMimeTypeSetter
+from cvs2svn_lib.property_setters import ExecutablePropertySetter
+from cvs2svn_lib.property_setters import KeywordsPropertySetter
+from cvs2svn_lib.property_setters import MimeMapper
+from cvs2svn_lib.property_setters import SVNBinaryFileKeywordsPropertySetter
+
+
# optparse usage template; '%prog' is replaced with the program name.
usage = """\
Usage: %prog --options OPTIONFILE
       %prog [OPTION...] OUTPUT-OPTION CVS-REPOS-PATH"""

# One-line description shown in --help output.
description="""\
Convert a CVS repository into a Subversion repository, including history.
"""

# Content of the manpage AUTHORS section.  The embedded '.br' and
# '.PP' tokens are groff macros (line break and new paragraph).
authors = u"""\
Main authors are:
.br
C. Michael Pilato <cmpilato@collab.net>
.br
Greg Stein <gstein@lyra.org>
.br
Branko \u010cibej <brane@xbc.nu>
.br
Blair Zajac <blair@orcaware.com>
.br
Max Bowsher <maxb@ukf.net>
.br
Brian Fitzpatrick <fitz@red-bean.com>
.br
Tobias Ringstr\u00f6m <tobias@ringstrom.mine.nu>
.br
Karl Fogel <kfogel@collab.net>
.br
Erik H\u00fclsmann <e.huelsmann@gmx.net>
.br
David Summers <david@summersoft.fay.ar.us>
.br
Michael Haggerty <mhagger@alum.mit.edu>
.PP
Manpage was written for the Debian GNU/Linux system by
Laszlo 'GCS' Boszormenyi <gcs@lsc.hu> (but may be used by others).
"""
+
+
class IncompatibleOption(ManOption):
  """A ManOption that is incompatible with the --options option.

  Each use of such an option is recorded on the parser so that an
  error can be reported later if --options was also specified."""

  def __init__(self, *args, **kw):
    ManOption.__init__(self, *args, **kw)

  def take_action(self, action, dest, opt, value, values, parser):
    # Note that this option was used (record each option only once):
    incompatible = parser.values.options_incompatible_options
    if opt not in incompatible:
      incompatible.append(opt)
    return ManOption.take_action(
        self, action, dest, opt, value, values, parser
        )
+
+
class ContextOption(ManOption):
  """A ManOption that stores its value to Ctx.

  The value is stored to the attribute of Ctx() named by 'dest' (which
  is derived from the option string if not passed explicitly).  Unless
  compatible_with_option=True is given, use of the option is also
  recorded as incompatible with --options."""

  def __init__(self, *args, **kw):
    # Use kw.get() rather than kw['action'] so that a missing 'action'
    # raises this ValueError instead of a confusing KeyError:
    if kw.get('action') not in self.STORE_ACTIONS:
      raise ValueError('Invalid action: %s' % (kw.get('action'),))

    self.__compatible_with_option = kw.pop('compatible_with_option', False)
    self.__action = kw.pop('action')
    try:
      self.__dest = kw.pop('dest')
    except KeyError:
      # Derive dest from the (long) option name, e.g.
      # '--trunk-only' -> 'trunk_only':
      opt = args[0]
      if not opt.startswith('--'):
        # Only a long option name can be turned into a dest:
        raise ValueError(
            'dest must be specified explicitly for option %r' % (opt,)
            )
      self.__dest = opt[2:].replace('-', '_')
    if 'const' in kw:
      self.__const = kw.pop('const')

    # Delegate the actual storing to our own callback:
    kw['action'] = 'callback'
    kw['callback'] = self.__callback

    ManOption.__init__(self, *args, **kw)

  def __callback(self, option, opt_str, value, parser):
    """optparse callback: apply the requested store action to Ctx()."""

    if not self.__compatible_with_option:
      oio = parser.values.options_incompatible_options
      if opt_str not in oio:
        oio.append(opt_str)

    action = self.__action
    dest = self.__dest

    if action == "store":
      setattr(Ctx(), dest, value)
    elif action == "store_const":
      setattr(Ctx(), dest, self.__const)
    elif action == "store_true":
      setattr(Ctx(), dest, True)
    elif action == "store_false":
      setattr(Ctx(), dest, False)
    elif action == "append":
      getattr(Ctx(), dest).append(value)
    elif action == "count":
      setattr(Ctx(), dest, getattr(Ctx(), dest, 0) + 1)
    else:
      raise RuntimeError("unknown action %r" % self.__action)

    return 1
+
+
class IncompatibleOptionsException(FatalError):
  """Fatal error raised when mutually-incompatible options are combined."""
  pass
+
+
# Options that are not allowed to be used with --trunk-only.  (These
# are the symbol-related options; see
# SymbolOptionsWithTrunkOnlyException below.)
SYMBOL_OPTIONS = [
    '--symbol-transform',
    '--symbol-hints',
    '--force-branch',
    '--force-tag',
    '--exclude',
    '--keep-trivial-imports',
    '--symbol-default',
    '--no-cross-branch-commits',
    ]
+
class SymbolOptionsWithTrunkOnlyException(IncompatibleOptionsException):
  """--trunk-only was combined with one of the SYMBOL_OPTIONS."""

  def __init__(self):
    option_list = '\n    '.join(SYMBOL_OPTIONS)
    IncompatibleOptionsException.__init__(
        self,
        'The following symbol-related options cannot be used together\n'
        'with --trunk-only:\n'
        '    %s'
        % (option_list,)
        )
+
+
def not_both(opt1val, opt1name, opt2val, opt2name):
  """Raise IncompatibleOptionsException if both option values are set.

  OPT1VAL/OPT2VAL are the option values; OPT1NAME/OPT2NAME are the
  option names used in the error message."""

  if not (opt1val and opt2val):
    return
  raise IncompatibleOptionsException(
      "cannot pass both '%s' and '%s'." % (opt1name, opt2name,)
      )
+
+
+class RunOptions(object):
+ """A place to store meta-options that are used to start the conversion."""
+
+ def __init__(self, progname, cmd_args, pass_manager):
+ """Process the command-line options, storing run options to SELF.
+
+ PROGNAME is the name of the program, used in the usage string.
+ CMD_ARGS is the list of command-line arguments passed to the
+ program. PASS_MANAGER is an instance of PassManager, needed to
+ help process the -p and --help-passes options."""
+
+ self.progname = progname
+ self.cmd_args = cmd_args
+ self.pass_manager = pass_manager
+ self.start_pass = 1
+ self.end_pass = self.pass_manager.num_passes
+ self.profiling = False
+
+ self.projects = []
+
+ # A list of one list of SymbolStrategyRules for each project:
+ self.project_symbol_strategy_rules = []
+
+ parser = self.parser = optparse.OptionParser(
+ usage=usage,
+ description=self.get_description(),
+ add_help_option=False,
+ )
+ # A place to record any options used that are incompatible with
+ # --options:
+ parser.set_default('options_incompatible_options', [])
+
+ # Populate the options parser with the options, one group at a
+ # time:
+ parser.add_option_group(self._get_options_file_options_group())
+ parser.add_option_group(self._get_output_options_group())
+ parser.add_option_group(self._get_conversion_options_group())
+ parser.add_option_group(self._get_symbol_handling_options_group())
+ parser.add_option_group(self._get_subversion_properties_options_group())
+ parser.add_option_group(self._get_extraction_options_group())
+ parser.add_option_group(self._get_environment_options_group())
+ parser.add_option_group(self._get_partial_conversion_options_group())
+ parser.add_option_group(self._get_information_options_group())
+
+ (self.options, self.args) = parser.parse_args(args=self.cmd_args)
+
+ # Now the log level has been set; log the time when the run started:
+ Log().verbose(
+ time.strftime(
+ 'Conversion start time: %Y-%m-%d %I:%M:%S %Z',
+ time.localtime(Log().start_time)
+ )
+ )
+
+ if self.options.options_file_found:
+ # Check that no options that are incompatible with --options
+ # were used:
+ self.verify_option_compatibility()
+ else:
+ # --options was not specified. So do the main initialization
+ # based on other command-line options:
+ self.process_options()
+
+ # Check for problems with the options:
+ self.check_options()
+
  def get_description(self):
    """Return the program description used in the --help output."""
    return description
+
  def _get_options_file_options_group(self):
    """Build and return the option group for the --options option."""

    group = OptionGroup(
        self.parser, 'Configuration via options file'
        )
    self.parser.set_default('options_file_found', False)
    group.add_option(ManOption(
        '--options', type='string',
        action='callback', callback=self.callback_options,
        help=(
            'read the conversion options from PATH. This '
            'method allows more flexibility than using '
            'command-line options. See documentation for info'
            ),
        man_help=(
            'Read the conversion options from \\fIpath\\fR instead of from '
            'the command line. This option allows far more conversion '
            'flexibility than can be achieved using the command-line alone. '
            'See the documentation for more information. Only the following '
            'command-line options are allowed in combination with '
            '\\fB--options\\fR: \\fB-h\\fR/\\fB--help\\fR, '
            '\\fB--help-passes\\fR, \\fB--version\\fR, '
            '\\fB-v\\fR/\\fB--verbose\\fR, \\fB-q\\fR/\\fB--quiet\\fR, '
            '\\fB-p\\fR/\\fB--pass\\fR/\\fB--passes\\fR, \\fB--dry-run\\fR, '
            '\\fB--profile\\fR, \\fB--sort\\fR, \\fB--trunk-only\\fR, '
            '\\fB--encoding\\fR, and \\fB--fallback-encoding\\fR. '
            'Options are processed in the order specified on the command '
            'line.'
            ),
        metavar='PATH',
        ))
    return group
+
+ def _get_output_options_group(self):
+ group = OptionGroup(self.parser, 'Output options')
+ return group
+
  def _get_conversion_options_group(self):
    """Build and return the option group for general conversion options."""

    group = OptionGroup(self.parser, 'Conversion options')
    group.add_option(ContextOption(
        '--trunk-only',
        action='store_true',
        compatible_with_option=True,
        help='convert only trunk commits, not tags nor branches',
        man_help=(
            'Convert only trunk commits, not tags nor branches.'
            ),
        ))
    group.add_option(ManOption(
        '--encoding', type='string',
        action='callback', callback=self.callback_encoding,
        help=(
            'encoding for paths and log messages in CVS repos. '
            'If option is specified multiple times, encoders '
            'are tried in order until one succeeds. See '
            'http://docs.python.org/lib/standard-encodings.html '
            'for a list of standard Python encodings.'
            ),
        man_help=(
            'Use \\fIencoding\\fR as the encoding for filenames, log '
            'messages, and author names in the CVS repos. This option '
            'may be specified multiple times, in which case the encodings '
            'are tried in order until one succeeds. Default: ascii. See '
            'http://docs.python.org/lib/standard-encodings.html for a list '
            'of other standard encodings.'
            ),
        metavar='ENC',
        ))
    group.add_option(ManOption(
        '--fallback-encoding', type='string',
        action='callback', callback=self.callback_fallback_encoding,
        help='If all --encodings fail, use lossy encoding with ENC',
        man_help=(
            'If none of the encodings specified with \\fB--encoding\\fR '
            'succeed in decoding an author name or log message, then fall '
            'back to using \\fIencoding\\fR in lossy \'replace\' mode. '
            'Use of this option may cause information to be lost, but at '
            'least it allows the conversion to run to completion. This '
            'option only affects the encoding of log messages and author '
            'names; there is no fallback encoding for filenames. (By '
            'using an \\fB--options\\fR file, it is possible to specify '
            'a fallback encoding for filenames.) Default: disabled.'
            ),
        metavar='ENC',
        ))
    group.add_option(ContextOption(
        '--retain-conflicting-attic-files',
        action='store_true',
        help=(
            'if a file appears both in and out of '
            'the CVS Attic, then leave the attic version in a '
            'SVN directory called "Attic"'
            ),
        man_help=(
            'If a file appears both inside an outside of the CVS attic, '
            'retain the attic version in an SVN subdirectory called '
            '\'Attic\'. (Normally this situation is treated as a fatal '
            'error.)'
            ),
        ))

    return group
+
  def _get_symbol_handling_options_group(self):
    """Build and return the option group for symbol-handling options.

    All of these are IncompatibleOptions (not usable with --options);
    most are also rejected when --trunk-only is in effect."""

    group = OptionGroup(self.parser, 'Symbol handling')
    self.parser.set_default('symbol_transforms', [])
    group.add_option(IncompatibleOption(
        '--symbol-transform', type='string',
        action='callback', callback=self.callback_symbol_transform,
        help=(
            'transform symbol names from P to S, where P and S '
            'use Python regexp and reference syntax '
            'respectively. P must match the whole symbol name'
            ),
        man_help=(
            'Transform RCS/CVS symbol names before entering them into '
            'Subversion. \\fIpattern\\fR is a Python regexp pattern that '
            'is matches against the entire symbol name; \\fIreplacement\\fR '
            'is a replacement using Python\'s regexp reference syntax. '
            'You may specify any number of these options; they will be '
            'applied in the order given on the command line.'
            ),
        metavar='P:S',
        ))
    self.parser.set_default('symbol_strategy_rules', [])
    group.add_option(IncompatibleOption(
        '--symbol-hints', type='string',
        action='callback', callback=self.callback_symbol_hints,
        help='read symbol conversion hints from PATH',
        man_help=(
            'Read symbol conversion hints from \\fIpath\\fR. The format of '
            '\\fIpath\\fR is the same as the format output by '
            '\\fB--write-symbol-info\\fR, namely a text file with four '
            'whitespace-separated columns: \\fIproject-id\\fR, '
            '\\fIsymbol\\fR, \\fIconversion\\fR, and '
            '\\fIparent-lod-name\\fR. \\fIproject-id\\fR is the numerical '
            'ID of the project to which the symbol belongs, counting from '
            '0. \\fIproject-id\\fR can be set to \'.\' if '
            'project-specificity is not needed. \\fIsymbol-name\\fR is the '
            'name of the symbol being specified. \\fIconversion\\fR '
            'specifies how the symbol should be converted, and can be one '
            'of the values \'branch\', \'tag\', or \'exclude\'. If '
            '\\fIconversion\\fR is \'.\', then this rule does not affect '
            'how the symbol is converted. \\fIparent-lod-name\\fR is the '
            'name of the symbol from which this symbol should sprout, or '
            '\'.trunk.\' if the symbol should sprout from trunk. If '
            '\\fIparent-lod-name\\fR is omitted or \'.\', then this rule '
            'does not affect the preferred parent of this symbol. The file '
            'may contain blank lines or comment lines (lines whose first '
            'non-whitespace character is \'#\').'
            ),
        metavar='PATH',
        ))
    self.parser.set_default('symbol_default', 'heuristic')
    group.add_option(IncompatibleOption(
        '--symbol-default', type='choice',
        choices=['heuristic', 'strict', 'branch', 'tag'],
        action='store',
        help=(
            'specify how ambiguous symbols are converted. '
            'OPT is "heuristic" (default), "strict", "branch", '
            'or "tag"'
            ),
        man_help=(
            'Specify how to convert ambiguous symbols (those that appear in '
            'the CVS archive as both branches and tags). \\fIopt\\fR must '
            'be \'heuristic\' (decide how to treat each ambiguous symbol '
            'based on whether it was used more often as a branch/tag in '
            'CVS), \'strict\' (no default; every ambiguous symbol has to be '
            'resolved manually using \\fB--force-branch\\fR, '
            '\\fB--force-tag\\fR, or \\fB--exclude\\fR), \'branch\' (treat '
            'every ambiguous symbol as a branch), or \'tag\' (treat every '
            'ambiguous symbol as a tag). The default is \'heuristic\'.'
            ),
        metavar='OPT',
        ))
    group.add_option(IncompatibleOption(
        '--force-branch', type='string',
        action='callback', callback=self.callback_force_branch,
        help='force symbols matching REGEXP to be branches',
        man_help=(
            'Force symbols whose names match \\fIregexp\\fR to be branches. '
            '\\fIregexp\\fR must match the whole symbol name.'
            ),
        metavar='REGEXP',
        ))
    group.add_option(IncompatibleOption(
        '--force-tag', type='string',
        action='callback', callback=self.callback_force_tag,
        help='force symbols matching REGEXP to be tags',
        man_help=(
            'Force symbols whose names match \\fIregexp\\fR to be tags. '
            '\\fIregexp\\fR must match the whole symbol name.'
            ),
        metavar='REGEXP',
        ))
    group.add_option(IncompatibleOption(
        '--exclude', type='string',
        action='callback', callback=self.callback_exclude,
        help='exclude branches and tags matching REGEXP',
        man_help=(
            'Exclude branches and tags whose names match \\fIregexp\\fR '
            'from the conversion. \\fIregexp\\fR must match the whole '
            'symbol name.'
            ),
        metavar='REGEXP',
        ))
    self.parser.set_default('keep_trivial_imports', False)
    group.add_option(IncompatibleOption(
        '--keep-trivial-imports',
        action='store_true',
        help=(
            'do not exclude branches that were only used for '
            'a single import (usually these are unneeded)'
            ),
        man_help=(
            'Do not exclude branches that were only used for a single '
            'import. (By default such branches are excluded because they '
            'are usually created by the inappropriate use of \\fBcvs '
            'import\\fR.)'
            ),
        ))

    return group
+
  def _get_subversion_properties_options_group(self):
    """Build and return the option group for Subversion property options."""

    group = OptionGroup(self.parser, 'Subversion properties')
    group.add_option(ContextOption(
        '--username', type='string',
        action='store',
        help='username for cvs2svn-synthesized commits',
        man_help=(
            'Set the default username to \\fIname\\fR when cvs2svn needs '
            'to generate a commit for which CVS does not record the '
            'original username. This happens when a branch or tag is '
            'created. The default is to use no author at all for such '
            'commits.'
            ),
        metavar='NAME',
        ))
    self.parser.set_default('auto_props_files', [])
    group.add_option(IncompatibleOption(
        '--auto-props', type='string',
        action='append', dest='auto_props_files',
        help=(
            'set file properties from the auto-props section '
            'of a file in svn config format'
            ),
        man_help=(
            'Specify a file in the format of Subversion\'s config file, '
            'whose [auto-props] section can be used to set arbitrary '
            'properties on files in the Subversion repository based on '
            'their filenames. (The [auto-props] section header must be '
            'present; other sections of the config file, including the '
            'enable-auto-props setting, are ignored.) Filenames are matched '
            'to the filename patterns case-insensitively.'

            ),
        metavar='FILE',
        ))
    self.parser.set_default('mime_types_files', [])
    group.add_option(IncompatibleOption(
        '--mime-types', type='string',
        action='append', dest='mime_types_files',
        help=(
            'specify an apache-style mime.types file for setting '
            'svn:mime-type'
            ),
        man_help=(
            'Specify an apache-style mime.types \\fIfile\\fR for setting '
            'svn:mime-type.'
            ),
        metavar='FILE',
        ))
    self.parser.set_default('eol_from_mime_type', False)
    group.add_option(IncompatibleOption(
        '--eol-from-mime-type',
        action='store_true',
        help='set svn:eol-style from mime type if known',
        man_help=(
            'For files that don\'t have the kb expansion mode but have a '
            'known mime type, set the eol-style based on the mime type. '
            'For such files, set svn:eol-style to "native" if the mime type '
            'begins with "text/", and leave it unset (i.e., no EOL '
            'translation) otherwise. Files with unknown mime types are '
            'not affected by this option. This option has no effect '
            'unless the \\fB--mime-types\\fR option is also specified.'
            ),
        ))
    group.add_option(IncompatibleOption(
        '--default-eol', type='choice',
        choices=['binary', 'native', 'CRLF', 'LF', 'CR'],
        action='store',
        help=(
            'default svn:eol-style for non-binary files with '
            'undetermined mime types. STYLE is "binary" '
            '(default), "native", "CRLF", "LF", or "CR"'
            ),
        man_help=(
            'Set svn:eol-style to \\fIstyle\\fR for files that don\'t have '
            'the CVS \'kb\' expansion mode and whose end-of-line '
            'translation mode hasn\'t been determined by one of the other '
            'options. \\fIstyle\\fR must be \'binary\' (default), '
            '\'native\', \'CRLF\', \'LF\', or \'CR\'.'
            ),
        metavar='STYLE',
        ))
    self.parser.set_default('keywords_off', False)
    group.add_option(IncompatibleOption(
        '--keywords-off',
        action='store_true',
        help=(
            'don\'t set svn:keywords on any files (by default, '
            'cvs2svn sets svn:keywords on non-binary files to "%s")'
            % (config.SVN_KEYWORDS_VALUE,)
            ),
        man_help=(
            'By default, cvs2svn sets svn:keywords on CVS files to "author '
            'id date" if the mode of the RCS file in question is either kv, '
            'kvl or unset. If you use the --keywords-off switch, cvs2svn '
            'will not set svn:keywords for any file. While this will not '
            'touch the keywords in the contents of your files, Subversion '
            'will not expand them.'
            ),
        ))
    group.add_option(ContextOption(
        '--keep-cvsignore',
        action='store_true',
        help=(
            'keep .cvsignore files (in addition to creating '
            'the analogous svn:ignore properties)'
            ),
        man_help=(
            'Include \\fI.cvsignore\\fR files in the output. (Normally '
            'they are unneeded because cvs2svn sets the corresponding '
            '\\fIsvn:ignore\\fR properties.)'
            ),
        ))
    group.add_option(IncompatibleOption(
        '--cvs-revnums',
        action='callback', callback=self.callback_cvs_revnums,
        help='record CVS revision numbers as file properties',
        man_help=(
            'Record CVS revision numbers as file properties in the '
            'Subversion repository. (Note that unless it is removed '
            'explicitly, the last CVS revision number will remain '
            'associated with the file even after the file is changed '
            'within Subversion.)'
            ),
        ))

    # Deprecated options:
    group.add_option(IncompatibleOption(
        '--no-default-eol',
        action='store_const', dest='default_eol', const=None,
        help=optparse.SUPPRESS_HELP,
        man_help=optparse.SUPPRESS_HELP,
        ))
    self.parser.set_default('auto_props_ignore_case', True)
    # True is the default now, so this option has no effect:
    group.add_option(IncompatibleOption(
        '--auto-props-ignore-case',
        action='store_true',
        help=optparse.SUPPRESS_HELP,
        man_help=optparse.SUPPRESS_HELP,
        ))

    return group
+
+ def _get_extraction_options_group(self):
+ group = OptionGroup(self.parser, 'Extraction options')
+
+ return group
+
  def _get_environment_options_group(self):
    """Build and return the option group for environment-related options
    (temporary directory and external executables)."""

    group = OptionGroup(self.parser, 'Environment options')
    group.add_option(ContextOption(
        '--tmpdir', type='string',
        action='store',
        help=(
            'directory to use for temporary data files '
            '(default "cvs2svn-tmp")'
            ),
        man_help=(
            'Set the \\fIpath\\fR to use for temporary data. Default '
            'is a directory called \\fIcvs2svn-tmp\\fR under the current '
            'directory.'
            ),
        metavar='PATH',
        ))
    self.parser.set_default('co_executable', config.CO_EXECUTABLE)
    group.add_option(IncompatibleOption(
        '--co', type='string',
        action='store', dest='co_executable',
        help='path to the "co" program (required if --use-rcs)',
        man_help=(
            'Path to the \\fIco\\fR program. (\\fIco\\fR is needed if the '
            '\\fB--use-rcs\\fR option is used.)'
            ),
        metavar='PATH',
        ))
    self.parser.set_default('cvs_executable', config.CVS_EXECUTABLE)
    group.add_option(IncompatibleOption(
        '--cvs', type='string',
        action='store', dest='cvs_executable',
        help='path to the "cvs" program (required if --use-cvs)',
        man_help=(
            'Path to the \\fIcvs\\fR program. (\\fIcvs\\fR is needed if the '
            '\\fB--use-cvs\\fR option is used.)'
            ),
        metavar='PATH',
        ))
    group.add_option(ContextOption(
        '--sort', type='string',
        action='store', dest='sort_executable',
        compatible_with_option=True,
        help='path to the GNU "sort" program',
        man_help=(
            'Path to the GNU \\fIsort\\fR program. (cvs2svn requires GNU '
            'sort.)'
            ),
        metavar='PATH',
        ))

    return group
+
  def _get_partial_conversion_options_group(self):
    """Build and return the option group for running a subset of passes."""

    group = OptionGroup(self.parser, 'Partial conversions')
    group.add_option(ManOption(
        '--pass', type='string',
        action='callback', callback=self.callback_passes,
        help='execute only specified PASS of conversion',
        man_help=(
            'Execute only pass \\fIpass\\fR of the conversion. '
            '\\fIpass\\fR can be specified by name or by number (see '
            '\\fB--help-passes\\fR).'
            ),
        metavar='PASS',
        ))
    group.add_option(ManOption(
        '--passes', '-p', type='string',
        action='callback', callback=self.callback_passes,
        help=(
            'execute passes START through END, inclusive (PASS, '
            'START, and END can be pass names or numbers)'
            ),
        man_help=(
            'Execute passes \\fIstart\\fR through \\fIend\\fR of the '
            'conversion (inclusive). \\fIstart\\fR and \\fIend\\fR can be '
            'specified by name or by number (see \\fB--help-passes\\fR). '
            'If \\fIstart\\fR or \\fIend\\fR is missing, it defaults to '
            'the first or last pass, respectively. For this to work the '
            'earlier passes must have been completed before on the '
            'same CVS repository, and the generated data files must be '
            'in the temporary directory (see \\fB--tmpdir\\fR).'
            ),
        metavar='[START]:[END]',
        ))

    return group
+
+ def _get_information_options_group(self):
+ group = OptionGroup(self.parser, 'Information options')
+ group.add_option(ManOption(
+ '--version',
+ action='callback', callback=self.callback_version,
+ help='print the version number',
+ man_help='Print the version number.',
+ ))
+ group.add_option(ManOption(
+ '--help', '-h',
+ action="help",
+ help='print this usage message and exit with success',
+ man_help='Print the usage message and exit with success.',
+ ))
+ group.add_option(ManOption(
+ '--help-passes',
+ action='callback', callback=self.callback_help_passes,
+ help='list the available passes and their numbers',
+ man_help=(
+ 'Print the numbers and names of the conversion passes and '
+ 'exit with success.'
+ ),
+ ))
+ group.add_option(ManOption(
+ '--man',
+ action='callback', callback=self.callback_manpage,
+ help='write the manpage for this program to standard output',
+ man_help=(
+ 'Output the unix-style manpage for this program to standard '
+ 'output.'
+ ),
+ ))
+ group.add_option(ManOption(
+ '--verbose', '-v',
+ action='callback', callback=self.callback_verbose,
+ help='verbose (may be specified twice for debug output)',
+ man_help=(
+ 'Print more information while running. This option may be '
+ 'specified twice to output voluminous debugging information.'
+ ),
+ ))
+ group.add_option(ManOption(
+ '--quiet', '-q',
+ action='callback', callback=self.callback_quiet,
+ help='quiet (may be specified twice for very quiet)',
+ man_help=(
+ 'Print less information while running. This option may be '
+ 'specified twice to suppress all non-error output.'
+ ),
+ ))
+ group.add_option(ContextOption(
+ '--write-symbol-info', type='string',
+ action='store', dest='symbol_info_filename',
+ help='write information and statistics about CVS symbols to PATH.',
+ man_help=(
+ 'Write to \\fIpath\\fR symbol statistics and information about '
+ 'how symbols were converted during CollateSymbolsPass.'
+ ),
+ metavar='PATH',
+ ))
+ group.add_option(ContextOption(
+ '--skip-cleanup',
+ action='store_true',
+ help='prevent the deletion of intermediate files',
+ man_help='Prevent the deletion of temporary files.',
+ ))
+ group.add_option(ManOption(
+ '--profile',
+ action='callback', callback=self.callback_profile,
+ help='profile with \'hotshot\' (into file cvs2svn.hotshot)',
+ man_help=(
+ 'Profile with \'hotshot\' (into file \\fIcvs2svn.hotshot\\fR).'
+ ),
+ ))
+
+ return group
+
+ def callback_options(self, option, opt_str, value, parser):
+ parser.values.options_file_found = True
+ self.process_options_file(value)
+
+ def callback_encoding(self, option, opt_str, value, parser):
+ ctx = Ctx()
+
+ try:
+ ctx.cvs_author_decoder.add_encoding(value)
+ ctx.cvs_log_decoder.add_encoding(value)
+ ctx.cvs_filename_decoder.add_encoding(value)
+ except LookupError, e:
+ raise FatalError(str(e))
+
+ def callback_fallback_encoding(self, option, opt_str, value, parser):
+ ctx = Ctx()
+
+ try:
+ ctx.cvs_author_decoder.set_fallback_encoding(value)
+ ctx.cvs_log_decoder.set_fallback_encoding(value)
+ # Don't use fallback_encoding for filenames.
+ except LookupError, e:
+ raise FatalError(str(e))
+
+ def callback_help_passes(self, option, opt_str, value, parser):
+ self.pass_manager.help_passes()
+ sys.exit(0)
+
+ def callback_manpage(self, option, opt_str, value, parser):
+ raise NotImplementedError()
+
+ def callback_version(self, option, opt_str, value, parser):
+ sys.stdout.write(
+ '%s version %s\n' % (self.progname, VERSION)
+ )
+ sys.exit(0)
+
+ def callback_verbose(self, option, opt_str, value, parser):
+ Log().increase_verbosity()
+
+ def callback_quiet(self, option, opt_str, value, parser):
+ Log().decrease_verbosity()
+
+ def callback_passes(self, option, opt_str, value, parser):
+ if value.find(':') >= 0:
+ start_pass, end_pass = value.split(':')
+ self.start_pass = self.pass_manager.get_pass_number(start_pass, 1)
+ self.end_pass = self.pass_manager.get_pass_number(
+ end_pass, self.pass_manager.num_passes
+ )
+ else:
+ self.end_pass = \
+ self.start_pass = \
+ self.pass_manager.get_pass_number(value)
+
+ def callback_profile(self, option, opt_str, value, parser):
+ self.profiling = True
+
+ def callback_symbol_hints(self, option, opt_str, value, parser):
+ parser.values.symbol_strategy_rules.append(SymbolHintsFileRule(value))
+
+ def callback_force_branch(self, option, opt_str, value, parser):
+ parser.values.symbol_strategy_rules.append(
+ ForceBranchRegexpStrategyRule(value)
+ )
+
+ def callback_force_tag(self, option, opt_str, value, parser):
+ parser.values.symbol_strategy_rules.append(
+ ForceTagRegexpStrategyRule(value)
+ )
+
+ def callback_exclude(self, option, opt_str, value, parser):
+ parser.values.symbol_strategy_rules.append(
+ ExcludeRegexpStrategyRule(value)
+ )
+
+ def callback_cvs_revnums(self, option, opt_str, value, parser):
+ Ctx().svn_property_setters.append(CVSRevisionNumberSetter())
+
+ def callback_symbol_transform(self, option, opt_str, value, parser):
+ [pattern, replacement] = value.split(":")
+ try:
+ parser.values.symbol_transforms.append(
+ RegexpSymbolTransform(pattern, replacement)
+ )
+ except re.error:
+ raise FatalError("'%s' is not a valid regexp." % (pattern,))
+
+ def process_symbol_strategy_options(self):
+ """Process symbol strategy-related options."""
+
+ ctx = Ctx()
+ options = self.options
+
+ # Add the standard symbol name cleanup rules:
+ self.options.symbol_transforms.extend([
+ ReplaceSubstringsSymbolTransform('\\','/'),
+ # Remove leading, trailing, and repeated slashes:
+ NormalizePathsSymbolTransform(),
+ ])
+
+ if ctx.trunk_only:
+ if options.symbol_strategy_rules or options.keep_trivial_imports:
+ raise SymbolOptionsWithTrunkOnlyException()
+
+ else:
+ if not options.keep_trivial_imports:
+ options.symbol_strategy_rules.append(ExcludeTrivialImportBranchRule())
+
+ options.symbol_strategy_rules.append(UnambiguousUsageRule())
+ if options.symbol_default == 'strict':
+ pass
+ elif options.symbol_default == 'branch':
+ options.symbol_strategy_rules.append(AllBranchRule())
+ elif options.symbol_default == 'tag':
+ options.symbol_strategy_rules.append(AllTagRule())
+ elif options.symbol_default == 'heuristic':
+ options.symbol_strategy_rules.append(BranchIfCommitsRule())
+ options.symbol_strategy_rules.append(HeuristicStrategyRule())
+ else:
+ assert False
+
+ # Now add a rule whose job it is to pick the preferred parents of
+ # branches and tags:
+ options.symbol_strategy_rules.append(HeuristicPreferredParentRule())
+
+ def process_property_setter_options(self):
+ """Process the options that set SVN properties."""
+
+ ctx = Ctx()
+ options = self.options
+
+ for value in options.auto_props_files:
+ ctx.svn_property_setters.append(
+ AutoPropsPropertySetter(value, options.auto_props_ignore_case)
+ )
+
+ for value in options.mime_types_files:
+ ctx.svn_property_setters.append(MimeMapper(value))
+
+ ctx.svn_property_setters.append(CVSBinaryFileEOLStyleSetter())
+
+ ctx.svn_property_setters.append(CVSBinaryFileDefaultMimeTypeSetter())
+
+ if options.eol_from_mime_type:
+ ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
+
+ ctx.svn_property_setters.append(
+ DefaultEOLStyleSetter(options.default_eol)
+ )
+
+ ctx.svn_property_setters.append(SVNBinaryFileKeywordsPropertySetter())
+
+ if not options.keywords_off:
+ ctx.svn_property_setters.append(
+ KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE))
+
+ ctx.svn_property_setters.append(ExecutablePropertySetter())
+
+ def process_options(self):
+ """Do the main configuration based on command-line options.
+
+ This method is only called if the --options option was not
+ specified."""
+
+ raise NotImplementedError()
+
+ def check_options(self):
+ """Check the the run options are OK.
+
+ This should only be called after all options have been processed."""
+
+ # Convenience var, so we don't have to keep instantiating this Borg.
+ ctx = Ctx()
+
+ if not self.start_pass <= self.end_pass:
+ raise InvalidPassError(
+ 'Ending pass must not come before starting pass.')
+
+ if not ctx.dry_run and ctx.output_option is None:
+ raise FatalError('No output option specified.')
+
+ if ctx.output_option is not None:
+ ctx.output_option.check()
+
+ if not self.projects:
+ raise FatalError('No project specified.')
+
+ def verify_option_compatibility(self):
+ """Verify that no options incompatible with --options were used.
+
+ The --options option was specified. Verify that no incompatible
+ options or arguments were specified."""
+
+ if self.options.options_incompatible_options or self.args:
+ if self.options.options_incompatible_options:
+ oio = self.options.options_incompatible_options
+ Log().error(
+ '%s: The following options cannot be used in combination with '
+ 'the --options\n'
+ 'option:\n'
+ ' %s\n'
+ % (error_prefix, '\n '.join(oio))
+ )
+ if self.args:
+ Log().error(
+ '%s: No cvs-repos-path arguments are allowed with the --options '
+ 'option.\n'
+ % (error_prefix,)
+ )
+ sys.exit(1)
+
+ def process_options_file(self, options_filename):
+ """Read options from the file named OPTIONS_FILENAME.
+
+ Store the run options to SELF."""
+
+ g = {
+ 'ctx' : Ctx(),
+ 'run_options' : self,
+ }
+ execfile(options_filename, g)
+
+ def usage(self):
+ self.parser.print_help()
+
+
diff --git a/cvs2svn_lib/serializer.py b/cvs2svn_lib/serializer.py
new file mode 100644
index 0000000..24bd81c
--- /dev/null
+++ b/cvs2svn_lib/serializer.py
@@ -0,0 +1,146 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Picklers and unpicklers that are primed with known objects."""
+
+
+import cStringIO
+import marshal
+import cPickle
+import zlib
+
+
+class Serializer:
+ """An object able to serialize/deserialize some class of objects."""
+
+ def dumpf(self, f, object):
+ """Serialize OBJECT to file-like object F."""
+
+ raise NotImplementedError()
+
+ def dumps(self, object):
+ """Return a string containing OBJECT in serialized form."""
+
+ raise NotImplementedError()
+
+ def loadf(self, f):
+ """Return the next object deserialized from file-like object F."""
+
+ raise NotImplementedError()
+
+ def loads(self, s):
+ """Return the object deserialized from string S."""
+
+ raise NotImplementedError()
+
+
+class MarshalSerializer(Serializer):
+ """This class uses the marshal module to serialize/deserialize.
+
+ This means that it shares the limitations of the marshal module,
+ namely only being able to serialize a few simple python data types
+ without reference loops."""
+
+ def dumpf(self, f, object):
+ marshal.dump(object, f)
+
+ def dumps(self, object):
+ return marshal.dumps(object)
+
+ def loadf(self, f):
+ return marshal.load(f)
+
+ def loads(self, s):
+ return marshal.loads(s)
+
+
+class PrimedPickleSerializer(Serializer):
+ """This class acts as a pickler/unpickler with a pre-initialized memo.
+
+ The picklers and unpicklers are 'pre-trained' to recognize the
+ objects that are in the primer. If objects are recognized
+ from PRIMER, then only their persistent IDs need to be pickled
+ instead of the whole object. (Note that the memos needed for
+ pickling and unpickling are different.)
+
+ A new pickler/unpickler is created for each use, each time with the
+ memo initialized appropriately for pickling or unpickling."""
+
+ def __init__(self, primer):
+ """Prepare to make picklers/unpicklers with the specified primer.
+
+ The Pickler and Unpickler are 'primed' by pre-pickling PRIMER,
+ which can be an arbitrary object (e.g., a list of objects that are
+ expected to occur frequently in the objects to be serialized)."""
+
+ f = cStringIO.StringIO()
+ pickler = cPickle.Pickler(f, -1)
+ pickler.dump(primer)
+ self.pickler_memo = pickler.memo
+
+ unpickler = cPickle.Unpickler(cStringIO.StringIO(f.getvalue()))
+ unpickler.load()
+ self.unpickler_memo = unpickler.memo
+
+ def dumpf(self, f, object):
+ """Serialize OBJECT to file-like object F."""
+
+ pickler = cPickle.Pickler(f, -1)
+ pickler.memo = self.pickler_memo.copy()
+ pickler.dump(object)
+
+ def dumps(self, object):
+ """Return a string containing OBJECT in serialized form."""
+
+ f = cStringIO.StringIO()
+ self.dumpf(f, object)
+ return f.getvalue()
+
+ def loadf(self, f):
+ """Return the next object deserialized from file-like object F."""
+
+ unpickler = cPickle.Unpickler(f)
+ unpickler.memo = self.unpickler_memo.copy()
+ return unpickler.load()
+
+ def loads(self, s):
+ """Return the object deserialized from string S."""
+
+ return self.loadf(cStringIO.StringIO(s))
+
+
+class CompressingSerializer(Serializer):
+ """This class wraps other Serializers to compress their serialized data."""
+
+ def __init__(self, wrapee):
+ """Constructor. WRAPEE is the Serializer whose bitstream ought to be
+ compressed."""
+
+ self.wrapee = wrapee
+
+ def dumpf(self, f, object):
+ marshal.dump(zlib.compress(self.wrapee.dumps(object), 9), f)
+
+ def dumps(self, object):
+ return marshal.dumps(zlib.compress(self.wrapee.dumps(object), 9))
+
+ def loadf(self, f):
+ return self.wrapee.loads(zlib.decompress(marshal.load(f)))
+
+ def loads(self, s):
+ return self.wrapee.loads(zlib.decompress(marshal.loads(s)))
+
+
diff --git a/cvs2svn_lib/stats_keeper.py b/cvs2svn_lib/stats_keeper.py
new file mode 100644
index 0000000..1a82540
--- /dev/null
+++ b/cvs2svn_lib/stats_keeper.py
@@ -0,0 +1,189 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the StatsKeeper class.
+
+A StatsKeeper can pickle itself to a STATISTICS_FILE. This module
+also includes a function to read a StatsKeeper from a STATISTICS_FILE."""
+
+
+import time
+import cPickle
+from cStringIO import StringIO
+
+from cvs2svn_lib.cvs_item import CVSRevision
+from cvs2svn_lib.cvs_item import CVSBranch
+from cvs2svn_lib.cvs_item import CVSTag
+
+
+class StatsKeeper:
+ def __init__(self):
+ self._svn_rev_count = None
+ self._first_rev_date = 1L<<32
+ self._last_rev_date = 0
+ self._pass_timings = { }
+ self._stats_reflect_exclude = False
+ self.reset_cvs_rev_info()
+
+ def log_duration_for_pass(self, duration, pass_num, pass_name):
+ self._pass_timings[pass_num] = (pass_name, duration,)
+
+ def set_stats_reflect_exclude(self, value):
+ self._stats_reflect_exclude = value
+
+ def reset_cvs_rev_info(self):
+ self._repos_file_count = 0
+ self._repos_size = 0
+ self._cvs_revs_count = 0
+ self._cvs_branches_count = 0
+ self._cvs_tags_count = 0
+
+ # A set of tag_ids seen:
+ self._tag_ids = set()
+
+ # A set of branch_ids seen:
+ self._branch_ids = set()
+
+ def record_cvs_file(self, cvs_file):
+ self._repos_file_count += 1
+ self._repos_size += cvs_file.file_size
+
+ def _record_cvs_rev(self, cvs_rev):
+ self._cvs_revs_count += 1
+
+ if cvs_rev.timestamp < self._first_rev_date:
+ self._first_rev_date = cvs_rev.timestamp
+
+ if cvs_rev.timestamp > self._last_rev_date:
+ self._last_rev_date = cvs_rev.timestamp
+
+ def _record_cvs_branch(self, cvs_branch):
+ self._cvs_branches_count += 1
+ self._branch_ids.add(cvs_branch.symbol.id)
+
+ def _record_cvs_tag(self, cvs_tag):
+ self._cvs_tags_count += 1
+ self._tag_ids.add(cvs_tag.symbol.id)
+
+ def record_cvs_item(self, cvs_item):
+ if isinstance(cvs_item, CVSRevision):
+ self._record_cvs_rev(cvs_item)
+ elif isinstance(cvs_item, CVSBranch):
+ self._record_cvs_branch(cvs_item)
+ elif isinstance(cvs_item, CVSTag):
+ self._record_cvs_tag(cvs_item)
+ else:
+ raise RuntimeError('Unknown CVSItem type')
+
+ def set_svn_rev_count(self, count):
+ self._svn_rev_count = count
+
+ def svn_rev_count(self):
+ return self._svn_rev_count
+
+ def __getstate__(self):
+ state = self.__dict__.copy()
+ # This can get kinda large, so we don't store it:
+ return state
+
+ def archive(self, filename):
+ f = open(filename, 'wb')
+ cPickle.dump(self, f)
+ f.close()
+
+ def __str__(self):
+ f = StringIO()
+ f.write('\n')
+ f.write('cvs2svn Statistics:\n')
+ f.write('------------------\n')
+ f.write('Total CVS Files: %10i\n' % (self._repos_file_count,))
+ f.write('Total CVS Revisions: %10i\n' % (self._cvs_revs_count,))
+ f.write('Total CVS Branches: %10i\n' % (self._cvs_branches_count,))
+ f.write('Total CVS Tags: %10i\n' % (self._cvs_tags_count,))
+ f.write('Total Unique Tags: %10i\n' % (len(self._tag_ids),))
+ f.write('Total Unique Branches: %10i\n' % (len(self._branch_ids),))
+ f.write('CVS Repos Size in KB: %10i\n' % ((self._repos_size / 1024),))
+
+ if self._svn_rev_count is not None:
+ f.write('Total SVN Commits: %10i\n' % self._svn_rev_count)
+
+ f.write(
+ 'First Revision Date: %s\n' % (time.ctime(self._first_rev_date),)
+ )
+ f.write(
+ 'Last Revision Date: %s\n' % (time.ctime(self._last_rev_date),)
+ )
+ f.write('------------------')
+
+ if not self._stats_reflect_exclude:
+ f.write(
+ '\n'
+ '(These are unaltered CVS repository stats and do not\n'
+ ' reflect tags or branches excluded via --exclude)\n'
+ )
+
+ return f.getvalue()
+
+ @staticmethod
+ def _get_timing_format(value):
+ # Output times with up to 3 decimal places:
+ decimals = max(0, 4 - len('%d' % int(value)))
+ length = len(('%%.%df' % decimals) % value)
+ return '%%%d.%df' % (length, decimals,)
+
+ def single_pass_timing(self, pass_num):
+ (pass_name, duration,) = self._pass_timings[pass_num]
+ format = self._get_timing_format(duration)
+ time_string = format % (duration,)
+ return (
+ 'Time for pass%d (%s): %s seconds.'
+ % (pass_num, pass_name, time_string,)
+ )
+
+ def timings(self):
+ passes = self._pass_timings.keys()
+ passes.sort()
+ f = StringIO()
+ f.write('Timings (seconds):\n')
+ f.write('------------------\n')
+
+ total = 0.0
+ for pass_num in passes:
+ (pass_name, duration,) = self._pass_timings[pass_num]
+ total += duration
+
+ format = self._get_timing_format(total)
+
+ for pass_num in passes:
+ (pass_name, duration,) = self._pass_timings[pass_num]
+ f.write(
+ (format + ' pass%-2d %s\n') % (duration, pass_num, pass_name,)
+ )
+
+ f.write((format + ' total') % total)
+ return f.getvalue()
+
+
+def read_stats_keeper(filename):
+ """Factory function: Return a _StatsKeeper instance.
+
+ Read the instance from FILENAME as written by StatsKeeper.archive()."""
+
+ f = open(filename, 'rb')
+ retval = cPickle.load(f)
+ f.close()
+ return retval
+
diff --git a/cvs2svn_lib/stdout_delegate.py b/cvs2svn_lib/stdout_delegate.py
new file mode 100644
index 0000000..2b4e228
--- /dev/null
+++ b/cvs2svn_lib/stdout_delegate.py
@@ -0,0 +1,107 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains database facilities used by cvs2svn."""
+
+
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
+
+
+class StdoutDelegate(SVNRepositoryDelegate):
+ """Makes no changes to the disk, but writes out information to
+ STDOUT about what is happening in the SVN output. Of course, our
+ print statements will state that we're doing something, when in
+ reality, we aren't doing anything other than printing out that we're
+ doing something. Kind of zen, really."""
+
+ def __init__(self, total_revs):
+ self.total_revs = total_revs
+
+ def start_commit(self, revnum, revprops):
+ """Prints out the Subversion revision number of the commit that is
+ being started."""
+
+ Log().verbose("=" * 60)
+ Log().normal("Starting Subversion r%d / %d" % (revnum, self.total_revs))
+
+ def end_commit(self):
+ pass
+
+ def initialize_project(self, project):
+ Log().verbose(" Initializing project %s" % (project,))
+
+ def initialize_lod(self, lod):
+ Log().verbose(" Initializing %s" % (lod,))
+
+ def mkdir(self, lod, cvs_directory):
+ Log().verbose(
+ " New Directory %s" % (lod.get_path(cvs_directory.cvs_path),)
+ )
+
+ def add_path(self, s_item):
+ """Print a line stating what path we are 'adding'."""
+
+ Log().verbose(" Adding %s" % (s_item.cvs_rev.get_svn_path(),))
+
+ def change_path(self, s_item):
+ """Print a line stating what path we are 'changing'."""
+
+ Log().verbose(" Changing %s" % (s_item.cvs_rev.get_svn_path(),))
+
+ def delete_lod(self, lod):
+ """Print a line stating that we are 'deleting' LOD."""
+
+ Log().verbose(" Deleting %s" % (lod.get_path(),))
+
+ def delete_path(self, lod, cvs_path):
+ """Print a line stating that we are 'deleting' PATH."""
+
+ Log().verbose(" Deleting %s" % (lod.get_path(cvs_path.cvs_path),))
+
+ def _show_copy(self, src_path, dest_path, src_revnum):
+ """Print a line stating that we are 'copying' revision SRC_REVNUM
+ of SRC_PATH to DEST_PATH."""
+
+ Log().verbose(
+ " Copying revision %d of %s\n"
+ " to %s\n"
+ % (src_revnum, src_path, dest_path,)
+ )
+
+ def copy_lod(self, src_lod, dest_lod, src_revnum):
+ """Print a line stating that we are 'copying' revision SRC_REVNUM
+ of SRC_PATH to DEST_PATH."""
+
+ self._show_copy(src_lod.get_path(), dest_lod.get_path(), src_revnum)
+
+ def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
+ """Print a line stating that we are 'copying' revision SRC_REVNUM
+ of CVS_PATH from SRC_LOD to DEST_LOD."""
+
+ self._show_copy(
+ src_lod.get_path(cvs_path.cvs_path),
+ dest_lod.get_path(cvs_path.cvs_path),
+ src_revnum,
+ )
+
+ def finish(self):
+ """State that we are done creating our repository."""
+
+ Log().verbose("Finished creating Subversion repository.")
+ Log().quiet("Done.")
+
+
diff --git a/cvs2svn_lib/svn_commit.py b/cvs2svn_lib/svn_commit.py
new file mode 100644
index 0000000..25dc38e
--- /dev/null
+++ b/cvs2svn_lib/svn_commit.py
@@ -0,0 +1,381 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the SVNCommit classes.
+
+There are five types of SVNCommits:
+
+ SVNInitialProjectCommit -- Initializes a project (creates its trunk,
+ branches, and tags directories).
+
+ SVNPrimaryCommit -- Commits one or more CVSRevisions on one or more
+ lines of development.
+
+ SVNBranchCommit -- Creates or fills a branch; that is, copies files
+ from a source line of development to a target branch.
+
+ SVNTagCommit -- Creates or fills a tag; that is, copies files from a
+ source line of development to a target tag.
+
+ SVNPostCommit -- Updates trunk to reflect changes on a non-trunk
+ default branch.
+
+"""
+
+
+import textwrap
+
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+
+
+class SVNCommit:
+ """This represents one commit to the Subversion Repository."""
+
+ # textwrap.TextWrapper instance to be used for wrapping log messages:
+ text_wrapper = textwrap.TextWrapper(width=76)
+
+ def __init__(self, date, revnum):
+ """Instantiate an SVNCommit.
+
+ REVNUM is the SVN revision number of this commit."""
+
+ # The date of the commit, as an integer. While the SVNCommit is
+ # being built up, this contains the latest date seen so far. This
+ # member is set externally.
+ self.date = date
+
+ # The SVN revision number of this commit, as an integer.
+ self.revnum = revnum
+
+ def __getstate__(self):
+ return (self.date, self.revnum,)
+
+ def __setstate__(self, state):
+ (self.date, self.revnum,) = state
+
+ def get_cvs_items(self):
+ """Return a list containing the CVSItems in this commit."""
+
+ raise NotImplementedError()
+
+ def get_author(self):
+ """Return the author or this commit, or None if none is to be used.
+
+ The return value is exactly as the author appeared in the RCS
+ file, with undefined character encoding."""
+
+ raise NotImplementedError()
+
+ def get_log_msg(self):
+ """Return a log message for this commit.
+
+ The return value is exactly as the log message appeared in the RCS
+ file, with undefined character encoding."""
+
+ raise NotImplementedError()
+
+ def get_warning_summary(self):
+ """Return a summary of this commit that can be used in warnings."""
+
+ return '(subversion rev %s)' % (self.revnum,)
+
+ def get_description(self):
+ """Return a partial description of this SVNCommit, for logging."""
+
+ raise NotImplementedError()
+
+ def output(self, output_option):
+ """Cause this commit to be output to OUTPUT_OPTION.
+
+ This method is used for double-dispatch. Derived classes should
+ call the OutputOption.process_*_commit() method appropriate for
+ the type of SVNCommit."""
+
+ raise NotImplementedError()
+
+ def __str__(self):
+ """ Print a human-readable description of this SVNCommit.
+
+ This description is not intended to be machine-parseable."""
+
+ ret = "SVNCommit #: " + str(self.revnum) + "\n"
+ ret += " debug description: " + self.get_description() + "\n"
+ return ret
+
+
+class SVNInitialProjectCommit(SVNCommit):
+ def __init__(self, date, projects, revnum):
+ SVNCommit.__init__(self, date, revnum)
+ self.projects = list(projects)
+
+ def __getstate__(self):
+ return (
+ SVNCommit.__getstate__(self),
+ [project.id for project in self.projects],
+ )
+
+ def __setstate__(self, state):
+ (svn_commit_state, project_ids,) = state
+ SVNCommit.__setstate__(self, svn_commit_state)
+ self.projects = [
+ Ctx()._projects[project_id] for project_id in project_ids
+ ]
+
+ def get_cvs_items(self):
+ return []
+
+ def get_author(self):
+ return Ctx().username
+
+ def get_log_msg(self):
+ return self.text_wrapper.fill(
+ Ctx().initial_project_commit_message % {}
+ )
+
+ def get_description(self):
+ return 'Project initialization'
+
+ def output(self, output_option):
+ output_option.process_initial_project_commit(self)
+
+
+class SVNRevisionCommit(SVNCommit):
+ """A SVNCommit that includes actual CVS revisions."""
+
+ def __init__(self, cvs_revs, date, revnum):
+ SVNCommit.__init__(self, date, revnum)
+
+ self.cvs_revs = list(cvs_revs)
+
+ # This value is set lazily by _get_metadata():
+ self._metadata = None
+
+ def __getstate__(self):
+ """Return the part of the state represented by this mixin."""
+
+ return (
+ SVNCommit.__getstate__(self),
+ [cvs_rev.id for cvs_rev in self.cvs_revs],
+ )
+
+ def __setstate__(self, state):
+ """Restore the part of the state represented by this mixin."""
+
+ (svn_commit_state, cvs_rev_ids) = state
+ SVNCommit.__setstate__(self, svn_commit_state)
+
+ self.cvs_revs = [
+ cvs_rev
+ for (id, cvs_rev) in Ctx()._cvs_items_db.get_many(cvs_rev_ids)
+ ]
+ self._metadata = None
+
+ def get_cvs_items(self):
+ return self.cvs_revs
+
+ def _get_metadata(self):
+ """Return the Metadata instance for this commit."""
+
+ if self._metadata is None:
+ # Set self._metadata for this commit from that of the first cvs
+ # revision.
+ if not self.cvs_revs:
+ raise InternalError('SVNPrimaryCommit contains no CVS revisions')
+
+ metadata_id = self.cvs_revs[0].metadata_id
+ self._metadata = Ctx()._metadata_db[metadata_id]
+
+ return self._metadata
+
+ def get_author(self):
+ return self._get_metadata().author
+
+ def get_warning_summary(self):
+ retval = []
+ retval.append(SVNCommit.get_warning_summary(self) + ' Related files:')
+ for cvs_rev in self.cvs_revs:
+ retval.append(' ' + cvs_rev.cvs_file.filename)
+ return '\n'.join(retval)
+
+ def __str__(self):
+ """Return the revision part of a description of this SVNCommit.
+
+ Derived classes should append the output of this method to the
+ output of SVNCommit.__str__()."""
+
+ ret = []
+ ret.append(SVNCommit.__str__(self))
+ ret.append(' cvs_revs:\n')
+ for cvs_rev in self.cvs_revs:
+ ret.append(' %x\n' % (cvs_rev.id,))
+ return ''.join(ret)
+
+
+class SVNPrimaryCommit(SVNRevisionCommit):
+ def __init__(self, cvs_revs, date, revnum):
+ SVNRevisionCommit.__init__(self, cvs_revs, date, revnum)
+
+ def get_log_msg(self):
+ """Return the actual log message for this commit."""
+
+ return self._get_metadata().log_msg
+
+ def get_description(self):
+ return 'commit'
+
+ def output(self, output_option):
+ output_option.process_primary_commit(self)
+
+
+class SVNPostCommit(SVNRevisionCommit):
+ def __init__(self, motivating_revnum, cvs_revs, date, revnum):
+ SVNRevisionCommit.__init__(self, cvs_revs, date, revnum)
+
+ # The subversion revision number of the *primary* commit where the
+ # default branch changes actually happened. (NOTE: Secondary
+ # commits that fill branches and tags also have a motivating
+ # commit, but we do not record it because it is (currently) not
+ # needed for anything.) motivating_revnum is used when generating
+ # the log message for the commit that synchronizes the default
+ # branch with trunk.
+ #
+ # It is possible for multiple synchronization commits to refer to
+ # the same motivating commit revision number, and it is possible
+ # for a single synchronization commit to contain CVSRevisions on
+ # multiple different default branches.
+ self.motivating_revnum = motivating_revnum
+
+ def __getstate__(self):
+ return (
+ SVNRevisionCommit.__getstate__(self),
+ self.motivating_revnum,
+ )
+
+ def __setstate__(self, state):
+ (rev_state, self.motivating_revnum,) = state
+ SVNRevisionCommit.__setstate__(self, rev_state)
+
+ def get_cvs_items(self):
+ # It might seem that we should return
+ # SVNRevisionCommit.get_cvs_items(self) here, but this commit
+ # doesn't really include those CVSItems, but rather followup
+ # commits to those.
+ return []
+
+ def get_log_msg(self):
+ """Return a manufactured log message for this commit."""
+
+ return self.text_wrapper.fill(
+ Ctx().post_commit_message % {'revnum' : self.motivating_revnum}
+ )
+
+ def get_description(self):
+ return 'post-commit default branch(es)'
+
+ def output(self, output_option):
+ output_option.process_post_commit(self)
+
+
+class SVNSymbolCommit(SVNCommit):
+ def __init__(self, symbol, cvs_symbol_ids, date, revnum):
+ SVNCommit.__init__(self, date, revnum)
+
+ # The TypedSymbol that is filled in this SVNCommit.
+ self.symbol = symbol
+
+ self.cvs_symbol_ids = cvs_symbol_ids
+
+ def __getstate__(self):
+ return (
+ SVNCommit.__getstate__(self),
+ self.symbol.id, self.cvs_symbol_ids,
+ )
+
+ def __setstate__(self, state):
+ (svn_commit_state, symbol_id, self.cvs_symbol_ids) = state
+ SVNCommit.__setstate__(self, svn_commit_state)
+ self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
+
+ def get_cvs_items(self):
+ return [
+ cvs_symbol
+ for (id, cvs_symbol)
+ in Ctx()._cvs_items_db.get_many(self.cvs_symbol_ids)
+ ]
+
+ def _get_symbol_type(self):
+ """Return the type of the self.symbol ('branch' or 'tag')."""
+
+ raise NotImplementedError()
+
+ def get_author(self):
+ return Ctx().username
+
+ def get_log_msg(self):
+ """Return a manufactured log message for this commit."""
+
+ return self.text_wrapper.fill(
+ Ctx().symbol_commit_message % {
+ 'symbol_type' : self._get_symbol_type(),
+ 'symbol_name' : self.symbol.name,
+ }
+ )
+
+ def get_description(self):
+ return 'copying to %s %r' % (self._get_symbol_type(), self.symbol.name,)
+
+ def __str__(self):
+ """ Print a human-readable description of this SVNCommit.
+
+ This description is not intended to be machine-parseable."""
+
+ return (
+ SVNCommit.__str__(self)
+ + " symbolic name: %s\n" % (self.symbol.name,)
+ )
+
+
+class SVNBranchCommit(SVNSymbolCommit):
+ def __init__(self, symbol, cvs_symbol_ids, date, revnum):
+ if not isinstance(symbol, Branch):
+ raise InternalError('Incorrect symbol type %r' % (symbol,))
+
+ SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum)
+
+ def _get_symbol_type(self):
+ return 'branch'
+
+ def output(self, output_option):
+ output_option.process_branch_commit(self)
+
+
+class SVNTagCommit(SVNSymbolCommit):
+ def __init__(self, symbol, cvs_symbol_ids, date, revnum):
+ if not isinstance(symbol, Tag):
+ raise InternalError('Incorrect symbol type %r' % (symbol,))
+
+ SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum)
+
+ def _get_symbol_type(self):
+ return 'tag'
+
+ def output(self, output_option):
+ output_option.process_tag_commit(self)
+
+
diff --git a/cvs2svn_lib/svn_commit_creator.py b/cvs2svn_lib/svn_commit_creator.py
new file mode 100644
index 0000000..c87db38
--- /dev/null
+++ b/cvs2svn_lib/svn_commit_creator.py
@@ -0,0 +1,217 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the SVNCommitCreator class."""
+
+
+import time
+
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.cvs_item import CVSRevisionNoop
+from cvs2svn_lib.cvs_item import CVSBranchNoop
+from cvs2svn_lib.cvs_item import CVSTagNoop
+from cvs2svn_lib.changeset import OrderedChangeset
+from cvs2svn_lib.changeset import BranchChangeset
+from cvs2svn_lib.changeset import TagChangeset
+from cvs2svn_lib.svn_commit import SVNInitialProjectCommit
+from cvs2svn_lib.svn_commit import SVNPrimaryCommit
+from cvs2svn_lib.svn_commit import SVNPostCommit
+from cvs2svn_lib.svn_commit import SVNBranchCommit
+from cvs2svn_lib.svn_commit import SVNTagCommit
+from cvs2svn_lib.key_generator import KeyGenerator
+
+
+class SVNCommitCreator:
+  """This class creates and yields SVNCommits via process_changeset().
+
+  Each produced SVNCommit is assigned a fresh revision number from an
+  internal KeyGenerator.  All of the _process_* helpers (and
+  process_changeset itself) are generators."""
+
+  def __init__(self):
+    # The revision number to assign to the next new SVNCommit.
+    self.revnum_generator = KeyGenerator()
+
+    # A set containing the Projects that have already been
+    # initialized:
+    self._initialized_projects = set()
+
+  def _post_commit(self, cvs_revs, motivating_revnum, timestamp):
+    """Generate any SVNCommits needed to follow CVS_REVS.
+
+    That is, handle non-trunk default branches.  A revision on a CVS
+    non-trunk default branch is visible in a default CVS checkout of
+    HEAD.  So we copy such commits over to Subversion's trunk so that
+    checking out SVN trunk gives the same output as checking out of
+    CVS's default branch."""
+
+    # Keep only real (non-noop) revisions that are on a non-trunk
+    # default branch (ntdbr):
+    cvs_revs = [
+        cvs_rev
+        for cvs_rev in cvs_revs
+        if cvs_rev.ntdbr and not isinstance(cvs_rev, CVSRevisionNoop)
+        ]
+
+    if cvs_revs:
+      # Sort by filename for deterministic, reproducible output
+      # (Python 2 cmp-style comparator):
+      cvs_revs.sort(
+          lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename)
+          )
+      # Generate an SVNCommit for all of our default branch cvs_revs.
+      yield SVNPostCommit(
+          motivating_revnum, cvs_revs, timestamp,
+          self.revnum_generator.gen_id(),
+          )
+
+  def _process_revision_changeset(self, changeset, timestamp):
+    """Process CHANGESET, using TIMESTAMP as the commit time.
+
+    Create and yield one or more SVNCommits in the process.  CHANGESET
+    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
+    for any resulting SVNCommits."""
+
+    if not changeset.cvs_item_ids:
+      Log().warn('Changeset has no items: %r' % changeset)
+      return
+
+    Log().verbose('-' * 60)
+    Log().verbose('CVS Revision grouping:')
+    Log().verbose('  Time: %s' % time.ctime(timestamp))
+
+    # Generate an SVNCommit unconditionally.  Even if the only change in
+    # this group of CVSRevisions is a deletion of an already-deleted
+    # file (that is, a CVS revision in state 'dead' whose predecessor
+    # was also in state 'dead'), the conversion will still generate a
+    # Subversion revision containing the log message for the second dead
+    # revision, because we don't want to lose that information.
+
+    cvs_revs = list(changeset.iter_cvs_items())
+    if cvs_revs:
+      # Deterministic ordering by filename (Python 2 cmp comparator):
+      cvs_revs.sort(lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename))
+      svn_commit = SVNPrimaryCommit(
+          cvs_revs, timestamp, self.revnum_generator.gen_id()
+          )
+
+      yield svn_commit
+
+      # Record, for each revision, which SVN revision it landed in so
+      # that symbol filling can later find openings/closings:
+      for cvs_rev in cvs_revs:
+        Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)
+
+      # Generate an SVNPostCommit if we have default branch revs.  If
+      # some of the revisions in this commit happened on a non-trunk
+      # default branch, then those files have to be copied into trunk
+      # manually after being changed on the branch (because the RCS
+      # "default branch" appears as head, i.e., trunk, in practice).
+      # Unfortunately, Subversion doesn't support copies with sources
+      # in the current txn.  All copies must be based in committed
+      # revisions.  Therefore, we generate the copies in a new
+      # revision.
+      for svn_post_commit in self._post_commit(
+            cvs_revs, svn_commit.revnum, timestamp
+            ):
+        yield svn_post_commit
+
+  def _process_tag_changeset(self, changeset, timestamp):
+    """Process TagChangeset CHANGESET, producing a SVNTagCommit.
+
+    Filter out CVSTagNoops.  If no CVSTags are left, don't generate a
+    SVNTagCommit."""
+
+    # Tag changesets should never reach this point in --trunk-only
+    # conversions; treat it as an internal invariant violation.
+    if Ctx().trunk_only:
+      raise InternalError(
+          'TagChangeset encountered during a --trunk-only conversion')
+
+    cvs_tag_ids = [
+        cvs_tag.id
+        for cvs_tag in changeset.iter_cvs_items()
+        if not isinstance(cvs_tag, CVSTagNoop)
+        ]
+    if cvs_tag_ids:
+      yield SVNTagCommit(
+          changeset.symbol, cvs_tag_ids, timestamp,
+          self.revnum_generator.gen_id(),
+          )
+    else:
+      Log().debug(
+          'Omitting %r because it contains only CVSTagNoops' % (changeset,)
+          )
+
+  def _process_branch_changeset(self, changeset, timestamp):
+    """Process BranchChangeset CHANGESET, producing a SVNBranchCommit.
+
+    Filter out CVSBranchNoops.  If no CVSBranches are left, don't
+    generate a SVNBranchCommit."""
+
+    # Branch changesets should never reach this point in --trunk-only
+    # conversions; treat it as an internal invariant violation.
+    if Ctx().trunk_only:
+      raise InternalError(
+          'BranchChangeset encountered during a --trunk-only conversion')
+
+    cvs_branches = [
+        cvs_branch
+        for cvs_branch in changeset.iter_cvs_items()
+        if not isinstance(cvs_branch, CVSBranchNoop)
+        ]
+    if cvs_branches:
+      svn_commit = SVNBranchCommit(
+          changeset.symbol,
+          [cvs_branch.id for cvs_branch in cvs_branches],
+          timestamp,
+          self.revnum_generator.gen_id(),
+          )
+      yield svn_commit
+      # Record the branch openings for later symbol filling:
+      for cvs_branch in cvs_branches:
+        Ctx()._symbolings_logger.log_branch_revision(
+            cvs_branch, svn_commit.revnum
+            )
+    else:
+      Log().debug(
+          'Omitting %r because it contains only CVSBranchNoops' % (changeset,)
+          )
+
+  def process_changeset(self, changeset, timestamp):
+    """Process CHANGESET, using TIMESTAMP for all of its entries.
+
+    Return a generator that generates the resulting SVNCommits.
+
+    The changesets must be fed to this function in proper dependency
+    order."""
+
+    # First create any new projects that might be opened by the
+    # changeset:
+    projects_opened = \
+        changeset.get_projects_opened() - self._initialized_projects
+    if projects_opened:
+      if Ctx().cross_project_commits:
+        # One combined initialization commit for all new projects:
+        yield SVNInitialProjectCommit(
+            timestamp, projects_opened, self.revnum_generator.gen_id()
+            )
+      else:
+        # One initialization commit per project:
+        for project in projects_opened:
+          yield SVNInitialProjectCommit(
+              timestamp, [project], self.revnum_generator.gen_id()
+              )
+      self._initialized_projects.update(projects_opened)
+
+    # Dispatch on the concrete changeset type:
+    if isinstance(changeset, OrderedChangeset):
+      for svn_commit \
+              in self._process_revision_changeset(changeset, timestamp):
+        yield svn_commit
+    elif isinstance(changeset, TagChangeset):
+      for svn_commit in self._process_tag_changeset(changeset, timestamp):
+        yield svn_commit
+    elif isinstance(changeset, BranchChangeset):
+      for svn_commit in self._process_branch_changeset(changeset, timestamp):
+        yield svn_commit
+    else:
+      raise TypeError('Illegal changeset %r' % changeset)
+
+
diff --git a/cvs2svn_lib/svn_commit_item.py b/cvs2svn_lib/svn_commit_item.py
new file mode 100644
index 0000000..8bc9015
--- /dev/null
+++ b/cvs2svn_lib/svn_commit_item.py
@@ -0,0 +1,50 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains class SVNCommitItem."""
+
+
+from cvs2svn_lib.context import Ctx
+
+
+class SVNCommitItem:
+  """A wrapper class for CVSRevision objects upon which
+  Subversion-related data (such as properties) may be hung."""
+
+  def __init__(self, cvs_rev, svn_props_changed):
+    """Initialize instance and record the properties for this file.
+    SVN_PROPS_CHANGED indicates whether the svn: properties are known
+    to have changed since the last revision.
+
+    The properties are set by the SVNPropertySetters in
+    Ctx().svn_property_setters, each of which is passed this instance
+    and may mutate self.svn_props."""
+
+    self.cvs_rev = cvs_rev
+    # Did the svn properties change for this file (i.e., do they have
+    # to be written to the dumpfile?)
+    self.svn_props_changed = svn_props_changed
+
+    # The properties for this item as a map { key : value }.  If VALUE
+    # is None, the property should be left unset.
+    self.svn_props = { }
+
+    for svn_property_setter in Ctx().svn_property_setters:
+      svn_property_setter.set_properties(self)
+
+  def has_keywords(self):
+    # True iff a non-empty svn:keywords property was set on this item.
+    return bool(self.svn_props.get('svn:keywords', None))
+
+
diff --git a/cvs2svn_lib/svn_output_option.py b/cvs2svn_lib/svn_output_option.py
new file mode 100644
index 0000000..86d1ba4
--- /dev/null
+++ b/cvs2svn_lib/svn_output_option.py
@@ -0,0 +1,753 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Classes for outputting the converted repository to SVN."""
+
+
+import os
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import FatalException
+from cvs2svn_lib.common import error_prefix
+from cvs2svn_lib.common import format_date
+from cvs2svn_lib.common import PathsNotDisjointException
+from cvs2svn_lib.common import verify_paths_disjoint
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.process import CommandFailedException
+from cvs2svn_lib.process import check_command_runs
+from cvs2svn_lib.process import call_command
+from cvs2svn_lib.cvs_file import CVSDirectory
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.symbol import LineOfDevelopment
+from cvs2svn_lib.cvs_item import CVSRevisionAdd
+from cvs2svn_lib.cvs_item import CVSRevisionChange
+from cvs2svn_lib.cvs_item import CVSRevisionDelete
+from cvs2svn_lib.cvs_item import CVSRevisionNoop
+from cvs2svn_lib.repository_mirror import RepositoryMirror
+from cvs2svn_lib.repository_mirror import PathExistsError
+from cvs2svn_lib.svn_commit_item import SVNCommitItem
+from cvs2svn_lib.openings_closings import SymbolingsReader
+from cvs2svn_lib.fill_source import get_source_set
+from cvs2svn_lib.stdout_delegate import StdoutDelegate
+from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate
+from cvs2svn_lib.repository_delegate import RepositoryDelegate
+from cvs2svn_lib.output_option import OutputOption
+
+
+class SVNOutputOption(OutputOption):
+  """An OutputOption appropriate for output to Subversion.
+
+  Maintains a RepositoryMirror of the evolving SVN tree and forwards
+  every repository action to a list of delegates (dumpfile writer,
+  repository loader, stdout progress reporter)."""
+
+  class ParentMissingError(Exception):
+    """The parent of a path is missing.
+
+    Exception raised if an attempt is made to add a path to the
+    repository mirror but the parent's path doesn't exist in the
+    youngest revision of the repository."""
+
+    pass
+
+  class ExpectedDirectoryError(Exception):
+    """A file was found where a directory was expected."""
+
+    pass
+
+  def __init__(self, author_transforms=None):
+    # AUTHOR_TRANSFORMS, if given, maps CVS author names to replacement
+    # names; both sides are normalized to UTF-8 byte strings.
+    self._mirror = RepositoryMirror()
+
+    def to_utf8(s):
+      # Python 2: encode unicode to utf8 bytes, pass str through.
+      if isinstance(s, unicode):
+        return s.encode('utf8')
+      else:
+        return s
+
+    self.author_transforms = {}
+    if author_transforms is not None:
+      for (cvsauthor, name) in author_transforms.iteritems():
+        cvsauthor = to_utf8(cvsauthor)
+        name = to_utf8(name)
+        self.author_transforms[cvsauthor] = name
+
+  def register_artifacts(self, which_pass):
+    """Register the temp files this output option needs in WHICH_PASS."""
+
+    # These artifacts are needed for SymbolingsReader:
+    artifact_manager.register_temp_file_needed(
+        config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
+        )
+    artifact_manager.register_temp_file_needed(
+        config.SYMBOL_OFFSETS_DB, which_pass
+        )
+
+    self._mirror.register_artifacts(which_pass)
+    Ctx().revision_reader.register_artifacts(which_pass)
+
+  def check_symbols(self, symbol_map):
+    """Check that the paths of all included LODs are set and disjoint."""
+
+    error_found = False
+
+    # Check that all included LODs have their base paths set, and
+    # collect the paths into a list:
+    paths = []
+    for lod in symbol_map.itervalues():
+      if isinstance(lod, LineOfDevelopment):
+        if lod.base_path is None:
+          Log().error('%s: No path was set for %r\n' % (error_prefix, lod,))
+          error_found = True
+        else:
+          paths.append(lod.base_path)
+
+    # Check that the SVN paths of all LODS are disjoint:
+    try:
+      verify_paths_disjoint(*paths)
+    except PathsNotDisjointException, e:
+      Log().error(str(e))
+      error_found = True
+
+    # Report all problems before aborting, so the user can fix them in
+    # one pass:
+    if error_found:
+      raise FatalException(
+          'Please fix the above errors and restart CollateSymbolsPass'
+          )
+
+  def setup(self, svn_rev_count):
+    """Prepare for output: open the mirror, the symbolings reader, the
+    revision reader, and install the stdout progress delegate."""
+
+    self._symbolings_reader = SymbolingsReader()
+    self._mirror.open()
+    self._delegates = []
+    Ctx().revision_reader.start()
+    self.add_delegate(StdoutDelegate(svn_rev_count))
+
+  def _get_author(self, svn_commit):
+    # Apply the author_transforms mapping; unknown authors pass through
+    # unchanged.
+    author = svn_commit.get_author()
+    name = self.author_transforms.get(author, author)
+    return name
+
+  def _get_revprops(self, svn_commit):
+    """Return the Subversion revprops for this SVNCommit."""
+
+    return {
+        'svn:author' : self._get_author(svn_commit),
+        'svn:log'    : svn_commit.get_log_msg(),
+        'svn:date'   : format_date(svn_commit.date),
+        }
+
+  def start_commit(self, revnum, revprops):
+    """Start a new commit."""
+
+    self._mirror.start_commit(revnum)
+    self._invoke_delegates('start_commit', revnum, revprops)
+
+  def end_commit(self):
+    """Called at the end of each commit.
+
+    This method copies the newly created nodes to the on-disk nodes
+    db."""
+
+    self._mirror.end_commit()
+    self._invoke_delegates('end_commit')
+
+  def delete_lod(self, lod):
+    """Delete the main path for LOD from the tree.
+
+    The path must currently exist.  Silently refuse to delete trunk
+    paths."""
+
+    if isinstance(lod, Trunk):
+      # Never delete a Trunk path.
+      return
+
+    self._mirror.get_current_lod_directory(lod).delete()
+    self._invoke_delegates('delete_lod', lod)
+
+  def delete_path(self, cvs_path, lod, should_prune=False):
+    """Delete CVS_PATH from LOD.
+
+    If SHOULD_PRUNE is set, also delete any ancestor directories that
+    become empty as a result."""
+
+    # Deleting the LOD root is delegated to delete_lod():
+    if cvs_path.parent_directory is None:
+      self.delete_lod(lod)
+      return
+
+    parent_node = self._mirror.get_current_path(
+        cvs_path.parent_directory, lod
+        )
+    del parent_node[cvs_path]
+    self._invoke_delegates('delete_path', lod, cvs_path)
+
+    if should_prune:
+      # Walk upwards deleting directories as long as they are empty:
+      while parent_node is not None and len(parent_node) == 0:
+        # A drawback of this code is that we issue a delete for each
+        # path and not just a single delete for the topmost directory
+        # pruned.
+        node = parent_node
+        cvs_path = node.cvs_path
+        if cvs_path.parent_directory is None:
+          parent_node = None
+          self.delete_lod(lod)
+        else:
+          parent_node = node.parent_mirror_dir
+          node.delete()
+          self._invoke_delegates('delete_path', lod, cvs_path)
+
+  def initialize_project(self, project):
+    """Create the basic structure for PROJECT."""
+
+    self._invoke_delegates('initialize_project', project)
+
+    # Don't invoke delegates.
+    self._mirror.add_lod(project.get_trunk())
+
+  def change_path(self, cvs_rev):
+    """Register a change in self._youngest for the CVS_REV's svn_path."""
+
+    # We do not have to update the nodes because our mirror is only
+    # concerned with the presence or absence of paths, and a file
+    # content change does not cause any path changes.
+    self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))
+
+  def _mkdir_p(self, cvs_directory, lod):
+    """Make sure that CVS_DIRECTORY exists in LOD.
+
+    If not, create it, calling delegates.  Return the node for
+    CVS_DIRECTORY.
+
+    Raises ExpectedDirectoryError if a file blocks the directory path."""
+
+    try:
+      node = self._mirror.get_current_lod_directory(lod)
+    except KeyError:
+      # The LOD itself doesn't exist yet; create it first.
+      node = self._mirror.add_lod(lod)
+      self._invoke_delegates('initialize_lod', lod)
+
+    # Descend from the LOD root, creating missing intermediate dirs.
+    # get_ancestry()[1:] skips the root entry itself.
+    for sub_path in cvs_directory.get_ancestry()[1:]:
+      try:
+        node = node[sub_path]
+      except KeyError:
+        node = node.mkdir(sub_path)
+        self._invoke_delegates('mkdir', lod, sub_path)
+      if node is None:
+        # A file entry maps to None in the mirror, so a None here means
+        # a file is in the way of the directory we need.
+        raise self.ExpectedDirectoryError(
+            'File found at \'%s\' where directory was expected.' % (sub_path,)
+            )
+
+    return node
+
+  def add_path(self, cvs_rev):
+    """Add the CVS_REV's svn_path to the repository mirror.
+
+    Create any missing intermediate paths."""
+
+    cvs_file = cvs_rev.cvs_file
+    parent_path = cvs_file.parent_directory
+    lod = cvs_rev.lod
+    parent_node = self._mkdir_p(parent_path, lod)
+    parent_node.add_file(cvs_file)
+    self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))
+
+  def copy_lod(self, src_lod, dest_lod, src_revnum):
+    """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.
+
+    In the youngest revision of the repository, the destination LOD
+    *must not* already exist.
+
+    Return the new node at DEST_LOD.  Note that this node is not
+    necessarily writable, though its parent node necessarily is."""
+
+    node = self._mirror.copy_lod(src_lod, dest_lod, src_revnum)
+    self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum)
+    return node
+
+  def copy_path(
+        self, cvs_path, src_lod, dest_lod, src_revnum, create_parent=False
+        ):
+    """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DST_LOD.
+
+    In the youngest revision of the repository, the destination's
+    parent *must* exist unless CREATE_PARENT is specified.  But the
+    destination itself *must not* exist.
+
+    Return the new node at (CVS_PATH, DEST_LOD), as a
+    CurrentMirrorDirectory."""
+
+    # Copying the LOD root is delegated to copy_lod():
+    if cvs_path.parent_directory is None:
+      return self.copy_lod(src_lod, dest_lod, src_revnum)
+
+    # Get the node of our source, or None if it is a file:
+    src_node = self._mirror.get_old_path(cvs_path, src_lod, src_revnum)
+
+    # Get the parent path of the destination:
+    if create_parent:
+      dest_parent_node = self._mkdir_p(cvs_path.parent_directory, dest_lod)
+    else:
+      try:
+        dest_parent_node = self._mirror.get_current_path(
+            cvs_path.parent_directory, dest_lod
+            )
+      except KeyError:
+        raise self.ParentMissingError(
+            'Attempt to add path \'%s\' to repository mirror, '
+            'but its parent directory doesn\'t exist in the mirror.'
+            % (dest_lod.get_path(cvs_path.cvs_path),)
+            )
+
+    if cvs_path in dest_parent_node:
+      raise PathExistsError(
+          'Attempt to add path \'%s\' to repository mirror '
+          'when it already exists in the mirror.'
+          % (dest_lod.get_path(cvs_path.cvs_path),)
+          )
+
+    dest_parent_node[cvs_path] = src_node
+    self._invoke_delegates(
+        'copy_path',
+        cvs_path, src_lod, dest_lod, src_revnum
+        )
+
+    return dest_parent_node[cvs_path]
+
+  def fill_symbol(self, svn_symbol_commit, fill_source):
+    """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT.
+
+    The symbolic name is guaranteed to exist in the Subversion
+    repository by the end of this call, even if there are no paths
+    under it."""
+
+    symbol = svn_symbol_commit.symbol
+
+    # Pass the existing destination node if the symbol directory
+    # already exists, or None so _fill_directory() creates it:
+    try:
+      dest_node = self._mirror.get_current_lod_directory(symbol)
+    except KeyError:
+      self._fill_directory(symbol, None, fill_source, None)
+    else:
+      self._fill_directory(symbol, dest_node, fill_source, None)
+
+  def _fill_directory(self, symbol, dest_node, fill_source, parent_source):
+    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.
+
+    Use items from FILL_SOURCE, and recurse into the child items.
+
+    Fill SYMBOL starting at the path FILL_SOURCE.cvs_path.  DEST_NODE
+    is the node of this destination path, or None if the destination
+    does not yet exist.  All directories above this path have already
+    been filled.  FILL_SOURCE is a FillSource instance describing the
+    items within a subtree of the repository that still need to be
+    copied to the destination.
+
+    PARENT_SOURCE is the SVNRevisionRange that was used to copy the
+    parent directory, if it was copied in this commit.  We prefer to
+    copy from the same source as was used for the parent, since it
+    typically requires less touching-up.  If PARENT_SOURCE is None,
+    then the parent directory was not copied in this commit, so no
+    revision is preferable to any other."""
+
+    copy_source = fill_source.compute_best_source(parent_source)
+
+    # Figure out if we shall copy to this destination and delete any
+    # destination path that is in the way.
+    if dest_node is None:
+      # The destination does not exist at all, so it definitely has to
+      # be copied:
+      dest_node = self.copy_path(
+          fill_source.cvs_path, copy_source.source_lod,
+          symbol, copy_source.opening_revnum
+          )
+    elif (parent_source is not None) and (
+          copy_source.source_lod != parent_source.source_lod
+          or copy_source.opening_revnum != parent_source.opening_revnum
+          ):
+      # The parent path was copied from a different source than we
+      # need to use, so we have to delete the version that was copied
+      # with the parent then re-copy from the correct source:
+      self.delete_path(fill_source.cvs_path, symbol)
+      dest_node = self.copy_path(
+          fill_source.cvs_path, copy_source.source_lod,
+          symbol, copy_source.opening_revnum
+          )
+    else:
+      # The existing copy (made along with the parent) is already from
+      # the right source; keep it and remember that source.
+      copy_source = parent_source
+
+    # The map {CVSPath : FillSource} of entries within this directory
+    # that need filling:
+    src_entries = fill_source.get_subsource_map()
+
+    if copy_source is not None:
+      self._prune_extra_entries(
+          fill_source.cvs_path, symbol, dest_node, src_entries
+          )
+
+    return self._cleanup_filled_directory(
+        symbol, dest_node, src_entries, copy_source
+        )
+
+  def _cleanup_filled_directory(
+        self, symbol, dest_node, src_entries, copy_source
+        ):
+    """The directory at DEST_NODE has been filled and pruned; recurse.
+
+    Recurse into the SRC_ENTRIES, in alphabetical order.  If DEST_NODE
+    was copied in this revision, COPY_SOURCE should indicate where it
+    was copied from; otherwise, COPY_SOURCE should be None."""
+
+    # Sort for deterministic output order:
+    cvs_paths = src_entries.keys()
+    cvs_paths.sort()
+    for cvs_path in cvs_paths:
+      if isinstance(cvs_path, CVSDirectory):
+        # Path is a CVSDirectory:
+        try:
+          dest_subnode = dest_node[cvs_path]
+        except KeyError:
+          # Path doesn't exist yet; it has to be created:
+          dest_node = self._fill_directory(
+              symbol, None, src_entries[cvs_path], None
+              ).parent_mirror_dir
+        else:
+          # Path already exists, but might have to be cleaned up:
+          dest_node = self._fill_directory(
+              symbol, dest_subnode, src_entries[cvs_path], copy_source
+              ).parent_mirror_dir
+      else:
+        # Path is a CVSFile:
+        self._fill_file(
+            symbol, cvs_path in dest_node, src_entries[cvs_path], copy_source
+            )
+        # Reread dest_node since the call to _fill_file() might have
+        # made it writable:
+        dest_node = self._mirror.get_current_path(
+            dest_node.cvs_path, dest_node.lod
+            )
+
+    return dest_node
+
+  def _fill_file(self, symbol, dest_existed, fill_source, parent_source):
+    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.
+
+    Use items from FILL_SOURCE.
+
+    Fill SYMBOL at path FILL_SOURCE.cvs_path.  DEST_NODE is the node
+    of this destination path, or None if the destination does not yet
+    exist.  All directories above this path have already been filled
+    as needed.  FILL_SOURCE is a FillSource instance describing the
+    item that needs to be copied to the destination.
+
+    PARENT_SOURCE is the source from which the parent directory was
+    copied, or None if the parent directory was not copied during this
+    commit.  We prefer to copy from PARENT_SOURCE, since it typically
+    requires less touching-up.  If PARENT_SOURCE is None, then the
+    parent directory was not copied in this commit, so no revision is
+    preferable to any other."""
+
+    copy_source = fill_source.compute_best_source(parent_source)
+
+    # Figure out if we shall copy to this destination and delete any
+    # destination path that is in the way.
+    if not dest_existed:
+      # The destination does not exist at all, so it definitely has to
+      # be copied:
+      self.copy_path(
+          fill_source.cvs_path, copy_source.source_lod,
+          symbol, copy_source.opening_revnum
+          )
+    elif (parent_source is not None) and (
+          copy_source.source_lod != parent_source.source_lod
+          or copy_source.opening_revnum != parent_source.opening_revnum
+          ):
+      # The parent path was copied from a different source than we
+      # need to use, so we have to delete the version that was copied
+      # with the parent and then re-copy from the correct source:
+      self.delete_path(fill_source.cvs_path, symbol)
+      self.copy_path(
+          fill_source.cvs_path, copy_source.source_lod,
+          symbol, copy_source.opening_revnum
+          )
+
+  def _prune_extra_entries(
+        self, dest_cvs_path, symbol, dest_node, src_entries
+        ):
+    """Delete any entries in DEST_NODE that are not in SRC_ENTRIES."""
+
+    delete_list = [
+        cvs_path
+        for cvs_path in dest_node
+        if cvs_path not in src_entries
+        ]
+
+    # Sort the delete list so that the output is in a consistent
+    # order:
+    delete_list.sort()
+    for cvs_path in delete_list:
+      del dest_node[cvs_path]
+      self._invoke_delegates('delete_path', symbol, cvs_path)
+
+  def add_delegate(self, delegate):
+    """Adds DELEGATE to self._delegates.
+
+    For every delegate you add, whenever a repository action method is
+    performed, delegate's corresponding repository action method is
+    called.  Multiple delegates will be called in the order that they
+    are added.  See SVNRepositoryDelegate for more information."""
+
+    self._delegates.append(delegate)
+
+  def _invoke_delegates(self, method, *args):
+    """Invoke a method on each delegate.
+
+    Iterate through each of our delegates, in the order that they were
+    added, and call the delegate's method named METHOD with the
+    arguments in ARGS."""
+
+    for delegate in self._delegates:
+      getattr(delegate, method)(*args)
+
+  def process_initial_project_commit(self, svn_commit):
+    """Create the initial structure for each project in SVN_COMMIT."""
+
+    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
+
+    for project in svn_commit.projects:
+      self.initialize_project(project)
+
+    self.end_commit()
+
+  def process_primary_commit(self, svn_commit):
+    """Commit the CVSRevisions in SVN_COMMIT, dispatching on their type."""
+
+    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
+
+    # This actually commits CVSRevisions
+    if len(svn_commit.cvs_revs) > 1:
+      plural = "s"
+    else:
+      plural = ""
+    Log().verbose("Committing %d CVSRevision%s"
+                  % (len(svn_commit.cvs_revs), plural))
+    for cvs_rev in svn_commit.cvs_revs:
+      if isinstance(cvs_rev, CVSRevisionNoop):
+        # Noop revisions require no repository action.
+        pass
+
+      elif isinstance(cvs_rev, CVSRevisionDelete):
+        self.delete_path(cvs_rev.cvs_file, cvs_rev.lod, Ctx().prune)
+
+      elif isinstance(cvs_rev, CVSRevisionAdd):
+        self.add_path(cvs_rev)
+
+      elif isinstance(cvs_rev, CVSRevisionChange):
+        self.change_path(cvs_rev)
+
+    self.end_commit()
+
+  def process_post_commit(self, svn_commit):
+    """Mirror non-trunk-default-branch changes from SVN_COMMIT onto trunk."""
+
+    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
+
+    Log().verbose(
+        'Synchronizing default branch motivated by %d'
+        % (svn_commit.motivating_revnum,)
+        )
+
+    for cvs_rev in svn_commit.cvs_revs:
+      trunk = cvs_rev.cvs_file.project.get_trunk()
+      if isinstance(cvs_rev, CVSRevisionAdd):
+        # Copy from branch to trunk:
+        self.copy_path(
+            cvs_rev.cvs_file, cvs_rev.lod, trunk,
+            svn_commit.motivating_revnum, True
+            )
+      elif isinstance(cvs_rev, CVSRevisionChange):
+        # Delete old version of the path on trunk...
+        self.delete_path(cvs_rev.cvs_file, trunk)
+        # ...and copy the new version over from branch:
+        self.copy_path(
+            cvs_rev.cvs_file, cvs_rev.lod, trunk,
+            svn_commit.motivating_revnum, True
+            )
+      elif isinstance(cvs_rev, CVSRevisionDelete):
+        # Delete trunk path:
+        self.delete_path(cvs_rev.cvs_file, trunk)
+      elif isinstance(cvs_rev, CVSRevisionNoop):
+        # Do nothing
+        pass
+      else:
+        raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,))
+
+    self.end_commit()
+
+  def process_branch_commit(self, svn_commit):
+    """Fill the branch named in SVN_COMMIT via copies from its sources."""
+
+    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
+    Log().verbose('Filling branch:', svn_commit.symbol.name)
+
+    # Get the set of sources for the symbolic name:
+    source_set = get_source_set(
+        svn_commit.symbol,
+        self._symbolings_reader.get_range_map(svn_commit),
+        )
+
+    self.fill_symbol(svn_commit, source_set)
+
+    self.end_commit()
+
+  def process_tag_commit(self, svn_commit):
+    """Fill the tag named in SVN_COMMIT via copies from its sources."""
+
+    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
+    Log().verbose('Filling tag:', svn_commit.symbol.name)
+
+    # Get the set of sources for the symbolic name:
+    source_set = get_source_set(
+        svn_commit.symbol,
+        self._symbolings_reader.get_range_map(svn_commit),
+        )
+
+    self.fill_symbol(svn_commit, source_set)
+
+    self.end_commit()
+
+  def cleanup(self):
+    """Tear down: notify delegates, close the mirror and readers."""
+
+    self._invoke_delegates('finish')
+    self._mirror.close()
+    self._mirror = None
+    Ctx().revision_reader.finish()
+    self._symbolings_reader.close()
+    del self._symbolings_reader
+
+
<br>
+class DumpfileOutputOption(SVNOutputOption):
+  """Output the result of the conversion into a dumpfile."""
+
+  def __init__(self, dumpfile_path, author_transforms=None):
+    SVNOutputOption.__init__(self, author_transforms)
+    # Path of the Subversion dumpfile to be written.
+    self.dumpfile_path = dumpfile_path
+
+  def check(self):
+    # Nothing to verify up front for dumpfile output.
+    pass
+
+  def setup(self, svn_rev_count):
+    """Install a DumpfileDelegate (unless this is a dry run)."""
+
+    Log().quiet("Starting Subversion Dumpfile.")
+    SVNOutputOption.setup(self, svn_rev_count)
+    if not Ctx().dry_run:
+      self.add_delegate(
+          DumpfileDelegate(Ctx().revision_reader, self.dumpfile_path)
+          )
+
+
+class RepositoryOutputOption(SVNOutputOption):
+  """Output the result of the conversion into an SVN repository."""
+
+  def __init__(self, target, author_transforms=None):
+    SVNOutputOption.__init__(self, author_transforms)
+    # Filesystem path of the target Subversion repository.
+    self.target = target
+
+  def check(self):
+    """Verify that svnadmin is runnable (skipped on dry runs)."""
+
+    if not Ctx().dry_run:
+      # Verify that svnadmin can be executed.  The 'help' subcommand
+      # should be harmless.
+      try:
+        check_command_runs([Ctx().svnadmin_executable, 'help'], 'svnadmin')
+      except CommandFailedException, e:
+        raise FatalError(
+            '%s\n'
+            'svnadmin could not be executed.  Please ensure that it is\n'
+            'installed and/or use the --svnadmin option.' % (e,))
+
+  def setup(self, svn_rev_count):
+    """Install a RepositoryDelegate (unless this is a dry run)."""
+
+    Log().quiet("Starting Subversion Repository.")
+    SVNOutputOption.setup(self, svn_rev_count)
+    if not Ctx().dry_run:
+      self.add_delegate(
+          RepositoryDelegate(Ctx().revision_reader, self.target)
+          )
+
+
+class NewRepositoryOutputOption(RepositoryOutputOption):
+  """Output the result of the conversion into a new SVN repository."""
+
+  # NOTE(review): CREATE_OPTIONS uses a mutable default ([]).  It is
+  # only read (via +=) below, never mutated, so this is currently
+  # harmless -- but a None default would be safer.
+  def __init__(
+        self, target, fs_type=None, bdb_txn_nosync=None, author_transforms=None, create_options=[]
+        ):
+    RepositoryOutputOption.__init__(self, target, author_transforms)
+    self.bdb_txn_nosync = bdb_txn_nosync
+
+    # Determine the options to be passed to "svnadmin create":
+    if not fs_type:
+      # User didn't say what kind repository (bdb, fsfs, etc).  We
+      # still pass --bdb-txn-nosync.  It's a no-op if the default
+      # repository type doesn't support it, but we definitely want it
+      # if BDB is the default.
+      self.create_options = ['--bdb-txn-nosync']
+    elif fs_type == 'bdb':
+      # User explicitly specified bdb.
+      #
+      # Since this is a BDB repository, pass --bdb-txn-nosync, because
+      # it gives us a 4-5x speed boost (if cvs2svn is creating the
+      # repository, cvs2svn should be the only program accessing the
+      # svn repository until cvs2svn is done).  But we'll turn no-sync
+      # off in self.finish(), unless instructed otherwise.
+      self.create_options = ['--fs-type=bdb', '--bdb-txn-nosync']
+    else:
+      # User specified something other than bdb.
+      self.create_options = ['--fs-type=%s' % fs_type]
+
+    # Now append the user's explicitly-set create options:
+    self.create_options += create_options
+
+  def check(self):
+    """In addition to base checks, refuse to overwrite an existing repo."""
+
+    RepositoryOutputOption.check(self)
+    if not Ctx().dry_run and os.path.exists(self.target):
+      raise FatalError("the svn-repos-path '%s' exists.\n"
+                       "Remove it, or pass '--existing-svnrepos'."
+                       % self.target)
+
+  def setup(self, svn_rev_count):
+    """Run 'svnadmin create' for the new repository, then base setup."""
+
+    Log().normal("Creating new repository '%s'" % (self.target))
+    if Ctx().dry_run:
+      # Do not actually create repository:
+      pass
+    else:
+      call_command([
+          Ctx().svnadmin_executable, 'create',
+          ] + self.create_options + [
+          self.target
+          ])
+
+    RepositoryOutputOption.setup(self, svn_rev_count)
+
+  def cleanup(self):
+    RepositoryOutputOption.cleanup(self)
+
+    # If this is a BDB repository, and we created the repository, and
+    # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
+    # line in the DB_CONFIG file, because txn syncing should be on by
+    # default in BDB repositories.
+    #
+    # We determine if this is a BDB repository by looking for the
+    # DB_CONFIG file, which doesn't exist in FSFS, rather than by
+    # checking self.fs_type.  That way this code will Do The Right
+    # Thing in all circumstances.
+    db_config = os.path.join(self.target, "db/DB_CONFIG")
+    if Ctx().dry_run:
+      # Do not change repository:
+      pass
+    elif not self.bdb_txn_nosync and os.path.exists(db_config):
+      # Rewrite DB_CONFIG with the no-sync flag commented out:
+      no_sync = 'set_flags DB_TXN_NOSYNC\n'
+
+      contents = open(db_config, 'r').readlines()
+      index = contents.index(no_sync)
+      contents[index] = '# ' + no_sync
+      open(db_config, 'w').writelines(contents)
+
+
+class ExistingRepositoryOutputOption(RepositoryOutputOption):
+  """Output the result of the conversion into an existing SVN repository."""
+
+  def __init__(self, target, author_transforms=None):
+    RepositoryOutputOption.__init__(self, target, author_transforms)
+
+  def check(self):
+    """In addition to base checks, require that TARGET already exists."""
+
+    RepositoryOutputOption.check(self)
+    if not os.path.isdir(self.target):
+      raise FatalError("the svn-repos-path '%s' is not an "
+                       "existing directory." % self.target)
+
+
diff --git a/cvs2svn_lib/svn_repository_delegate.py b/cvs2svn_lib/svn_repository_delegate.py
new file mode 100644
index 0000000..00c4a01
--- /dev/null
+++ b/cvs2svn_lib/svn_repository_delegate.py
@@ -0,0 +1,121 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the SVNRepositoryDelegate class."""
+
+
class SVNRepositoryDelegate:
  """Abstract superclass for any delegate to SVNOutputOption.

  Subclasses must implement all of the methods below.

  Each method corresponds to the Subversion operation implied by its
  name, which the subclass carries out in its own way.  For example,
  for the add_path method, the DumpfileDelegate would write out a
  'Node-add:' command to a Subversion dumpfile, the StdoutDelegate
  would merely print that the path is being added to the repository,
  and the RepositoryDelegate would actually cause the path to be added
  to the Subversion repository that it is creating."""

  def start_commit(self, revnum, revprops):
    """An SVN commit is starting.

    Perform whatever is needed to begin an SVN commit with revision
    number REVNUM and revision properties REVPROPS."""

    raise NotImplementedError()

  def end_commit(self):
    """An SVN commit is ending."""

    raise NotImplementedError()

  def initialize_project(self, project):
    """Initialize PROJECT.

    For Subversion, this means creating the trunk, branches, and tags
    directories for PROJECT."""

    raise NotImplementedError()

  def initialize_lod(self, lod):
    """Initialize LOD with no contents.

    LOD is an instance of LineOfDevelopment.  An LOD may also come
    into existence by being copied from another LOD; such events are
    reported via the copy_lod() callback instead."""

    raise NotImplementedError()

  def mkdir(self, lod, cvs_directory):
    """Create CVS_DIRECTORY within LOD.

    LOD is a LineOfDevelopment; CVS_DIRECTORY is a CVSDirectory."""

    raise NotImplementedError()

  def add_path(self, s_item):
    """Add the path corresponding to S_ITEM to the repository.

    S_ITEM is an SVNCommitItem."""

    raise NotImplementedError()

  def change_path(self, s_item):
    """Change the path corresponding to S_ITEM in the repository.

    S_ITEM is an SVNCommitItem."""

    raise NotImplementedError()

  def delete_lod(self, lod):
    """Delete LOD from the repository.

    LOD is a LineOfDevelopment instance."""

    raise NotImplementedError()

  def delete_path(self, lod, cvs_path):
    """Delete CVS_PATH from LOD.

    LOD is a LineOfDevelopment; CVS_PATH is a CVSPath."""

    raise NotImplementedError()

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy SRC_LOD as it was in SRC_REVNUM to DEST_LOD.

    SRC_LOD and DEST_LOD are both LODs, and SRC_REVNUM is a
    subversion revision number (int)."""

    raise NotImplementedError()

  def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
    """Copy CVS_PATH in SRC_LOD@SRC_REVNUM to DEST_LOD.

    CVS_PATH is a CVSPath, SRC_LOD and DEST_LOD are LODs, and
    SRC_REVNUM is a subversion revision number (int)."""

    raise NotImplementedError()

  def finish(self):
    """All SVN revisions have been committed.

    Perform any necessary cleanup."""

    raise NotImplementedError()
+
+
diff --git a/cvs2svn_lib/svn_revision_range.py b/cvs2svn_lib/svn_revision_range.py
new file mode 100644
index 0000000..04ba7fa
--- /dev/null
+++ b/cvs2svn_lib/svn_revision_range.py
@@ -0,0 +1,171 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the SVNRevisionRange class."""
+
+
+import bisect
+
+from cvs2svn_lib.common import SVN_INVALID_REVNUM
+
+
class SVNRevisionRange:
  """The range of subversion revision numbers from which a path can be
  copied.

  self.opening_revnum is the number of the earliest such revision.
  self.closing_revnum is one higher than the number of the last such
  revision, or None if no closings were registered."""

  def __init__(self, source_lod, opening_revnum):
    self.source_lod = source_lod
    self.opening_revnum = opening_revnum
    self.closing_revnum = None

  def add_closing(self, closing_revnum):
    """Register a closing revision, retaining only the first one seen.

    When we have a non-trunk default branch, we may have multiple
    closings--any closing after the first is ignored."""

    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __contains__(self, revnum):
    """Return True iff REVNUM is contained in the range."""

    if revnum < self.opening_revnum:
      return False
    return self.closing_revnum is None or revnum < self.closing_revnum

  def __str__(self):
    if self.closing_revnum is None:
      return '[%d:]' % (self.opening_revnum,)
    return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)

  def __repr__(self):
    return str(self)
+
+
class RevisionScores:
  """Represent the scores for a range of revisions."""

  def __init__(self, svn_revision_ranges):
    """Initialize based on SVN_REVISION_RANGES.

    SVN_REVISION_RANGES is a list of SVNRevisionRange objects.

    The score of an svn source is defined to be the number of
    SVNRevisionRanges on that LOD that include the revision.  A score
    thus indicates that copying the corresponding revision (or any
    following revision up to the next revision in the list) of the
    object in question would yield that many correct paths at or
    underneath the object.  There may be other paths underneath it
    that are not correct and would need to be deleted or recopied;
    those can only be detected by descending and examining their
    scores.

    If SVN_REVISION_RANGES is empty, then all scores are undefined."""

    # A map {SOURCE_LOD : [(REVNUM, DELTA), ...]} where DELTA is +1
    # where a range opens and -1 where one closes:
    deltas_map = {}

    for range in svn_revision_ranges:
      # setdefault() replaces the bare 'except:' formerly used here,
      # which could silently mask unrelated errors (e.g. an
      # unhashable source_lod):
      deltas = deltas_map.setdefault(range.source_lod, [])
      deltas.append((range.opening_revnum, +1))
      if range.closing_revnum is not None:
        deltas.append((range.closing_revnum, -1))

    # A map:
    #
    #   {SOURCE_LOD : [(REV1 SCORE1), (REV2 SCORE2), (REV3 SCORE3), ...]}
    #
    # where the tuples are sorted by revision number and the revision
    # numbers are distinct.  Score is the number of correct paths that
    # would result from using the specified SOURCE_LOD and revision
    # number (or any other revision preceding the next revision
    # listed) as a source.  For example, the score of any revision REV
    # in the range REV2 <= REV < REV3 is equal to SCORE2.
    self._scores_map = {}

    for (source_lod, deltas) in deltas_map.items():
      # Sort by revision number:
      deltas.sort()

      # Initialize output list with zeroth element of deltas.  This
      # element must exist, because it was verified that
      # svn_revision_ranges (and therefore openings) is not empty.
      scores = [ deltas[0] ]
      total = deltas[0][1]
      for (rev, change) in deltas[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      self._scores_map[source_lod] = scores

  def get_score(self, range):
    """Return the score for RANGE's opening revision.

    If RANGE doesn't appear explicitly in self.scores, use the score
    of the highest revision preceding RANGE.  If there are no
    preceding revisions, then the score for RANGE is unknown; in this
    case, return -1."""

    try:
      scores = self._scores_map[range.source_lod]
    except KeyError:
      return -1

    # Remember, according to the tuple sorting rules,
    #
    #    (revnum, anything,) < (revnum+1,) < (revnum+1, anything,)
    predecessor_index = bisect.bisect_right(
        scores, (range.opening_revnum + 1,)
        ) - 1

    if predecessor_index < 0:
      return -1

    return scores[predecessor_index][1]

  def get_best_revnum(self):
    """Find the revnum with the highest score.

    Return (source_lod, revnum, score) for the revnum with the
    highest score.  (The original docstring incorrectly claimed a
    2-tuple.)  If the highest score is shared by multiple revisions,
    select the oldest revision."""

    best_source_lod = None
    best_revnum = SVN_INVALID_REVNUM
    best_score = 0

    # sorted() makes the iteration order deterministic and, unlike
    # keys() followed by an in-place sort, also works if keys() ever
    # returns a view:
    for source_lod in sorted(self._scores_map.keys()):
      for revnum, score in self._scores_map[source_lod]:
        if score > best_score:
          best_source_lod = source_lod
          best_score = score
          best_revnum = revnum
    return best_source_lod, best_revnum, best_score
+
+
diff --git a/cvs2svn_lib/svn_run_options.py b/cvs2svn_lib/svn_run_options.py
new file mode 100644
index 0000000..e757730
--- /dev/null
+++ b/cvs2svn_lib/svn_run_options.py
@@ -0,0 +1,543 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module manages cvs2svn run options."""
+
+
+import sys
+import optparse
+import datetime
+import codecs
+
+from cvs2svn_lib.version import VERSION
+from cvs2svn_lib import config
+from cvs2svn_lib.common import warning_prefix
+from cvs2svn_lib.common import error_prefix
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import normalize_svn_path
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.run_options import not_both
+from cvs2svn_lib.run_options import RunOptions
+from cvs2svn_lib.run_options import ContextOption
+from cvs2svn_lib.run_options import IncompatibleOption
+from cvs2svn_lib.run_options import authors
+from cvs2svn_lib.man_writer import ManWriter
+from cvs2svn_lib.project import Project
+from cvs2svn_lib.svn_output_option import DumpfileOutputOption
+from cvs2svn_lib.svn_output_option import ExistingRepositoryOutputOption
+from cvs2svn_lib.svn_output_option import NewRepositoryOutputOption
+from cvs2svn_lib.revision_manager import NullRevisionRecorder
+from cvs2svn_lib.revision_manager import NullRevisionExcluder
+from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
+from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
+from cvs2svn_lib.checkout_internal import InternalRevisionRecorder
+from cvs2svn_lib.checkout_internal import InternalRevisionExcluder
+from cvs2svn_lib.checkout_internal import InternalRevisionReader
+from cvs2svn_lib.symbol_strategy import TrunkPathRule
+from cvs2svn_lib.symbol_strategy import BranchesPathRule
+from cvs2svn_lib.symbol_strategy import TagsPathRule
+
+
+short_desc = 'convert a cvs repository into a subversion repository'
+
+synopsis = """\
+.B cvs2svn
+[\\fIOPTION\\fR]... \\fIOUTPUT-OPTION CVS-REPOS-PATH\\fR
+.br
+.B cvs2svn
+[\\fIOPTION\\fR]... \\fI--options=PATH\\fR
+"""
+
+long_desc = """\
+Create a new Subversion repository based on the version history stored in a
+CVS repository. Each CVS commit will be mirrored in the Subversion
+repository, including such information as date of commit and id of the
+committer.
+.P
+\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS
+repository that you want to convert. It is not possible to convert a
+CVS repository to which you only have remote access; see the FAQ for
+more information. This path doesn't have to be the top level
+directory of a CVS repository; it can point at a project within a
+repository, in which case only that project will be converted. This
+path or one of its parent directories has to contain a subdirectory
+called CVSROOT (though the CVSROOT directory can be empty).
+.P
+Multiple CVS repositories can be converted into a single Subversion
+repository in a single run of cvs2svn, but only by using an
+\\fB--options\\fR file.
+"""
+
+files = """\
+A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by
+\\fB--tmpdir\\fR) is used as scratch space for temporary data files.
+"""
+
+see_also = [
+ ('cvs', '1'),
+ ('svn', '1'),
+ ('svnadmin', '1'),
+ ]
+
+
class SVNRunOptions(RunOptions):
  """Manage the run options for conversions whose output is SVN."""

  def _get_output_options_group(self):
    """Build the optparse group for SVN output options."""

    group = RunOptions._get_output_options_group(self)

    group.add_option(IncompatibleOption(
        '--svnrepos', '-s', type='string',
        action='store',
        help='path where SVN repos should be created',
        man_help=(
            'Write the output of the conversion into a Subversion repository '
            'located at \\fIpath\\fR.  This option causes a new Subversion '
            'repository to be created at \\fIpath\\fR unless the '
            '\\fB--existing-svnrepos\\fR option is also used.'
            ),
        metavar='PATH',
        ))
    self.parser.set_default('existing_svnrepos', False)
    group.add_option(IncompatibleOption(
        '--existing-svnrepos',
        action='store_true',
        help='load into existing SVN repository (for use with --svnrepos)',
        man_help=(
            'Load the converted CVS repository into an existing Subversion '
            'repository, instead of creating a new repository.  (This option '
            'should be used in combination with '
            '\\fB-s\\fR/\\fB--svnrepos\\fR.)  The repository must either be '
            'empty or contain no paths that overlap with those that will '
            'result from the conversion.  Please note that you need write '
            'permission for the repository files.'
            ),
        ))
    group.add_option(IncompatibleOption(
        '--fs-type', type='string',
        action='store',
        help=(
            'pass --fs-type=TYPE to "svnadmin create" (for use with '
            '--svnrepos)'
            ),
        man_help=(
            'Pass \\fI--fs-type\\fR=\\fItype\\fR to "svnadmin create" when '
            'creating a new repository.'
            ),
        metavar='TYPE',
        ))
    self.parser.set_default('bdb_txn_nosync', False)
    group.add_option(IncompatibleOption(
        '--bdb-txn-nosync',
        action='store_true',
        help=(
            'pass --bdb-txn-nosync to "svnadmin create" (for use with '
            '--svnrepos)'
            ),
        man_help=(
            'Pass \\fI--bdb-txn-nosync\\fR to "svnadmin create" when '
            'creating a new BDB-style Subversion repository.'
            ),
        ))
    self.parser.set_default('create_options', [])
    group.add_option(IncompatibleOption(
        '--create-option', type='string',
        action='append', dest='create_options',
        help='pass OPT to "svnadmin create" (for use with --svnrepos)',
        man_help=(
            'Pass \\fIopt\\fR to "svnadmin create" when creating a new '
            'Subversion repository (can be specified multiple times to '
            'pass multiple options).'
            ),
        metavar='OPT',
        ))
    group.add_option(IncompatibleOption(
        '--dumpfile', type='string',
        action='store',
        help='just produce a dumpfile; don\'t commit to a repos',
        man_help=(
            'Just produce a dumpfile; don\'t commit to an SVN repository. '
            'Write the dumpfile to \\fIpath\\fR.'
            ),
        metavar='PATH',
        ))

    group.add_option(ContextOption(
        '--dry-run',
        action='store_true',
        help=(
            'do not create a repository or a dumpfile; just print what '
            'would happen.'
            ),
        man_help=(
            'Do not create a repository or a dumpfile; just print the '
            'details of what cvs2svn would do if it were really converting '
            'your repository.'
            ),
        ))

    # Deprecated options:
    self.parser.set_default('dump_only', False)
    group.add_option(IncompatibleOption(
        '--dump-only',
        action='callback', callback=self.callback_dump_only,
        help=optparse.SUPPRESS_HELP,
        man_help=optparse.SUPPRESS_HELP,
        ))
    group.add_option(IncompatibleOption(
        '--create',
        action='callback', callback=self.callback_create,
        help=optparse.SUPPRESS_HELP,
        man_help=optparse.SUPPRESS_HELP,
        ))

    return group

  def _get_conversion_options_group(self):
    """Build the optparse group for conversion options (trunk/branches/tags
    paths, pruning, cross-branch commits)."""

    group = RunOptions._get_conversion_options_group(self)

    self.parser.set_default('trunk_base', config.DEFAULT_TRUNK_BASE)
    group.add_option(IncompatibleOption(
        '--trunk', type='string',
        action='store', dest='trunk_base',
        help=(
            'path for trunk (default: %s)'
            % (config.DEFAULT_TRUNK_BASE,)
            ),
        man_help=(
            'Set the top-level path to use for trunk in the Subversion '
            'repository.  The default is \\fI%s\\fR.'
            % (config.DEFAULT_TRUNK_BASE,)
            ),
        metavar='PATH',
        ))
    self.parser.set_default('branches_base', config.DEFAULT_BRANCHES_BASE)
    group.add_option(IncompatibleOption(
        '--branches', type='string',
        action='store', dest='branches_base',
        help=(
            'path for branches (default: %s)'
            % (config.DEFAULT_BRANCHES_BASE,)
            ),
        man_help=(
            'Set the top-level path to use for branches in the Subversion '
            'repository.  The default is \\fI%s\\fR.'
            % (config.DEFAULT_BRANCHES_BASE,)
            ),
        metavar='PATH',
        ))
    self.parser.set_default('tags_base', config.DEFAULT_TAGS_BASE)
    group.add_option(IncompatibleOption(
        '--tags', type='string',
        action='store', dest='tags_base',
        help=(
            'path for tags (default: %s)'
            % (config.DEFAULT_TAGS_BASE,)
            ),
        man_help=(
            'Set the top-level path to use for tags in the Subversion '
            'repository.  The default is \\fI%s\\fR.'
            % (config.DEFAULT_TAGS_BASE,)
            ),
        metavar='PATH',
        ))
    group.add_option(ContextOption(
        '--no-prune',
        action='store_false', dest='prune',
        help='don\'t prune empty directories',
        man_help=(
            'When all files are deleted from a directory in the Subversion '
            'repository, don\'t delete the empty directory (the default is '
            'to delete any empty directories).'
            ),
        ))
    group.add_option(ContextOption(
        '--no-cross-branch-commits',
        action='store_false', dest='cross_branch_commits',
        help='prevent the creation of cross-branch commits',
        man_help=(
            'Prevent the creation of commits that affect files on multiple '
            'branches at once.'
            ),
        ))

    return group

  def _get_extraction_options_group(self):
    """Build the optparse group selecting how revision contents are
    extracted from the CVS repository."""

    group = RunOptions._get_extraction_options_group(self)

    self.parser.set_default('use_internal_co', False)
    group.add_option(IncompatibleOption(
        '--use-internal-co',
        action='store_true',
        help=(
            'use internal code to extract revision contents '
            '(fastest but disk space intensive) (default)'
            ),
        man_help=(
            'Use internal code to extract revision contents.  This '
            'is up to 50% faster than using \\fB--use-rcs\\fR, but needs '
            'a lot of disk space: roughly the size of your CVS repository '
            'plus the peak size of a complete checkout of the repository '
            'with all branches that existed and still had commits pending '
            'at a given time.  This option is the default.'
            ),
        ))
    self.parser.set_default('use_cvs', False)
    group.add_option(IncompatibleOption(
        '--use-cvs',
        action='store_true',
        help=(
            'use CVS to extract revision contents (slower than '
            '--use-internal-co or --use-rcs)'
            ),
        man_help=(
            'Use CVS to extract revision contents.  This option is slower '
            'than \\fB--use-internal-co\\fR or \\fB--use-rcs\\fR.'
            ),
        ))
    self.parser.set_default('use_rcs', False)
    group.add_option(IncompatibleOption(
        '--use-rcs',
        action='store_true',
        help=(
            'use RCS to extract revision contents (faster than '
            '--use-cvs but fails in some cases)'
            ),
        man_help=(
            'Use RCS \'co\' to extract revision contents.  This option is '
            'faster than \\fB--use-cvs\\fR but fails in some cases.'
            ),
        ))

    return group

  def _get_environment_options_group(self):
    """Build the optparse group for environment options (external tools)."""

    group = RunOptions._get_environment_options_group(self)

    group.add_option(ContextOption(
        '--svnadmin', type='string',
        action='store', dest='svnadmin_executable',
        help='path to the "svnadmin" program',
        man_help=(
            'Path to the \\fIsvnadmin\\fR program.  (\\fIsvnadmin\\fR is '
            'needed when the \\fB-s\\fR/\\fB--svnrepos\\fR output option is '
            'used.)'
            ),
        metavar='PATH',
        ))

    return group

  def callback_dump_only(self, option, opt_str, value, parser):
    """optparse callback for the deprecated --dump-only option."""

    parser.values.dump_only = True
    Log().error(
        warning_prefix +
        ': The --dump-only option is deprecated (it is implied '
        'by --dumpfile).\n'
        )

  def callback_create(self, option, opt_str, value, parser):
    """optparse callback for the deprecated --create option."""

    Log().error(
        warning_prefix +
        ': The behaviour produced by the --create option is now the '
        'default;\n'
        'passing the option is deprecated.\n'
        )

  def callback_manpage(self, option, opt_str, value, parser):
    """optparse callback: write the manpage to stdout and exit."""

    f = codecs.getwriter('utf_8')(sys.stdout)
    ManWriter(
        parser,
        section='1',
        date=datetime.date.today(),
        source='Version %s' % (VERSION,),
        manual='User Commands',
        short_desc=short_desc,
        synopsis=synopsis,
        long_desc=long_desc,
        files=files,
        authors=authors,
        see_also=see_also,
        ).write_manpage(f)
    sys.exit(0)

  def process_extraction_options(self):
    """Process options related to extracting data from the CVS repository."""

    ctx = Ctx()
    options = self.options

    not_both(options.use_rcs, '--use-rcs',
             options.use_cvs, '--use-cvs')

    not_both(options.use_rcs, '--use-rcs',
             options.use_internal_co, '--use-internal-co')

    not_both(options.use_cvs, '--use-cvs',
             options.use_internal_co, '--use-internal-co')

    if options.use_rcs:
      ctx.revision_recorder = NullRevisionRecorder()
      ctx.revision_excluder = NullRevisionExcluder()
      ctx.revision_reader = RCSRevisionReader(options.co_executable)
    elif options.use_cvs:
      ctx.revision_recorder = NullRevisionRecorder()
      ctx.revision_excluder = NullRevisionExcluder()
      ctx.revision_reader = CVSRevisionReader(options.cvs_executable)
    else:
      # --use-internal-co is the default:
      ctx.revision_recorder = InternalRevisionRecorder(compress=True)
      ctx.revision_excluder = InternalRevisionExcluder()
      ctx.revision_reader = InternalRevisionReader(compress=True)

  def process_output_options(self):
    """Process the options related to SVN output."""

    ctx = Ctx()
    options = self.options

    if options.dump_only and not options.dumpfile:
      raise FatalError("'--dump-only' requires '--dumpfile' to be specified.")

    if not options.svnrepos and not options.dumpfile and not ctx.dry_run:
      raise FatalError("must pass one of '-s' or '--dumpfile'.")

    not_both(options.svnrepos, '-s',
             options.dumpfile, '--dumpfile')

    not_both(options.dumpfile, '--dumpfile',
             options.existing_svnrepos, '--existing-svnrepos')

    not_both(options.bdb_txn_nosync, '--bdb-txn-nosync',
             options.existing_svnrepos, '--existing-svnrepos')

    not_both(options.dumpfile, '--dumpfile',
             options.bdb_txn_nosync, '--bdb-txn-nosync')

    not_both(options.fs_type, '--fs-type',
             options.existing_svnrepos, '--existing-svnrepos')

    if (
          options.fs_type
          and options.fs_type != 'bdb'
          and options.bdb_txn_nosync
          ):
      raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
                       % options.fs_type)

    if options.svnrepos:
      if options.existing_svnrepos:
        ctx.output_option = ExistingRepositoryOutputOption(options.svnrepos)
      else:
        ctx.output_option = NewRepositoryOutputOption(
            options.svnrepos,
            fs_type=options.fs_type, bdb_txn_nosync=options.bdb_txn_nosync,
            create_options=options.create_options)
    else:
      ctx.output_option = DumpfileOutputOption(options.dumpfile)

  def add_project(
        self,
        project_cvs_repos_path,
        trunk_path=None, branches_path=None, tags_path=None,
        initial_directories=None,
        symbol_transforms=None,
        symbol_strategy_rules=None,
        ):
    """Add a project to be converted.

    Most arguments are passed straight through to the Project
    constructor.  SYMBOL_STRATEGY_RULES is an iterable of
    SymbolStrategyRules that will be applied to symbols in this
    project."""

    # Use None (not []) as the defaults above to avoid the
    # mutable-default-argument pitfall; both are treated as "empty":
    if initial_directories is None:
      initial_directories = []
    if symbol_strategy_rules is None:
      symbol_strategy_rules = []

    if trunk_path is not None:
      trunk_path = normalize_svn_path(trunk_path, allow_empty=True)
    if branches_path is not None:
      branches_path = normalize_svn_path(branches_path, allow_empty=False)
    if tags_path is not None:
      tags_path = normalize_svn_path(tags_path, allow_empty=False)

    initial_directories = [
        path
        for path in [trunk_path, branches_path, tags_path]
        if path
        ] + [
        normalize_svn_path(path)
        for path in initial_directories
        ]

    symbol_strategy_rules = list(symbol_strategy_rules)

    # Add rules to set the SVN paths for LODs depending on whether
    # they are the trunk, tags, or branches:
    if trunk_path is not None:
      symbol_strategy_rules.append(TrunkPathRule(trunk_path))
    if branches_path is not None:
      symbol_strategy_rules.append(BranchesPathRule(branches_path))
    if tags_path is not None:
      symbol_strategy_rules.append(TagsPathRule(tags_path))

    # 'project_id' rather than 'id' to avoid shadowing the builtin:
    project_id = len(self.projects)
    project = Project(
        project_id,
        project_cvs_repos_path,
        initial_directories=initial_directories,
        symbol_transforms=symbol_transforms,
        )

    self.projects.append(project)
    self.project_symbol_strategy_rules.append(symbol_strategy_rules)

  def clear_projects(self):
    """Clear the list of projects to be converted.

    This method is for the convenience of options files, which may
    want to import one another."""

    del self.projects[:]
    del self.project_symbol_strategy_rules[:]

  def process_options(self):
    """Check the command line and set up the default project."""

    # Consistency check for options and arguments.
    if len(self.args) == 0:
      self.usage()
      sys.exit(1)

    if len(self.args) > 1:
      Log().error(error_prefix + ": must pass only one CVS repository.\n")
      self.usage()
      sys.exit(1)

    cvsroot = self.args[0]

    self.process_extraction_options()
    self.process_output_options()
    self.process_symbol_strategy_options()
    self.process_property_setter_options()

    # Create the default project (using ctx.trunk, ctx.branches, and
    # ctx.tags):
    self.add_project(
        cvsroot,
        trunk_path=self.options.trunk_base,
        branches_path=self.options.branches_base,
        tags_path=self.options.tags_base,
        symbol_transforms=self.options.symbol_transforms,
        symbol_strategy_rules=self.options.symbol_strategy_rules,
        )
+
+
diff --git a/cvs2svn_lib/symbol.py b/cvs2svn_lib/symbol.py
new file mode 100644
index 0000000..e3a6b35
--- /dev/null
+++ b/cvs2svn_lib/symbol.py
@@ -0,0 +1,246 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes that represent trunk, branches, and tags.
+
+The classes in this module represent several concepts related to
+symbols and lines of development in the abstract; that is, not within
+a particular file, but across all files in a project.
+
+The classes in this module are organized into the following class
+hierarchy:
+
+AbstractSymbol
+ |
+ +--LineOfDevelopment
+ | |
+ | +--Trunk
+ | |
+ | +--IncludedSymbol (also inherits from TypedSymbol)
+ | |
+ | +--Branch
+ | |
+ | +--Tag
+ |
+ +--Symbol
+ |
+ +--TypedSymbol
+ |
+ +--IncludedSymbol (also inherits from LineOfDevelopment)
+ | |
+ | +--Branch
+ | |
+ | +--Tag
+ |
+ +--ExcludedSymbol
+
+Please note the use of multiple inheritance.
+
+All AbstractSymbols contain an id that is globally unique across all
+AbstractSymbols. Moreover, the id of an AbstractSymbol remains the
+same even if the symbol is mutated (as described below), and two
+AbstractSymbols are considered equal iff their ids are the same, even
+if the two instances have different types. Symbols in different
+projects always have different ids and are therefore always distinct.
+(Indeed, this is pretty much the defining characteristic of a
+project.) Even if, for example, two projects each have branches with
+the same name, the Symbols representing the branches are distinct and
+have distinct ids. (This is important to avoid having to rewrite
+databases with new symbol ids in CollateSymbolsPass.)
+
+AbstractSymbols are all initially created in CollectRevsPass as either
+Trunk or Symbol instances. A Symbol instance is essentially an
+undifferentiated Symbol.
+
+In CollateSymbolsPass, it is decided which symbols will be converted
+as branches, which as tags, and which excluded altogether. At the
+beginning of this pass, the symbols are all represented by instances
+of the non-specific Symbol class. During CollateSymbolsPass, each
+Symbol instance is replaced by an instance of Branch, Tag, or
+ExcludedSymbol with the same id. (Trunk instances are left
+unchanged.) At the end of CollateSymbolsPass, all ExcludedSymbols are
+discarded and processing continues with only Trunk, Branch, and Tag
+instances. These three classes inherit from LineOfDevelopment;
+therefore, in later passes the term LineOfDevelopment (abbreviated to
+LOD) is used to refer to such objects."""
+
+
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.common import path_join
+
+
class AbstractSymbol:
  """Base class for all other classes in this file."""

  def __init__(self, id, project):
    # The id is globally unique; equality and hashing are both based
    # on it, regardless of the instance's concrete type.
    self.id = id
    self.project = project

  def __hash__(self):
    # Hash directly on the id, consistent with __eq__:
    return self.id

  def __eq__(self, other):
    return self.id == other.id
+
+
class LineOfDevelopment(AbstractSymbol):
  """Base class for Trunk, Branch, and Tag.

  This is basically the abstraction for what will become a root tree
  in the Subversion repository."""

  def __init__(self, id, project):
    AbstractSymbol.__init__(self, id, project)
    # Top-level SVN path for this LOD; starts out unset and is
    # presumably assigned elsewhere (e.g. by the symbol path rules)
    # before get_path() is used:
    self.base_path = None

  def get_path(self, *components):
    """Return the svn path for this LineOfDevelopment."""

    return path_join(self.base_path, *components)
+
+
class Trunk(LineOfDevelopment):
  """Represent the main line of development."""

  def __getstate__(self):
    return (self.id, self.project.id, self.base_path,)

  def __setstate__(self, state):
    (self.id, project_id, self.base_path,) = state
    # Projects are pickled by id; map back to the Project instance:
    self.project = Ctx()._projects[project_id]

  def __cmp__(self, other):
    if isinstance(other, Trunk):
      return cmp(self.project, other.project)
    if isinstance(other, Symbol):
      # Trunk always compares less than Symbols:
      return -1
    raise NotImplementedError()

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return 'Trunk'

  def __repr__(self):
    return '%s<%x>' % (self, self.id,)
+
+
class Symbol(AbstractSymbol):
  """Represent a symbol within one project in the CVS repository.

  Instances of the Symbol class itself stand for symbols read from
  the CVS repository.  CVS, of course, distinguishes between normal
  tags and branch tags, but symbol types may be changed in
  CollateSymbolsPass, so all CVS symbols are stored as plain Symbol
  instances at the beginning of the conversion.

  In CollateSymbolsPass, Symbols are replaced by Branches, Tags, and
  ExcludedSymbols (the latter being discarded at the end of that
  pass)."""

  def __init__(self, id, project, name, preferred_parent_id=None):
    AbstractSymbol.__init__(self, id, project)
    self.name = name

    # If this symbol has a preferred parent, this member is the id of
    # the LineOfDevelopment instance representing it.  If the symbol
    # never appeared in a CVSTag or CVSBranch (for example, because
    # all of the branches on this LOD have been detached from the
    # dependency tree), then this field is set to None.  This field
    # is set during FilterSymbolsPass.
    self.preferred_parent_id = preferred_parent_id

  def __getstate__(self):
    return (self.id, self.project.id, self.name, self.preferred_parent_id,)

  def __setstate__(self, state):
    (self.id, project_id, self.name, self.preferred_parent_id,) = state
    # Projects are pickled by id; map back to the Project instance:
    self.project = Ctx()._projects[project_id]

  def __cmp__(self, other):
    if isinstance(other, Symbol):
      # Order by project, then name, then id:
      return (
          cmp(self.project, other.project)
          or cmp(self.name, other.name)
          or cmp(self.id, other.id)
          )
    elif isinstance(other, Trunk):
      # Symbols always compare greater than Trunk:
      return +1
    else:
      raise NotImplementedError()

  def __str__(self):
    return self.name

  def __repr__(self):
    return '%s<%x>' % (self, self.id,)
+
+
class TypedSymbol(Symbol):
  """A Symbol whose type (branch, tag, or excluded) has been decided."""

  def __init__(self, symbol):
    # Copy the identifying state over from the undifferentiated
    # SYMBOL, preserving its id:
    Symbol.__init__(
        self, symbol.id, symbol.project, symbol.name,
        symbol.preferred_parent_id,
        )
+
+
class IncludedSymbol(TypedSymbol, LineOfDevelopment):
  """A TypedSymbol that will be included in the conversion."""

  def __init__(self, symbol):
    TypedSymbol.__init__(self, symbol)
    # We can't call the LineOfDevelopment constructor, so initialize
    # its extra member explicitly.  Carry over SYMBOL's base_path if
    # it had one, otherwise default to None:
    self.base_path = getattr(symbol, 'base_path', None)

  def __getstate__(self):
    return (TypedSymbol.__getstate__(self), self.base_path,)

  def __setstate__(self, state):
    (super_state, self.base_path,) = state
    TypedSymbol.__setstate__(self, super_state)
+
+
class Branch(IncludedSymbol):
  """An object that describes a CVS branch."""

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return 'Branch(%r)' % (self.name,)
+
+
class Tag(IncludedSymbol):
  """An object that describes a CVS tag."""

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return 'Tag(%r)' % (self.name,)
+
+
class ExcludedSymbol(TypedSymbol):
  """A TypedSymbol that will be excluded from the conversion."""

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return 'ExcludedSymbol(%r)' % (self.name,)
+
+
diff --git a/cvs2svn_lib/symbol_database.py b/cvs2svn_lib/symbol_database.py
new file mode 100644
index 0000000..824f97b
--- /dev/null
+++ b/cvs2svn_lib/symbol_database.py
@@ -0,0 +1,68 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the SymbolDatabase class."""
+
+
+import cPickle
+
+from cvs2svn_lib import config
+from cvs2svn_lib.artifact_manager import artifact_manager
+
+
+class SymbolDatabase:
+ """Read-only access to symbol database.
+
+ This class allows iteration and lookups id -> symbol, where symbol
+ is a TypedSymbol instance. The whole database is read into memory
+ upon construction."""
+
+ def __init__(self):
+ # A map { id : TypedSymbol }
+ self._symbols = {}
+
+ f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'rb')
+ symbols = cPickle.load(f)
+ f.close()
+ for symbol in symbols:
+ self._symbols[symbol.id] = symbol
+
+ def get_symbol(self, id):
+ """Return the symbol instance with id ID.
+
+ Raise KeyError if the symbol is not known."""
+
+ return self._symbols[id]
+
+ def __iter__(self):
+ """Iterate over the Symbol instances within this database."""
+
+ return self._symbols.itervalues()
+
+ def close(self):
+ self._symbols = None
+
+
+def create_symbol_database(symbols):
+ """Create and fill a symbol database.
+
+ Record each symbol that is listed in SYMBOLS, which is an iterable
+ containing Trunk and TypedSymbol objects."""
+
+ f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'wb')
+ cPickle.dump(symbols, f, -1)
+ f.close()
+
diff --git a/cvs2svn_lib/symbol_statistics.py b/cvs2svn_lib/symbol_statistics.py
new file mode 100644
index 0000000..0d35a50
--- /dev/null
+++ b/cvs2svn_lib/symbol_statistics.py
@@ -0,0 +1,521 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module gathers and processes statistics about lines of development."""
+
+import cPickle
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import error_prefix
+from cvs2svn_lib.common import FatalException
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.artifact_manager import artifact_manager
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.symbol import IncludedSymbol
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+from cvs2svn_lib.symbol import ExcludedSymbol
+
+
+class SymbolPlanError(FatalException):
+ pass
+
+
+class SymbolPlanException(SymbolPlanError):
+ def __init__(self, stats, symbol, msg):
+ self.stats = stats
+ self.symbol = symbol
+ SymbolPlanError.__init__(
+ self,
+ 'Cannot convert the following symbol to %s: %s\n %s'
+ % (symbol, msg, self.stats,)
+ )
+
+
+class IndeterminateSymbolException(SymbolPlanException):
+ def __init__(self, stats, symbol):
+ SymbolPlanException.__init__(self, stats, symbol, 'Indeterminate type')
+
+
+class _Stats:
+ """A summary of information about a symbol (tag or branch).
+
+ Members:
+
+ lod -- the LineOfDevelopment instance of the lod being described
+
+ tag_create_count -- the number of files in which this lod appears
+ as a tag
+
+ branch_create_count -- the number of files in which this lod
+ appears as a branch
+
+ branch_commit_count -- the number of files in which there were
+ commits on this lod
+
+ trivial_import_count -- the number of files in which this branch
+ was purely a non-trunk default branch containing exactly one
+ revision.
+
+ pure_ntdb_count -- the number of files in which this branch was
+ purely a non-trunk default branch (consisting only of
+ non-trunk default branch revisions).
+
+ branch_blockers -- a set of Symbol instances for any symbols that
+ sprout from a branch with this name.
+
+ possible_parents -- a map {LineOfDevelopment : count} indicating
+ in how many files each LOD could have served as the parent of
+ self.lod."""
+
+ def __init__(self, lod):
+ self.lod = lod
+ self.tag_create_count = 0
+ self.branch_create_count = 0
+ self.branch_commit_count = 0
+ self.branch_blockers = set()
+ self.trivial_import_count = 0
+ self.pure_ntdb_count = 0
+ self.possible_parents = { }
+
+ def register_tag_creation(self):
+ """Register the creation of this lod as a tag."""
+
+ self.tag_create_count += 1
+
+ def register_branch_creation(self):
+ """Register the creation of this lod as a branch."""
+
+ self.branch_create_count += 1
+
+ def register_branch_commit(self):
+ """Register that there were commit(s) on this branch in one file."""
+
+ self.branch_commit_count += 1
+
+ def register_branch_blocker(self, blocker):
+ """Register BLOCKER as preventing this symbol from being deleted.
+
+ BLOCKER is a tag or a branch that springs from a revision on this
+ symbol."""
+
+ self.branch_blockers.add(blocker)
+
+ def register_trivial_import(self):
+ """Register that this branch is a trivial import branch in one file."""
+
+ self.trivial_import_count += 1
+
+ def register_pure_ntdb(self):
+ """Register that this branch is a pure import branch in one file."""
+
+ self.pure_ntdb_count += 1
+
+ def register_possible_parent(self, lod):
+ """Register that LOD was a possible parent for SELF.lod in a file."""
+
+ self.possible_parents[lod] = self.possible_parents.get(lod, 0) + 1
+
+ def register_branch_possible_parents(self, cvs_branch, cvs_file_items):
+ """Register any possible parents of this symbol from CVS_BRANCH."""
+
+ # This routine is a bottleneck. So we define some local variables
+ # to speed up access to frequently-needed variables.
+ register = self.register_possible_parent
+ parent_cvs_rev = cvs_file_items[cvs_branch.source_id]
+
+ # The "obvious" parent of a branch is the branch holding the
+ # revision where the branch is rooted:
+ register(parent_cvs_rev.lod)
+
+ # Any other branches that are rooted at the same revision and
+ # were committed earlier than the branch are also possible
+ # parents:
+ symbol = cvs_branch.symbol
+ for branch_id in parent_cvs_rev.branch_ids:
+ parent_symbol = cvs_file_items[branch_id].symbol
+ # A branch cannot be its own parent, nor can a branch's
+ # parent be a branch that was created after it. So we stop
+ # iterating when we reached the branch whose parents we are
+ # collecting:
+ if parent_symbol == symbol:
+ break
+ register(parent_symbol)
+
+ def register_tag_possible_parents(self, cvs_tag, cvs_file_items):
+ """Register any possible parents of this symbol from CVS_TAG."""
+
+ # This routine is a bottleneck. So use local variables to speed
+ # up access to frequently-needed objects.
+ register = self.register_possible_parent
+ parent_cvs_rev = cvs_file_items[cvs_tag.source_id]
+
+ # The "obvious" parent of a tag is the branch holding the
+ # revision where the tag is rooted:
+ register(parent_cvs_rev.lod)
+
+ # Branches that are rooted at the same revision are also
+ # possible parents:
+ for branch_id in parent_cvs_rev.branch_ids:
+ parent_symbol = cvs_file_items[branch_id].symbol
+ register(parent_symbol)
+
+ def is_ghost(self):
+ """Return True iff this lod never really existed."""
+
+ return (
+ not isinstance(self.lod, Trunk)
+ and self.branch_commit_count == 0
+ and not self.branch_blockers
+ and not self.possible_parents
+ )
+
+ def check_valid(self, symbol):
+ """Check whether SYMBOL is a valid conversion of SELF.lod.
+
+ It is planned to convert SELF.lod as SYMBOL. Verify that SYMBOL
+ is a TypedSymbol and that the information that it contains is
+ consistent with that stored in SELF.lod. (This routine does not
+ do higher-level tests of whether the chosen conversion is actually
+ sensible.) If there are any problems, raise a
+ SymbolPlanException."""
+
+ if not isinstance(symbol, (Trunk, Branch, Tag, ExcludedSymbol)):
+ raise IndeterminateSymbolException(self, symbol)
+
+ if symbol.id != self.lod.id:
+ raise SymbolPlanException(self, symbol, 'IDs must match')
+
+ if symbol.project != self.lod.project:
+ raise SymbolPlanException(self, symbol, 'Projects must match')
+
+ if isinstance(symbol, IncludedSymbol) and symbol.name != self.lod.name:
+ raise SymbolPlanException(self, symbol, 'Names must match')
+
+ def check_preferred_parent_allowed(self, symbol):
+ """Check that SYMBOL's preferred_parent_id is an allowed parent.
+
+ SYMBOL is the planned conversion of SELF.lod. Verify that its
+ preferred_parent_id is a possible parent of SELF.lod. If not,
+ raise a SymbolPlanException describing the problem."""
+
+ if isinstance(symbol, IncludedSymbol) \
+ and symbol.preferred_parent_id is not None:
+ for pp in self.possible_parents.keys():
+ if pp.id == symbol.preferred_parent_id:
+ return
+ else:
+ raise SymbolPlanException(
+ self, symbol,
+ 'The selected parent is not among the symbol\'s '
+ 'possible parents.'
+ )
+
+ def __str__(self):
+ return (
+ '\'%s\' is '
+ 'a tag in %d files, '
+ 'a branch in %d files, '
+ 'a trivial import in %d files, '
+ 'a pure import in %d files, '
+ 'and has commits in %d files'
+ % (self.lod, self.tag_create_count, self.branch_create_count,
+ self.trivial_import_count, self.pure_ntdb_count,
+ self.branch_commit_count)
+ )
+
+ def __repr__(self):
+ retval = ['%s\n possible parents:\n' % (self,)]
+ parent_counts = self.possible_parents.items()
+ parent_counts.sort(lambda a,b: - cmp(a[1], b[1]))
+ for (symbol, count) in parent_counts:
+ if isinstance(symbol, Trunk):
+ retval.append(' trunk : %d\n' % count)
+ else:
+ retval.append(' \'%s\' : %d\n' % (symbol.name, count))
+ if self.branch_blockers:
+ blockers = list(self.branch_blockers)
+ blockers.sort()
+ retval.append(' blockers:\n')
+ for blocker in blockers:
+ retval.append(' \'%s\'\n' % (blocker,))
+ return ''.join(retval)
+
+
+class SymbolStatisticsCollector:
+ """Collect statistics about lines of development.
+
+ Record a summary of information about each line of development in
+ the RCS files for later storage into a database. The database is
+ created in CollectRevsPass and it is used in CollateSymbolsPass (via
+ the SymbolStatistics class).
+
+ collect_data._SymbolDataCollector inserts information into instances
+ of this class by calling its register_*() methods.
+
+ Its main purpose is to assist in the decisions about which symbols
+ can be treated as branches and tags and which may be excluded.
+
+ The data collected by this class can be written to the file
+ config.SYMBOL_STATISTICS."""
+
+ def __init__(self):
+ # A map { lod -> _Stats } for all lines of development:
+ self._stats = { }
+
+ def __getitem__(self, lod):
+ """Return the _Stats record for line of development LOD.
+
+ Create and register a new one if necessary."""
+
+ try:
+ return self._stats[lod]
+ except KeyError:
+ stats = _Stats(lod)
+ self._stats[lod] = stats
+ return stats
+
+ def register(self, cvs_file_items):
+ """Register the statistics for each symbol in CVS_FILE_ITEMS."""
+
+ for lod_items in cvs_file_items.iter_lods():
+ if lod_items.lod is not None:
+ branch_stats = self[lod_items.lod]
+
+ branch_stats.register_branch_creation()
+
+ if lod_items.cvs_revisions:
+ branch_stats.register_branch_commit()
+
+ if lod_items.is_trivial_import():
+ branch_stats.register_trivial_import()
+
+ if lod_items.is_pure_ntdb():
+ branch_stats.register_pure_ntdb()
+
+ for cvs_symbol in lod_items.iter_blockers():
+ branch_stats.register_branch_blocker(cvs_symbol.symbol)
+
+ if lod_items.cvs_branch is not None:
+ branch_stats.register_branch_possible_parents(
+ lod_items.cvs_branch, cvs_file_items
+ )
+
+ for cvs_tag in lod_items.cvs_tags:
+ tag_stats = self[cvs_tag.symbol]
+
+ tag_stats.register_tag_creation()
+
+ tag_stats.register_tag_possible_parents(cvs_tag, cvs_file_items)
+
+ def purge_ghost_symbols(self):
+ """Purge any symbols that don't have any activity.
+
+ Such ghost symbols can arise if a symbol was defined in an RCS
+ file but pointed at a non-existent revision."""
+
+ for stats in self._stats.values():
+ if stats.is_ghost():
+ Log().warn('Deleting ghost symbol: %s' % (stats.lod,))
+ del self._stats[stats.lod]
+
+ def close(self):
+ """Store the stats database to the SYMBOL_STATISTICS file."""
+
+ f = open(artifact_manager.get_temp_file(config.SYMBOL_STATISTICS), 'wb')
+ cPickle.dump(self._stats.values(), f, -1)
+ f.close()
+ self._stats = None
+
+
+class SymbolStatistics:
+ """Read and handle line of development statistics.
+
+ The statistics are read from a database created by
+ SymbolStatisticsCollector. This class has methods to process the
+ statistics information and help with decisions about:
+
+ 1. What tags and branches should be processed/excluded
+
+ 2. What tags should be forced to be branches and vice versa (this
+ class maintains some statistics to help the user decide)
+
+ 3. Are there inconsistencies?
+
+ - A symbol that is sometimes a branch and sometimes a tag
+
+ - A forced branch with commit(s) on it
+
+ - A non-excluded branch depends on an excluded branch
+
+ The data in this class is read from a pickle file."""
+
+ def __init__(self, filename):
+ """Read the stats database from FILENAME."""
+
+ # A map { LineOfDevelopment -> _Stats } for all lines of
+ # development:
+ self._stats = { }
+
+ # A map { LineOfDevelopment.id -> _Stats } for all lines of
+ # development:
+ self._stats_by_id = { }
+
+ stats_list = cPickle.load(open(filename, 'rb'))
+
+ for stats in stats_list:
+ self._stats[stats.lod] = stats
+ self._stats_by_id[stats.lod.id] = stats
+
+ def __len__(self):
+ return len(self._stats)
+
+ def __getitem__(self, lod_id):
+ return self._stats_by_id[lod_id]
+
+ def get_stats(self, lod):
+ """Return the _Stats object for LineOfDevelopment instance LOD.
+
+ Raise KeyError if no such lod exists."""
+
+ return self._stats[lod]
+
+ def __iter__(self):
+ return self._stats.itervalues()
+
+ def _check_blocked_excludes(self, symbol_map):
+ """Check for any excluded LODs that are blocked by non-excluded symbols.
+
+ If any are found, describe the problem to Log().error() and raise
+ a FatalException."""
+
+ # A list of (lod,[blocker,...]) tuples for excludes that are
+ # blocked by the specified non-excluded blockers:
+ problems = []
+
+ for lod in symbol_map.itervalues():
+ if isinstance(lod, ExcludedSymbol):
+ # Symbol is excluded; make sure that its blockers are also
+ # excluded:
+ lod_blockers = []
+ for blocker in self.get_stats(lod).branch_blockers:
+ if isinstance(symbol_map.get(blocker, None), IncludedSymbol):
+ lod_blockers.append(blocker)
+ if lod_blockers:
+ problems.append((lod, lod_blockers))
+
+ if problems:
+ s = []
+ for (lod, lod_blockers) in problems:
+ s.append(
+ '%s: %s cannot be excluded because the following symbols '
+ 'depend on it:\n'
+ % (error_prefix, lod,)
+ )
+ for blocker in lod_blockers:
+ s.append(' %s\n' % (blocker,))
+ s.append('\n')
+ Log().error(''.join(s))
+
+ raise FatalException()
+
+ def _check_invalid_tags(self, symbol_map):
+ """Check for commits on any symbols that are to be converted as tags.
+
+ SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
+ indicating how each AbstractSymbol is to be converted. If there
+ is a commit on a symbol, then it cannot be converted as a tag. If
+ any tags with commits are found, output error messages describing
+ the problems then raise a FatalException."""
+
+ Log().quiet("Checking for forced tags with commits...")
+
+ invalid_tags = [ ]
+ for symbol in symbol_map.itervalues():
+ if isinstance(symbol, Tag):
+ stats = self.get_stats(symbol)
+ if stats.branch_commit_count > 0:
+ invalid_tags.append(symbol)
+
+ if not invalid_tags:
+ # No problems found:
+ return
+
+ s = []
+ s.append(
+ '%s: The following branches cannot be forced to be tags '
+ 'because they have commits:\n'
+ % (error_prefix,)
+ )
+ for tag in invalid_tags:
+ s.append(' %s\n' % (tag.name))
+ s.append('\n')
+ Log().error(''.join(s))
+
+ raise FatalException()
+
+ def check_consistency(self, symbol_map):
+ """Check the plan for how to convert symbols for consistency.
+
+ SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
+ indicating how each AbstractSymbol is to be converted. If any
+ problems are detected, describe the problem to Log().error() and
+ raise a FatalException."""
+
+ # We want to do all of the consistency checks even if one of them
+ # fails, so that the user gets as much feedback as possible. Set
+ # this variable to True if any errors are found.
+ error_found = False
+
+ # Check that the planned preferred parents are OK for all
+ # IncludedSymbols:
+ for lod in symbol_map.itervalues():
+ if isinstance(lod, IncludedSymbol):
+ stats = self.get_stats(lod)
+ try:
+ stats.check_preferred_parent_allowed(lod)
+ except SymbolPlanException, e:
+ Log().error('%s\n' % (e,))
+ error_found = True
+
+ try:
+ self._check_blocked_excludes(symbol_map)
+ except FatalException:
+ error_found = True
+
+ try:
+ self._check_invalid_tags(symbol_map)
+ except FatalException:
+ error_found = True
+
+ if error_found:
+ raise FatalException(
+ 'Please fix the above errors and restart CollateSymbolsPass'
+ )
+
+ def exclude_symbol(self, symbol):
+ """SYMBOL has been excluded; remove it from our statistics."""
+
+ del self._stats[symbol]
+ del self._stats_by_id[symbol.id]
+
+ # Remove references to this symbol from other statistics objects:
+ for stats in self._stats.itervalues():
+ stats.branch_blockers.discard(symbol)
+ if symbol in stats.possible_parents:
+ del stats.possible_parents[symbol]
+
+
diff --git a/cvs2svn_lib/symbol_strategy.py b/cvs2svn_lib/symbol_strategy.py
new file mode 100644
index 0000000..9d562a8
--- /dev/null
+++ b/cvs2svn_lib/symbol_strategy.py
@@ -0,0 +1,685 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""SymbolStrategy classes determine how to convert symbols."""
+
+import re
+
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import path_join
+from cvs2svn_lib.common import normalize_svn_path
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.symbol import TypedSymbol
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+from cvs2svn_lib.symbol import ExcludedSymbol
+from cvs2svn_lib.symbol_statistics import SymbolPlanError
+
+
+class StrategyRule:
+ """A single rule that might determine how to convert a symbol."""
+
+ def start(self, symbol_statistics):
+ """This method is called once before get_symbol() is ever called.
+
+ The StrategyRule can override this method to do whatever it wants
+ to prepare itself for work. SYMBOL_STATISTICS is an instance of
+ SymbolStatistics containing the statistics for all symbols in all
+ projects."""
+
+ pass
+
+ def get_symbol(self, symbol, stats):
+ """Return an object describing what to do with the symbol in STATS.
+
+ SYMBOL holds a Trunk or Symbol object as it has been determined so
+ far. Hopefully one of these method calls will turn any naked
+ Symbol instances into TypedSymbols.
+
+ If this rule applies to the SYMBOL (whose statistics are collected
+ in STATS), then return a new or modified AbstractSymbol object.
+ If this rule doesn't apply, return SYMBOL unchanged."""
+
+ raise NotImplementedError()
+
+ def finish(self):
+ """This method is called once after get_symbol() is done being called.
+
+ The StrategyRule can override this method to do whatever it wants to
+ release resources, etc."""
+
+ pass
+
+
+class _RegexpStrategyRule(StrategyRule):
+ """A Strategy rule that bases its decisions on regexp matches.
+
+ If self.regexp matches a symbol name, return self.action(symbol);
+ otherwise, return the symbol unchanged."""
+
+ def __init__(self, pattern, action):
+ """Initialize a _RegexpStrategyRule.
+
+ PATTERN is a string that will be treated as a regexp pattern.
+ PATTERN must match a full symbol name for the rule to apply (i.e.,
+ it is anchored at the beginning and end of the symbol name).
+
+ ACTION is the class representing how the symbol should be
+ converted. It should be one of the classes Branch, Tag, or
+ ExcludedSymbol.
+
+ If PATTERN matches a symbol name, then get_symbol() returns
+ ACTION(name, id); otherwise it returns SYMBOL unchanged."""
+
+ try:
+ self.regexp = re.compile('^' + pattern + '$')
+ except re.error:
+ raise FatalError("%r is not a valid regexp." % (pattern,))
+
+ self.action = action
+
+ def log(self, symbol):
+ raise NotImplementedError()
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ elif self.regexp.match(symbol.name):
+ self.log(symbol)
+ return self.action(symbol)
+ else:
+ return symbol
+
+
+class ForceBranchRegexpStrategyRule(_RegexpStrategyRule):
+ """Force symbols matching pattern to be branches."""
+
+ def __init__(self, pattern):
+ _RegexpStrategyRule.__init__(self, pattern, Branch)
+
+ def log(self, symbol):
+ Log().verbose(
+ 'Converting symbol %s as a branch because it matches regexp "%s".'
+ % (symbol, self.regexp.pattern,)
+ )
+
+
+class ForceTagRegexpStrategyRule(_RegexpStrategyRule):
+ """Force symbols matching pattern to be tags."""
+
+ def __init__(self, pattern):
+ _RegexpStrategyRule.__init__(self, pattern, Tag)
+
+ def log(self, symbol):
+ Log().verbose(
+ 'Converting symbol %s as a tag because it matches regexp "%s".'
+ % (symbol, self.regexp.pattern,)
+ )
+
+
+class ExcludeRegexpStrategyRule(_RegexpStrategyRule):
+ """Exclude symbols matching pattern."""
+
+ def __init__(self, pattern):
+ _RegexpStrategyRule.__init__(self, pattern, ExcludedSymbol)
+
+ def log(self, symbol):
+ Log().verbose(
+ 'Excluding symbol %s because it matches regexp "%s".'
+ % (symbol, self.regexp.pattern,)
+ )
+
+
+class ExcludeTrivialImportBranchRule(StrategyRule):
+ """If a symbol is a trivial import branch, exclude it.
+
+ A trivial import branch is defined to be a branch that only had a
+ single import on it (no other kinds of commits) in every file in
+ which it appeared. In most cases these branches are worthless."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ if stats.tag_create_count == 0 \
+ and stats.branch_create_count == stats.trivial_import_count:
+ Log().verbose(
+ 'Excluding branch %s because it is a trivial import branch.'
+ % (symbol,)
+ )
+ return ExcludedSymbol(symbol)
+ else:
+ return symbol
+
+
+class ExcludeVendorBranchRule(StrategyRule):
+ """If a symbol is a pure vendor branch, exclude it.
+
+ A pure vendor branch is defined to be a branch that only had imports
+ on it (no other kinds of commits) in every file in which it
+ appeared."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ if stats.tag_create_count == 0 \
+ and stats.branch_create_count == stats.pure_ntdb_count:
+ Log().verbose(
+ 'Excluding branch %s because it is a pure vendor branch.'
+ % (symbol,)
+ )
+ return ExcludedSymbol(symbol)
+ else:
+ return symbol
+
+
+class UnambiguousUsageRule(StrategyRule):
+ """If a symbol is used unambiguously as a tag/branch, convert it as such."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ is_tag = stats.tag_create_count > 0
+ is_branch = stats.branch_create_count > 0 or stats.branch_commit_count > 0
+ if is_tag and is_branch:
+ # Can't decide
+ return symbol
+ elif is_branch:
+ Log().verbose(
+ 'Converting symbol %s as a branch because it is always used '
+ 'as a branch.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+ elif is_tag:
+ Log().verbose(
+ 'Converting symbol %s as a tag because it is always used '
+ 'as a tag.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+ else:
+ # The symbol didn't appear at all:
+ return symbol
+
+
+class BranchIfCommitsRule(StrategyRule):
+ """If there was ever a commit on the symbol, convert it as a branch."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ elif stats.branch_commit_count > 0:
+ Log().verbose(
+ 'Converting symbol %s as a branch because there are commits on it.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+ else:
+ return symbol
+
+
+class HeuristicStrategyRule(StrategyRule):
+ """Convert symbol based on how often it was used as a branch/tag.
+
+ Whichever happened more often determines how the symbol is
+ converted."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ elif stats.tag_create_count >= stats.branch_create_count:
+ Log().verbose(
+ 'Converting symbol %s as a tag because it is more often used '
+ 'as a tag.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+ else:
+ Log().verbose(
+ 'Converting symbol %s as a branch because it is more often used '
+ 'as a branch.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+
+
+class AllBranchRule(StrategyRule):
+ """Convert all symbols as branches.
+
+ Usually this rule will appear after a list of more careful rules
+ (including a general rule like UnambiguousUsageRule) and will
+ therefore only apply to the symbols not handled earlier."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ else:
+ Log().verbose(
+ 'Converting symbol %s as a branch because no other rules applied.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+
+
+class AllTagRule(StrategyRule):
+ """Convert all symbols as tags.
+
+ We don't worry about conflicts here; they will be caught later by
+ SymbolStatistics.check_consistency().
+
+ Usually this rule will appear after a list of more careful rules
+ (including a general rule like UnambiguousUsageRule) and will
+ therefore only apply to the symbols not handled earlier."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ else:
+ Log().verbose(
+ 'Converting symbol %s as a tag because no other rules applied.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+
+
+class TrunkPathRule(StrategyRule):
+ """Set the base path for Trunk."""
+
+ def __init__(self, trunk_path):
+ self.trunk_path = trunk_path
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, Trunk) and symbol.base_path is None:
+ symbol.base_path = self.trunk_path
+
+ return symbol
+
+
+class SymbolPathRule(StrategyRule):
+ """Set the base paths for symbol LODs."""
+
+ def __init__(self, symbol_type, base_path):
+ self.symbol_type = symbol_type
+ self.base_path = base_path
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, self.symbol_type) and symbol.base_path is None:
+ symbol.base_path = path_join(self.base_path, symbol.name)
+
+ return symbol
+
+
+class BranchesPathRule(SymbolPathRule):
+ """Set the base paths for Branch LODs."""
+
+ def __init__(self, branch_path):
+ SymbolPathRule.__init__(self, Branch, branch_path)
+
+
+class TagsPathRule(SymbolPathRule):
+ """Set the base paths for Tag LODs."""
+
+ def __init__(self, tag_path):
+ SymbolPathRule.__init__(self, Tag, tag_path)
+
+
+class HeuristicPreferredParentRule(StrategyRule):
+ """Use a heuristic rule to pick preferred parents.
+
+ Pick the parent that should be preferred for any TypedSymbols. As
+ parent, use the symbol that appeared most often as a possible parent
+ of the symbol in question. If multiple symbols are tied, choose the
+ one that comes first according to the Symbol class's natural sort
+ order."""
+
+ def _get_preferred_parent(self, stats):
+ """Return the LODs that are most often possible parents in STATS.
+
+ Return the set of LinesOfDevelopment that appeared most often as
+ possible parents. The return value might contain multiple symbols
+ if multiple LinesOfDevelopment appeared the same number of times."""
+
+ best_count = -1
+ best_symbol = None
+ for (symbol, count) in stats.possible_parents.items():
+ if count > best_count or (count == best_count and symbol < best_symbol):
+ best_count = count
+ best_symbol = symbol
+
+ if best_symbol is None:
+ return None
+ else:
+ return best_symbol
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, TypedSymbol) and symbol.preferred_parent_id is None:
+ preferred_parent = self._get_preferred_parent(stats)
+ if preferred_parent is None:
+ Log().verbose('%s has no preferred parent' % (symbol,))
+ else:
+ symbol.preferred_parent_id = preferred_parent.id
+ Log().verbose(
+ 'The preferred parent of %s is %s' % (symbol, preferred_parent,)
+ )
+
+ return symbol
+
+
+class ManualTrunkRule(StrategyRule):
+ """Change the SVN path of Trunk LODs.
+
+ Members:
+
+ project_id -- (int or None) The id of the project whose trunk
+ should be affected by this rule. If project_id is None, then
+ the rule is not project-specific.
+
+ svn_path -- (str) The SVN path that should be used as the base
+ directory for this trunk. This member must not be None,
+ though it may be the empty string for a single-project,
+ trunk-only conversion.
+
+ """
+
+ def __init__(self, project_id, svn_path):
+ self.project_id = project_id
+ self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
+
+ def get_symbol(self, symbol, stats):
+ if (self.project_id is not None
+ and self.project_id != stats.lod.project.id):
+ return symbol
+
+ if isinstance(symbol, Trunk):
+ symbol.base_path = self.svn_path
+
+ return symbol
+
+
+def convert_as_branch(symbol):
+ Log().verbose(
+ 'Converting symbol %s as a branch because of manual setting.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+
+
+def convert_as_tag(symbol):
+ Log().verbose(
+ 'Converting symbol %s as a tag because of manual setting.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+
+
+def exclude(symbol):
+ Log().verbose(
+ 'Excluding symbol %s because of manual setting.'
+ % (symbol,)
+ )
+ return ExcludedSymbol(symbol)
+
+
+class ManualSymbolRule(StrategyRule):
+  """Change how particular symbols are converted.
+
+  Members:
+
+    project_id -- (int or None) The id of the project whose trunk
+        should be affected by this rule.  If project_id is None, then
+        the rule is not project-specific.
+
+    symbol_name -- (str) The name of the symbol that should be
+        affected by this rule.
+
+    conversion -- (callable or None) A callable that converts the
+        symbol to its preferred output type.  This should normally be
+        one of (convert_as_branch, convert_as_tag, exclude).  If this
+        member is None, then this rule does not affect the symbol's
+        output type.
+
+    svn_path -- (str) The SVN path that should be used as the base
+        directory for this trunk.  This member must not be None,
+        though it may be the empty string for a single-project,
+        trunk-only conversion.
+
+    parent_lod_name -- (str or None) The name of the line of
+        development that should be preferred as the parent of this
+        symbol.  (The preferred parent is the line of development from
+        which the symbol should sprout.)  If this member is set to the
+        string '.trunk.', then the symbol will be set to sprout
+        directly from trunk.  If this member is set to None, then this
+        rule won't affect the symbol's parent.
+
+  """
+
+  def __init__(
+        self, project_id, symbol_name, conversion, svn_path, parent_lod_name
+        ):
+    self.project_id = project_id
+    self.symbol_name = symbol_name
+    self.conversion = conversion
+    # svn_path is optional for symbol rules (unlike ManualTrunkRule);
+    # None means "don't touch the symbol's base path":
+    if svn_path is None:
+      self.svn_path = None
+    else:
+      self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
+    self.parent_lod_name = parent_lod_name
+
+  def _get_parent_by_id(self, parent_lod_name, stats):
+    """Return the LOD object for the parent with name PARENT_LOD_NAME.
+
+    STATS is the _Stats object describing a symbol whose parent needs
+    to be determined from its name.  If none of its possible parents
+    has name PARENT_LOD_NAME, raise a SymbolPlanError."""
+
+    # NOTE(review): the method name says "by id" but the lookup is by
+    # name.  The else clause below is a for/else: it runs only if the
+    # loop finishes without returning a match -- confirm indentation
+    # against upstream before reformatting.
+    for pp in stats.possible_parents.keys():
+      if isinstance(pp, Trunk):
+        pass
+      elif pp.name == parent_lod_name:
+        return pp
+    else:
+      # No match found; report all candidates, most popular first:
+      parent_counts = stats.possible_parents.items()
+      parent_counts.sort(lambda a,b: - cmp(a[1], b[1]))
+      lines = [
+          '%s is not a valid parent for %s;'
+          % (parent_lod_name, stats.lod,),
+          '    possible parents (with counts):'
+          ]
+      for (symbol, count) in parent_counts:
+        if isinstance(symbol, Trunk):
+          lines.append('        .trunk. : %d' % count)
+        else:
+          lines.append('        %s : %d' % (symbol.name, count))
+      raise SymbolPlanError('\n'.join(lines))
+
+  def get_symbol(self, symbol, stats):
+    """Apply this rule to SYMBOL if it is the named, in-project symbol."""
+
+    if (self.project_id is not None
+        and self.project_id != stats.lod.project.id):
+      # Rule is project-specific and this is a different project:
+      return symbol
+
+    elif isinstance(symbol, Trunk):
+      # Trunk is handled by ManualTrunkRule, not here:
+      return symbol
+
+    elif self.symbol_name == stats.lod.name:
+      if self.conversion is not None:
+        symbol = self.conversion(symbol)
+
+      if self.parent_lod_name is None:
+        pass
+      elif self.parent_lod_name == '.trunk.':
+        symbol.preferred_parent_id = stats.lod.project.trunk_id
+      else:
+        symbol.preferred_parent_id = self._get_parent_by_id(
+            self.parent_lod_name, stats
+            ).id
+
+      if self.svn_path is not None:
+        symbol.base_path = self.svn_path
+
+    return symbol
+
+
+class SymbolHintsFileRule(StrategyRule):
+  """Use manual symbol configurations read from a file.
+
+  The input file is line-oriented with the following format:
+
+      <project-id> <symbol-name> <conversion> [<svn-path> [<parent-lod-name>]]
+
+  Where the fields are separated by whitespace and
+
+      project-id -- the numerical id of the Project to which the
+          symbol belongs (numbered starting with 0).  This field can
+          be '.' if the rule is not project-specific.
+
+      symbol-name -- the name of the symbol being specified, or
+          '.trunk.' if the rule should apply to trunk.
+
+      conversion -- how the symbol should be treated in the
+          conversion.  This is one of the following values: 'branch',
+          'tag', or 'exclude'.  This field can be '.' if the rule
+          shouldn't affect how the symbol is treated in the
+          conversion.
+
+      svn-path -- the SVN path that should serve as the root path of
+          this LOD.  The path should be expressed as a path relative
+          to the SVN root directory, with or without a leading '/'.
+          This field can be omitted or '.' if the rule shouldn't
+          affect the LOD's SVN path.
+
+      parent-lod-name -- the name of the LOD that should serve as this
+          symbol's parent.  This field can be omitted or '.' if the
+          rule shouldn't affect the symbol's parent, or it can be
+          '.trunk.' to indicate that the symbol should sprout from the
+          project's trunk."""
+
+  # Lines that are blank or start with '#' are skipped:
+  comment_re = re.compile(r'^(\#|$)')
+
+  # Maps the <conversion> field to a conversion callable; '.' means
+  # "leave the symbol's conversion unaffected":
+  conversion_map = {
+      'branch' : convert_as_branch,
+      'tag' : convert_as_tag,
+      'exclude' : exclude,
+      '.' : None,
+      }
+
+  def __init__(self, filename):
+    # The file is not read until start() is called:
+    self.filename = filename
+
+  def start(self, symbol_statistics):
+    """Parse the hints file into self._rules and start each rule.
+
+    Raise FatalError for any line that cannot be parsed."""
+
+    self._rules = []
+
+    # NOTE(review): the handle is never explicitly closed; it is only
+    # released when f goes out of scope (CPython refcounting).
+    f = open(self.filename, 'r')
+    for l in f:
+      l = l.rstrip()
+      s = l.lstrip()
+      if self.comment_re.match(s):
+        continue
+      fields = s.split()
+
+      if len(fields) < 3:
+        raise FatalError(
+            'The following line in "%s" cannot be parsed:\n "%s"'
+            % (self.filename, l,)
+            )
+
+      project_id = fields.pop(0)
+      symbol_name = fields.pop(0)
+      conversion = fields.pop(0)
+
+      # Optional fourth field: svn-path ('.' or absent -> no effect;
+      # a leading '/' is stripped):
+      if fields:
+        svn_path = fields.pop(0)
+        if svn_path == '.':
+          svn_path = None
+        elif svn_path[0] == '/':
+          svn_path = svn_path[1:]
+      else:
+        svn_path = None
+
+      # Optional fifth field: parent-lod-name:
+      if fields:
+        parent_lod_name = fields.pop(0)
+      else:
+        parent_lod_name = '.'
+
+      # Anything beyond five fields is an error:
+      if fields:
+        raise FatalError(
+            'The following line in "%s" cannot be parsed:\n "%s"'
+            % (self.filename, l,)
+            )
+
+      if project_id == '.':
+        project_id = None
+      else:
+        try:
+          project_id = int(project_id)
+        except ValueError:
+          raise FatalError(
+              'Illegal project_id in the following line:\n "%s"' % (l,)
+              )
+
+      if symbol_name == '.trunk.':
+        # Trunk may only have its svn_path adjusted:
+        if conversion not in ['.', 'trunk']:
+          raise FatalError('Trunk cannot be converted as a different type')
+
+        if parent_lod_name != '.':
+          raise FatalError('Trunk\'s parent cannot be set')
+
+        if svn_path is None:
+          # This rule doesn't do anything:
+          pass
+        else:
+          self._rules.append(ManualTrunkRule(project_id, svn_path))
+
+      else:
+        try:
+          conversion = self.conversion_map[conversion]
+        except KeyError:
+          raise FatalError(
+              'Illegal conversion in the following line:\n "%s"' % (l,)
+              )
+
+        if parent_lod_name == '.':
+          parent_lod_name = None
+
+        if conversion is None \
+              and svn_path is None \
+              and parent_lod_name is None:
+          # There is nothing to be done:
+          pass
+        else:
+          self._rules.append(
+              ManualSymbolRule(
+                  project_id, symbol_name,
+                  conversion, svn_path, parent_lod_name
+                  )
+              )
+
+    for rule in self._rules:
+      rule.start(symbol_statistics)
+
+  def get_symbol(self, symbol, stats):
+    # Apply every parsed rule in file order; later rules see the
+    # result of earlier ones:
+    for rule in self._rules:
+      symbol = rule.get_symbol(symbol, stats)
+
+    return symbol
+
+  def finish(self):
+    for rule in self._rules:
+      rule.finish()
+
+    del self._rules
+
+
diff --git a/cvs2svn_lib/symbol_transform.py b/cvs2svn_lib/symbol_transform.py
new file mode 100644
index 0000000..a4995b8
--- /dev/null
+++ b/cvs2svn_lib/symbol_transform.py
@@ -0,0 +1,236 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains classes to transform symbol names."""
+
+
+import os
+import re
+
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import IllegalSVNPathError
+from cvs2svn_lib.common import normalize_svn_path
+
+
+class SymbolTransform:
+  """Transform symbol names arbitrarily.
+
+  Abstract base class; concrete subclasses override transform()."""
+
+  def transform(self, cvs_file, symbol_name, revision):
+    """Possibly transform SYMBOL_NAME, which was found in CVS_FILE.
+
+    Return the transformed symbol name.  If this SymbolTransform
+    doesn't apply, return the original SYMBOL_NAME.  If this symbol
+    should be ignored entirely, return None.  (Please note that
+    ignoring a branch via this mechanism only causes the branch *name*
+    to be ignored; the branch contents will still be converted.
+    Usually branches should be excluded using --exclude.)
+
+    REVISION contains the CVS revision number to which the symbol was
+    attached in the file as a string (with zeros removed).
+
+    This method is free to use the information in CVS_FILE (including
+    CVS_FILE.project) to decide whether and/or how to transform
+    SYMBOL_NAME."""
+
+    raise NotImplementedError()
+
+
+class ReplaceSubstringsSymbolTransform(SymbolTransform):
+  """Replace specific substrings in symbol names.
+
+  If the substring occurs multiple times, replace all copies."""
+
+  def __init__(self, old, new):
+    # old -- (str) substring to search for; new -- (str) replacement.
+    self.old = old
+    self.new = new
+
+  def transform(self, cvs_file, symbol_name, revision):
+    # str.replace() already replaces every occurrence:
+    return symbol_name.replace(self.old, self.new)
+
+
+class NormalizePathsSymbolTransform(SymbolTransform):
+  """Normalize symbol names as SVN paths, aborting on illegal ones."""
+
+  def transform(self, cvs_file, symbol_name, revision):
+    try:
+      return normalize_svn_path(symbol_name)
+    except IllegalSVNPathError, e:
+      # A symbol name that cannot be a legal SVN path is fatal:
+      raise FatalError('Problem with %s: %s' % (symbol_name, e,))
+
+
+class CompoundSymbolTransform(SymbolTransform):
+  """A SymbolTransform that applies other SymbolTransforms in series.
+
+  Each of the contained SymbolTransforms is applied, one after the
+  other.  If any of them returns None, then None is returned (the
+  following SymbolTransforms are ignored)."""
+
+  def __init__(self, symbol_transforms):
+    """Ininitialize a CompoundSymbolTransform.
+
+    SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances."""
+
+    # Copy into a list so the iterable can be consumed only once:
+    self.symbol_transforms = list(symbol_transforms)
+
+  def transform(self, cvs_file, symbol_name, revision):
+    for symbol_transform in self.symbol_transforms:
+      symbol_name = symbol_transform.transform(
+          cvs_file, symbol_name, revision
+          )
+      if symbol_name is None:
+        # Don't continue with other symbol transforms:
+        break
+
+    return symbol_name
+
+
+class RegexpSymbolTransform(SymbolTransform):
+  """Transform symbols by using a regexp textual substitution."""
+
+  def __init__(self, pattern, replacement):
+    """Create a SymbolTransform that transforms symbols matching PATTERN.
+
+    PATTERN is a regular expression that should match the whole symbol
+    name.  REPLACEMENT is the replacement text, which may include
+    patterns like r'\1' or r'\g<1>' or r'\g<name>' (where 'name' is a
+    reference to a named substring in the pattern of the form
+    r'(?P<name>...)')."""
+
+    # Anchor the pattern so it must match the whole symbol name:
+    self.pattern = re.compile('^' + pattern + '$')
+    self.replacement = replacement
+
+  def transform(self, cvs_file, symbol_name, revision):
+    # sub() returns the name unchanged if the (anchored) pattern
+    # doesn't match:
+    return self.pattern.sub(self.replacement, symbol_name)
+
+
+class SymbolMapper(SymbolTransform):
+  """A SymbolTransform that transforms specific symbol definitions.
+
+  The user has to specify the exact CVS filename, symbol name, and
+  revision number to be transformed, and the new name (or None if the
+  symbol should be ignored).  The mappings can be set via a
+  constructor argument or by calling __setitem__()."""
+
+  def __init__(self, items=[]):
+    """Initialize the mapper.
+
+    ITEMS is a list of tuples (cvs_filename, symbol_name, revision,
+    new_name) which will be set as mappings."""
+
+    # NOTE(review): the mutable default is only iterated, never
+    # mutated, so it is harmless here -- but a tuple default would be
+    # safer.
+
+    # A map {(cvs_filename, symbol_name, revision) : new_name}:
+    self._map = {}
+
+    for (cvs_filename, symbol_name, revision, new_name) in items:
+      self[cvs_filename, symbol_name, revision] = new_name
+
+  # Python 2-only tuple-parameter syntax; the key is the triple
+  # (cvs_filename, symbol_name, revision):
+  def __setitem__(self, (cvs_filename, symbol_name, revision), new_name):
+    """Set a mapping for a particular file, symbol, and revision."""
+
+    # Canonicalize the path so lookups in transform() use the same key
+    # form regardless of case or separators:
+    cvs_filename = os.path.normcase(os.path.normpath(cvs_filename))
+    key = (cvs_filename, symbol_name, revision)
+    if key in self._map:
+      Log().warn(
+          'Overwriting symbol transform for\n'
+          '    filename=%r symbol=%s revision=%s'
+          % (cvs_filename, symbol_name, revision,)
+          )
+    self._map[key] = new_name
+
+  def transform(self, cvs_file, symbol_name, revision):
+    # Unmatched symbols pass through unchanged:
+    cvs_filename = os.path.normcase(os.path.normpath(cvs_file.filename))
+    return self._map.get(
+        (cvs_filename, symbol_name, revision), symbol_name
+        )
+
+
+class SubtreeSymbolMapper(SymbolTransform):
+  """A SymbolTransform that transforms symbols within a whole repo subtree.
+
+  The user has to specify a CVS repository path (a filename or
+  directory) and the original symbol name.  All symbols under that
+  path will be renamed to the specified new name (which can be None if
+  the symbol should be ignored).  The mappings can be set via a
+  constructor argument or by calling __setitem__().  Only the most
+  specific rule is applied."""
+
+  def __init__(self, items=[]):
+    """Initialize the mapper.
+
+    ITEMS is a list of tuples (cvs_path, symbol_name, new_name)
+    which will be set as mappings.  cvs_path is a string naming a
+    directory within the CVS repository."""
+
+    # A map {symbol_name : {cvs_path : new_name}}:
+    self._map = {}
+
+    for (cvs_path, symbol_name, new_name) in items:
+      self[cvs_path, symbol_name] = new_name
+
+  # Python 2-only tuple-parameter syntax; the key is the pair
+  # (cvs_path, symbol_name):
+  def __setitem__(self, (cvs_path, symbol_name), new_name):
+    """Set a mapping for a particular file and symbol."""
+
+    try:
+      symbol_map = self._map[symbol_name]
+    except KeyError:
+      symbol_map = {}
+      self._map[symbol_name] = symbol_map
+
+    # Canonicalize the path so transform() lookups use the same form:
+    cvs_path = os.path.normcase(os.path.normpath(cvs_path))
+    if cvs_path in symbol_map:
+      Log().warn(
+          'Overwriting symbol transform for\n'
+          '    directory=%r symbol=%s'
+          % (cvs_path, symbol_name,)
+          )
+    symbol_map[cvs_path] = new_name
+
+  def transform(self, cvs_file, symbol_name, revision):
+    try:
+      symbol_map = self._map[symbol_name]
+    except KeyError:
+      # No rules for that symbol name
+      return symbol_name
+
+    # Walk up the directory tree so the most specific (deepest) rule
+    # wins:
+    cvs_path = os.path.normcase(os.path.normpath(cvs_file.filename))
+    while True:
+      try:
+        return symbol_map[cvs_path]
+      except KeyError:
+        new_cvs_path = os.path.dirname(cvs_path)
+        if new_cvs_path == cvs_path:
+          # No rules found for that path; return symbol name unaltered.
+          return symbol_name
+        else:
+          cvs_path = new_cvs_path
+
+
+class IgnoreSymbolTransform(SymbolTransform):
+  """Ignore symbols matching a specified regular expression."""
+
+  def __init__(self, pattern):
+    """Create an SymbolTransform that ignores symbols matching PATTERN.
+
+    PATTERN is a regular expression that should match the whole symbol
+    name."""
+
+    # Anchor the pattern so it must match the whole symbol name:
+    self.pattern = re.compile('^' + pattern + '$')
+
+  def transform(self, cvs_file, symbol_name, revision):
+    # Returning None tells the caller to drop this symbol name:
+    if self.pattern.match(symbol_name):
+      return None
+    else:
+      return symbol_name
+
+
diff --git a/cvs2svn_lib/time_range.py b/cvs2svn_lib/time_range.py
new file mode 100644
index 0000000..f7dc234
--- /dev/null
+++ b/cvs2svn_lib/time_range.py
@@ -0,0 +1,44 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains a class to manage time ranges."""
+
+
+class TimeRange(object):
+  """Track the min/max of a set of timestamps added incrementally."""
+
+  __slots__ = ('t_min', 't_max')
+
+  def __init__(self):
+    # Start out with a t_min higher than any incoming time T, and a
+    # t_max lower than any incoming T.  This way the first T will push
+    # t_min down to T, and t_max up to T, naturally (without any
+    # special-casing), and successive times will then ratchet them
+    # outward as appropriate.
+    # NOTE(review): 1L<<32 assumes timestamps below 2**32 (i.e.,
+    # before year 2106); '1L' is Python 2-only long syntax.
+    self.t_min = 1L<<32
+    self.t_max = 0
+
+  def add(self, timestamp):
+    """Expand the range to encompass TIMESTAMP."""
+
+    if timestamp < self.t_min:
+      self.t_min = timestamp
+    if timestamp > self.t_max:
+      self.t_max = timestamp
+
+  def __cmp__(self, other):
+    # Sorted by t_max, and break ties using t_min.
+    return cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
+
+
diff --git a/cvs2svn_lib/version.py b/cvs2svn_lib/version.py
new file mode 100644
index 0000000..7900964
--- /dev/null
+++ b/cvs2svn_lib/version.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python2
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+# The version of cvs2svn:
+VERSION = '2.3.0'
+
+
+# If this file is run as a script, print the cvs2svn version number to
+# stdout (Python 2 print statement):
+if __name__ == '__main__':
+  print VERSION
+
+
diff --git a/cvs2svn_rcsparse/__init__.py b/cvs2svn_rcsparse/__init__.py
new file mode 100644
index 0000000..829c117
--- /dev/null
+++ b/cvs2svn_rcsparse/__init__.py
@@ -0,0 +1,26 @@
+# -*-python-*-
+#
+# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
+#
+# By using this file, you agree to the terms and conditions set forth in
+# the LICENSE.html file which can be found at the top level of the ViewVC
+# distribution or at http://viewvc.org/license-1.html.
+#
+# For more information, visit http://viewvc.org/
+#
+# -----------------------------------------------------------------------
+
+"""This package provides parsing tools for RCS files."""
+
+from common import *
+
+try:
+ from tparse import parse
+except ImportError:
+ try:
+ from texttools import Parser
+ except ImportError:
+ from default import Parser
+
+ def parse(file, sink):
+ return Parser().parse(file, sink)
diff --git a/cvs2svn_rcsparse/common.py b/cvs2svn_rcsparse/common.py
new file mode 100644
index 0000000..3eed600
--- /dev/null
+++ b/cvs2svn_rcsparse/common.py
@@ -0,0 +1,324 @@
+# -*-python-*-
+#
+# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
+#
+# By using this file, you agree to the terms and conditions set forth in
+# the LICENSE.html file which can be found at the top level of the ViewVC
+# distribution or at http://viewvc.org/license-1.html.
+#
+# For more information, visit http://viewvc.org/
+#
+# -----------------------------------------------------------------------
+
+"""common.py: common classes and functions for the RCS parsing tools."""
+
+import calendar
+import string
+
+class Sink:
+  """Interface of callbacks invoked by the RCS parser.
+
+  Every callback defaults to a no-op; subclasses override only the
+  events they care about."""
+
+  def set_head_revision(self, revision):
+    pass
+
+  def set_principal_branch(self, branch_name):
+    pass
+
+  def set_access(self, accessors):
+    pass
+
+  def define_tag(self, name, revision):
+    pass
+
+  def set_locker(self, revision, locker):
+    pass
+
+  def set_locking(self, mode):
+    """Used to signal locking mode.
+
+    Called with mode argument 'strict' if strict locking
+    Not called when no locking used."""
+
+    pass
+
+  def set_comment(self, comment):
+    pass
+
+  def set_expansion(self, mode):
+    pass
+
+  def admin_completed(self):
+    pass
+
+  def define_revision(self, revision, timestamp, author, state,
+                      branches, next):
+    pass
+
+  def tree_completed(self):
+    pass
+
+  def set_description(self, description):
+    pass
+
+  def set_revision_info(self, revision, log, text):
+    pass
+
+  def parse_completed(self):
+    pass
+
+
+# --------------------------------------------------------------------------
+#
+# EXCEPTIONS USED BY RCSPARSE
+#
+
+class RCSParseError(Exception):
+  """Base class for errors raised while parsing an RCS file."""
+  pass
+
+
+class RCSIllegalCharacter(RCSParseError):
+  """An illegal character was encountered in the RCS file."""
+  pass
+
+
+class RCSExpected(RCSParseError):
+  """The parser expected one token but saw another."""
+
+  def __init__(self, got, wanted):
+    RCSParseError.__init__(
+        self,
+        'Unexpected parsing error in RCS file.\n'
+        'Expected token: %s, but saw: %s'
+        % (wanted, got)
+        )
+
+
+class RCSStopParser(Exception):
+  """Raised by a sink to abort parsing early (not an error)."""
+  pass
+
+
+# --------------------------------------------------------------------------
+#
+# STANDARD TOKEN STREAM-BASED PARSER
+#
+
+class _Parser:
+  """Skeleton of a token-stream-based RCS parser.
+
+  Subclasses provide stream_class, a _TokenStream-like class with
+  get()/match()/unget()/mget() methods; parse() drives the four
+  sections of an RCS file (admin, tree, description, deltatext) and
+  reports everything to a Sink."""
+
+  stream_class = None # subclasses need to define this
+
+  def _read_until_semicolon(self):
+    """Read all tokens up to and including the next semicolon token.
+
+    Return the tokens (not including the semicolon) as a list."""
+
+    tokens = []
+
+    while 1:
+      token = self.ts.get()
+      if token == ';':
+        break
+      tokens.append(token)
+
+    return tokens
+
+  def _parse_admin_head(self, token):
+    rev = self.ts.get()
+    if rev == ';':
+      # The head revision is not specified.  Just drop the semicolon
+      # on the floor.
+      pass
+    else:
+      self.sink.set_head_revision(rev)
+      self.ts.match(';')
+
+  def _parse_admin_branch(self, token):
+    branch = self.ts.get()
+    if branch != ';':
+      self.sink.set_principal_branch(branch)
+      self.ts.match(';')
+
+  def _parse_admin_access(self, token):
+    accessors = self._read_until_semicolon()
+    if accessors:
+      self.sink.set_access(accessors)
+
+  def _parse_admin_symbols(self, token):
+    # Each entry has the form NAME : REVISION, terminated by ';':
+    while 1:
+      tag_name = self.ts.get()
+      if tag_name == ';':
+        break
+      self.ts.match(':')
+      tag_rev = self.ts.get()
+      self.sink.define_tag(tag_name, tag_rev)
+
+  def _parse_admin_locks(self, token):
+    # Each entry has the form LOCKER : REVISION, terminated by ';':
+    while 1:
+      locker = self.ts.get()
+      if locker == ';':
+        break
+      self.ts.match(':')
+      rev = self.ts.get()
+      self.sink.set_locker(rev, locker)
+
+  def _parse_admin_strict(self, token):
+    self.sink.set_locking("strict")
+    self.ts.match(';')
+
+  def _parse_admin_comment(self, token):
+    self.sink.set_comment(self.ts.get())
+    self.ts.match(';')
+
+  def _parse_admin_expand(self, token):
+    expand_mode = self.ts.get()
+    self.sink.set_expansion(expand_mode)
+    self.ts.match(';')
+
+  # Dispatch table for the admin section.  The values are unbound
+  # methods (a Python 2 idiom), hence the f(self, token) call below;
+  # 'desc' maps to None to mark the end of the admin section.
+  admin_token_map = {
+      'head' : _parse_admin_head,
+      'branch' : _parse_admin_branch,
+      'access' : _parse_admin_access,
+      'symbols' : _parse_admin_symbols,
+      'locks' : _parse_admin_locks,
+      'strict' : _parse_admin_strict,
+      'comment' : _parse_admin_comment,
+      'expand' : _parse_admin_expand,
+      'desc' : None,
+      }
+
+  def parse_rcs_admin(self):
+    while 1:
+      # Read initial token at beginning of line
+      token = self.ts.get()
+
+      try:
+        f = self.admin_token_map[token]
+      except KeyError:
+        # We're done once we reach the description of the RCS tree
+        if token[0] in string.digits:
+          self.ts.unget(token)
+          return
+        else:
+          # Chew up "newphrase"
+          # warn("Unexpected RCS token: $token\n")
+          pass
+      else:
+        if f is None:
+          self.ts.unget(token)
+          return
+        else:
+          f(self, token)
+
+  def _parse_rcs_tree_entry(self, revision):
+    # Parse date
+    self.ts.match('date')
+    date = self.ts.get()
+    self.ts.match(';')
+
+    # Convert date into timestamp
+    date_fields = string.split(date, '.')
+    # According to rcsfile(5): the year "contains just the last two
+    # digits of the year for years from 1900 through 1999, and all the
+    # digits of years thereafter".
+    if len(date_fields[0]) == 2:
+      date_fields[0] = '19' + date_fields[0]
+    date_fields = map(string.atoi, date_fields)
+    EPOCH = 1970
+    if date_fields[0] < EPOCH:
+      raise ValueError, 'invalid year'
+    # Dates in RCS files are UTC, so timegm() is the right conversion:
+    timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))
+
+    # Parse author
+    ### NOTE: authors containing whitespace are violations of the
+    ### RCS specification.  We are making an allowance here because
+    ### CVSNT is known to produce these sorts of authors.
+    self.ts.match('author')
+    author = ' '.join(self._read_until_semicolon())
+
+    # Parse state
+    self.ts.match('state')
+    state = ''
+    while 1:
+      token = self.ts.get()
+      if token == ';':
+        break
+      state = state + token + ' '
+    state = state[:-1] # toss the trailing space
+
+    # Parse branches
+    self.ts.match('branches')
+    branches = self._read_until_semicolon()
+
+    # Parse revision of next delta in chain
+    self.ts.match('next')
+    next = self.ts.get()
+    if next == ';':
+      next = None
+    else:
+      self.ts.match(';')
+
+    # there are some files with extra tags in them. for example:
+    #    owner	640;
+    #    group	15;
+    #    permissions	644;
+    #    hardlinks	@configure.in@;
+    # this is "newphrase" in RCSFILE(5). we just want to skip over these.
+    while 1:
+      token = self.ts.get()
+      if token == 'desc' or token[0] in string.digits:
+        self.ts.unget(token)
+        break
+      # consume everything up to the semicolon
+      self._read_until_semicolon()
+
+    self.sink.define_revision(revision, timestamp, author, state, branches,
+                              next)
+
+  def parse_rcs_tree(self):
+    while 1:
+      revision = self.ts.get()
+
+      # End of RCS tree description ?
+      if revision == 'desc':
+        self.ts.unget(revision)
+        return
+
+      self._parse_rcs_tree_entry(revision)
+
+  def parse_rcs_description(self):
+    self.ts.match('desc')
+    self.sink.set_description(self.ts.get())
+
+  def parse_rcs_deltatext(self):
+    while 1:
+      revision = self.ts.get()
+      if revision is None:
+        # EOF
+        break
+      # mget() returns the tokens reversed, so the stream order
+      # 'log' <log> 'text' <text> comes back as (text, sym2, log, sym1):
+      text, sym2, log, sym1 = self.ts.mget(4)
+      if sym1 != 'log':
+        print `text[:100], sym2[:100], log[:100], sym1[:100]`
+        raise RCSExpected(sym1, 'log')
+      if sym2 != 'text':
+        raise RCSExpected(sym2, 'text')
+      ### need to add code to chew up "newphrase"
+      self.sink.set_revision_info(revision, log, text)
+
+  def parse(self, file, sink):
+    self.ts = self.stream_class(file)
+    self.sink = sink
+
+    self.parse_rcs_admin()
+
+    # let sink know when the admin section has been completed
+    self.sink.admin_completed()
+
+    self.parse_rcs_tree()
+
+    # many sinks want to know when the tree has been completed so they can
+    # do some work to prep for the arrival of the deltatext
+    self.sink.tree_completed()
+
+    self.parse_rcs_description()
+    self.parse_rcs_deltatext()
+
+    # easiest for us to tell the sink it is done, rather than worry about
+    # higher level software doing it.
+    self.sink.parse_completed()
+
+    # Drop references so the file and sink can be collected:
+    self.ts = self.sink = None
+
+# --------------------------------------------------------------------------
diff --git a/cvs2svn_rcsparse/debug.py b/cvs2svn_rcsparse/debug.py
new file mode 100644
index 0000000..cfeaf2b
--- /dev/null
+++ b/cvs2svn_rcsparse/debug.py
@@ -0,0 +1,122 @@
+# -*-python-*-
+#
+# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
+#
+# By using this file, you agree to the terms and conditions set forth in
+# the LICENSE.html file which can be found at the top level of the ViewVC
+# distribution or at http://viewvc.org/license-1.html.
+#
+# For more information, visit http://viewvc.org/
+#
+# -----------------------------------------------------------------------
+
+"""debug.py: various debugging tools for the rcsparse package."""
+
+import time
+
+from __init__ import parse
+import common
+
+
+class DebugSink(common.Sink):
+  """Sink that pretty-prints selected parser callbacks to stdout."""
+
+  def set_head_revision(self, revision):
+    print 'head:', revision
+
+  def set_principal_branch(self, branch_name):
+    print 'branch:', branch_name
+
+  def define_tag(self, name, revision):
+    print 'tag:', name, '=', revision
+
+  def set_comment(self, comment):
+    print 'comment:', comment
+
+  def set_description(self, description):
+    print 'description:', description
+
+  def define_revision(self, revision, timestamp, author, state,
+                      branches, next):
+    print 'revision:', revision
+    print '    timestamp:', timestamp
+    print '    author:', author
+    print '    state:', state
+    print '    branches:', branches
+    print '    next:', next
+
+  def set_revision_info(self, revision, log, text):
+    # Text is truncated to keep the debug output readable:
+    print 'revision:', revision
+    print '    log:', log
+    print '    text:', text[:100], '...'
+
+
+class DumpSink(common.Sink):
+ """Dump all the parse information directly to stdout.
+
+ The output is relatively unformatted and untagged. It is intended as a
+ raw dump of the data in the RCS file. A copy can be saved, then changes
+ made to the parsing engine, then a comparison of the new output against
+ the old output.
+ """
+ def __init__(self):
+ global sha
+ import sha
+
+ def set_head_revision(self, revision):
+ print revision
+
+ def set_principal_branch(self, branch_name):
+ print branch_name
+
+ def define_tag(self, name, revision):
+ print name, revision
+
+ def set_comment(self, comment):
+ print comment
+
+ def set_description(self, description):
+ print description
+
+ def define_revision(self, revision, timestamp, author, state,
+ branches, next):
+ print revision, timestamp, author, state, branches, next
+
+ def set_revision_info(self, revision, log, text):
+ print revision, sha.new(log).hexdigest(), sha.new(text).hexdigest()
+
+ def tree_completed(self):
+ print 'tree_completed'
+
+ def parse_completed(self):
+ print 'parse_completed'
+
+
+def dump_file(fname):
+  """Parse RCS file FNAME and dump its raw contents via DumpSink."""
+  parse(open(fname, 'rb'), DumpSink())
+
+def time_file(fname):
+  """Parse RCS file FNAME with a no-op Sink and print elapsed seconds."""
+  f = open(fname, 'rb')
+  s = common.Sink()
+  t = time.time()
+  parse(f, s)
+  t = time.time() - t
+  print t
+
+def _usage():
+  """Print command-line help and exit with status 1.
+
+  NOTE(review): relies on 'sys' being imported as a module global by
+  the __main__ block below; calling this from an importing module
+  would raise NameError."""
+  print 'This is normally a module for importing, but it has a couple'
+  print 'features for testing as an executable script.'
+  print 'USAGE: %s COMMAND filename,v' % sys.argv[0]
+  print '  where COMMAND is one of:'
+  print '    dump: filename is "dumped" to stdout'
+  print '    time: filename is parsed with the time written to stdout'
+  sys.exit(1)
+
+if __name__ == '__main__':
+  # 'import sys' here is a module-level global, so _usage() can also
+  # see it:
+  import sys
+  if len(sys.argv) != 3:
+    _usage()
+  if sys.argv[1] == 'dump':
+    dump_file(sys.argv[2])
+  elif sys.argv[1] == 'time':
+    time_file(sys.argv[2])
+  else:
+    _usage()
diff --git a/cvs2svn_rcsparse/default.py b/cvs2svn_rcsparse/default.py
new file mode 100644
index 0000000..57f9fc6
--- /dev/null
+++ b/cvs2svn_rcsparse/default.py
@@ -0,0 +1,172 @@
+# -*-python-*-
+#
+# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
+#
+# By using this file, you agree to the terms and conditions set forth in
+# the LICENSE.html file which can be found at the top level of the ViewVC
+# distribution or at http://viewvc.org/license-1.html.
+#
+# For more information, visit http://viewvc.org/
+#
+# -----------------------------------------------------------------------
+#
+# This file was originally based on portions of the blame.py script by
+# Curt Hagenlocher.
+#
+# -----------------------------------------------------------------------
+
+import string
+import common
+
+class _TokenStream:
+ token_term = frozenset(string.whitespace + ';:')
+
+ # the algorithm is about the same speed for any CHUNK_SIZE chosen.
+ # grab a good-sized chunk, but not too large to overwhelm memory.
+ # note: we use a multiple of a standard block size
+ CHUNK_SIZE = 192 * 512 # about 100k
+
+# CHUNK_SIZE = 5 # for debugging, make the function grind...
+
+ def __init__(self, file):
+ self.rcsfile = file
+ self.idx = 0
+ self.buf = self.rcsfile.read(self.CHUNK_SIZE)
+ if self.buf == '':
+ raise RuntimeError, 'EOF'
+
+ def get(self):
+ "Get the next token from the RCS file."
+
+ # Note: we can afford to loop within Python, examining individual
+ # characters. For the whitespace and tokens, the number of iterations
+ # is typically quite small. Thus, a simple iterative loop will beat
+ # out more complex solutions.
+
+ buf = self.buf
+ lbuf = len(buf)
+ idx = self.idx
+
+ while 1:
+ if idx == lbuf:
+ buf = self.rcsfile.read(self.CHUNK_SIZE)
+ if buf == '':
+ # signal EOF by returning None as the token
+ del self.buf # so we fail if get() is called again
+ return None
+ lbuf = len(buf)
+ idx = 0
+
+ if buf[idx] not in string.whitespace:
+ break
+
+ idx = idx + 1
+
+ if buf[idx] in ';:':
+ self.buf = buf
+ self.idx = idx + 1
+ return buf[idx]
+
+ if buf[idx] != '@':
+ end = idx + 1
+ token = ''
+ while 1:
+ # find token characters in the current buffer
+ while end < lbuf and buf[end] not in self.token_term:
+ end = end + 1
+ token = token + buf[idx:end]
+
+ if end < lbuf:
+ # we stopped before the end, so we have a full token
+ idx = end
+ break
+
+ # we stopped at the end of the buffer, so we may have a partial token
+ buf = self.rcsfile.read(self.CHUNK_SIZE)
+ lbuf = len(buf)
+ idx = end = 0
+
+ self.buf = buf
+ self.idx = idx
+ return token
+
+ # a "string" which starts with the "@" character. we'll skip it when we
+ # search for content.
+ idx = idx + 1
+
+ chunks = [ ]
+
+ while 1:
+ if idx == lbuf:
+ idx = 0
+ buf = self.rcsfile.read(self.CHUNK_SIZE)
+ if buf == '':
+ raise RuntimeError, 'EOF'
+ lbuf = len(buf)
+ i = string.find(buf, '@', idx)
+ if i == -1:
+ chunks.append(buf[idx:])
+ idx = lbuf
+ continue
+ if i == lbuf - 1:
+ chunks.append(buf[idx:i])
+ idx = 0
+ buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
+ if buf == '@':
+ raise RuntimeError, 'EOF'
+ lbuf = len(buf)
+ continue
+ if buf[i + 1] == '@':
+ chunks.append(buf[idx:i+1])
+ idx = i + 2
+ continue
+
+ chunks.append(buf[idx:i])
+
+ self.buf = buf
+ self.idx = i + 1
+
+ return ''.join(chunks)
+
+# _get = get
+# def get(self):
+ token = self._get()
+ print 'T:', `token`
+ return token
+
+ def match(self, match):
+ "Try to match the next token from the input buffer."
+
+ token = self.get()
+ if token != match:
+ raise common.RCSExpected(token, match)
+
+ def unget(self, token):
+ "Put this token back, for the next get() to return."
+
+ # Override the class' .get method with a function which clears the
+ # overridden method then returns the pushed token. Since this function
+ # will not be looked up via the class mechanism, it should be a "normal"
+ # function, meaning it won't have "self" automatically inserted.
+ # Therefore, we need to pass both self and the token thru via defaults.
+
+ # note: we don't put this into the input buffer because it may have been
+ # @-unescaped already.
+
+ def give_it_back(self=self, token=token):
+ del self.get
+ return token
+
+ self.get = give_it_back
+
+ def mget(self, count):
+ "Return multiple tokens. 'next' is at the end."
+ result = [ ]
+ for i in range(count):
+ result.append(self.get())
+ result.reverse()
+ return result
+
+
+class Parser(common._Parser):
+  """Concrete RCS parser using the pure-Python _TokenStream."""
+  stream_class = _TokenStream
diff --git a/cvs2svn_rcsparse/parse_rcs_file.py b/cvs2svn_rcsparse/parse_rcs_file.py
new file mode 100644
index 0000000..215845d
--- /dev/null
+++ b/cvs2svn_rcsparse/parse_rcs_file.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python2
+
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2006-2007 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""Parse an RCS file, showing the rcsparse callbacks that are called.
+
+This program is useful to see whether an RCS file has a problem (in
+the sense of not being parseable by rcsparse) and also to illuminate
+the correspondence between RCS file contents and rcsparse callbacks.
+
+The output of this program can also be considered to be a kind of
+'canonical' format for RCS files, at least in so far as rcsparse
+returns all relevant information in the file and provided that the
+order of callbacks is always the same."""
+
+
+import sys
+import os
+
+
+class Logger:
+ def __init__(self, f, name):
+ self.f = f
+ self.name = name
+
+ def __call__(self, *args):
+ self.f.write(
+ '%s(%s)\n' % (self.name, ', '.join(['%r' % arg for arg in args]),)
+ )
+
+
+class LoggingSink:
+ def __init__(self, f):
+ self.f = f
+
+ def __getattr__(self, name):
+ return Logger(self.f, name)
+
+
+if __name__ == '__main__':
+ # Since there is nontrivial logic in __init__.py, we have to import
+ # parse() via that file. First make sure that the directory
+ # containing this script is in the path:
+ sys.path.insert(0, os.path.dirname(sys.argv[0]))
+
+ from __init__ import parse
+
+ if sys.argv[1:]:
+ for path in sys.argv[1:]:
+ if os.path.isfile(path) and path.endswith(',v'):
+ parse(
+ open(path, 'rb'), LoggingSink(sys.stdout)
+ )
+ else:
+ sys.stderr.write('%r is being ignored.\n' % path)
+ else:
+ parse(sys.stdin, LoggingSink(sys.stdout))
+
+
diff --git a/cvs2svn_rcsparse/rcparse_redundant_work.patch b/cvs2svn_rcsparse/rcparse_redundant_work.patch
new file mode 100644
index 0000000..b574dd2
--- /dev/null
+++ b/cvs2svn_rcsparse/rcparse_redundant_work.patch
@@ -0,0 +1,99 @@
+=== modified file 'cvs2svn_rcsparse/default.py'
+--- cvs2svn_rcsparse/default.py 2007-11-18 23:05:32 +0000
++++ cvs2svn_rcsparse/default.py 2010-01-23 10:21:47 +0000
+@@ -19,7 +19,7 @@
+ import common
+
+ class _TokenStream:
+- token_term = string.whitespace + ';:'
++ token_term = frozenset(string.whitespace + ';:')
+
+ # the algorithm is about the same speed for any CHUNK_SIZE chosen.
+ # grab a good-sized chunk, but not too large to overwhelm memory.
+@@ -44,15 +44,17 @@
+ # out more complex solutions.
+
+ buf = self.buf
++ lbuf = len(buf)
+ idx = self.idx
+
+ while 1:
+- if idx == len(buf):
++ if idx == lbuf:
+ buf = self.rcsfile.read(self.CHUNK_SIZE)
+ if buf == '':
+ # signal EOF by returning None as the token
+ del self.buf # so we fail if get() is called again
+ return None
++ lbuf = len(buf)
+ idx = 0
+
+ if buf[idx] not in string.whitespace:
+@@ -60,7 +62,7 @@
+
+ idx = idx + 1
+
+- if buf[idx] == ';' or buf[idx] == ':':
++ if buf[idx] in ';:':
+ self.buf = buf
+ self.idx = idx + 1
+ return buf[idx]
+@@ -70,17 +72,18 @@
+ token = ''
+ while 1:
+ # find token characters in the current buffer
+- while end < len(buf) and buf[end] not in self.token_term:
++ while end < lbuf and buf[end] not in self.token_term:
+ end = end + 1
+ token = token + buf[idx:end]
+
+- if end < len(buf):
++ if end < lbuf:
+ # we stopped before the end, so we have a full token
+ idx = end
+ break
+
+ # we stopped at the end of the buffer, so we may have a partial token
+ buf = self.rcsfile.read(self.CHUNK_SIZE)
++ lbuf = len(buf)
+ idx = end = 0
+
+ self.buf = buf
+@@ -94,22 +97,24 @@
+ chunks = [ ]
+
+ while 1:
+- if idx == len(buf):
++ if idx == lbuf:
+ idx = 0
+ buf = self.rcsfile.read(self.CHUNK_SIZE)
+ if buf == '':
+ raise RuntimeError, 'EOF'
++ lbuf = len(buf)
+ i = string.find(buf, '@', idx)
+ if i == -1:
+ chunks.append(buf[idx:])
+- idx = len(buf)
++ idx = lbuf
+ continue
+- if i == len(buf) - 1:
++ if i == lbuf - 1:
+ chunks.append(buf[idx:i])
+ idx = 0
+ buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
+ if buf == '@':
+ raise RuntimeError, 'EOF'
++ lbuf = len(buf)
+ continue
+ if buf[i + 1] == '@':
+ chunks.append(buf[idx:i+1])
+@@ -121,7 +126,7 @@
+ self.buf = buf
+ self.idx = i + 1
+
+- return string.join(chunks, '')
++ return ''.join(chunks)
+
+ # _get = get
+ # def get(self):
+
diff --git a/cvs2svn_rcsparse/run-tests.py b/cvs2svn_rcsparse/run-tests.py
new file mode 100644
index 0000000..eb9c3ea
--- /dev/null
+++ b/cvs2svn_rcsparse/run-tests.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python2
+
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2007 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://viewvc.tigris.org/.
+# ====================================================================
+
+"""Run tests of rcsparse code."""
+
+import sys
+import os
+import glob
+from cStringIO import StringIO
+from difflib import Differ
+
+# Since there is nontrivial logic in __init__.py, we have to import
+# parse() via that file. First make sure that the directory
+# containing this script is in the path:
+script_dir = os.path.dirname(sys.argv[0])
+sys.path.insert(0, script_dir)
+
+from __init__ import parse
+from parse_rcs_file import LoggingSink
+
+
+test_dir = os.path.join(script_dir, 'test-data')
+
+filelist = glob.glob(os.path.join(test_dir, '*,v'))
+filelist.sort()
+
+all_tests_ok = 1
+
+for filename in filelist:
+ sys.stderr.write('%s: ' % (filename,))
+ f = StringIO()
+ try:
+ parse(open(filename, 'rb'), LoggingSink(f))
+ except Exception, e:
+ sys.stderr.write('Error parsing file: %s!\n' % (e,))
+ all_tests_ok = 0
+ else:
+ output = f.getvalue()
+
+ expected_output_filename = filename[:-2] + '.out'
+ expected_output = open(expected_output_filename, 'rb').read()
+
+ if output == expected_output:
+ sys.stderr.write('OK\n')
+ else:
+ sys.stderr.write('Output does not match expected output!\n')
+ differ = Differ()
+ for diffline in differ.compare(
+ expected_output.splitlines(1), output.splitlines(1)
+ ):
+ sys.stderr.write(diffline)
+ all_tests_ok = 0
+
+if all_tests_ok:
+ sys.exit(0)
+else:
+ sys.exit(1)
+
diff --git a/cvs2svn_rcsparse/texttools.py b/cvs2svn_rcsparse/texttools.py
new file mode 100644
index 0000000..7c713eb
--- /dev/null
+++ b/cvs2svn_rcsparse/texttools.py
@@ -0,0 +1,348 @@
+# -*-python-*-
+#
+# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
+#
+# By using this file, you agree to the terms and conditions set forth in
+# the LICENSE.html file which can be found at the top level of the ViewVC
+# distribution or at http://viewvc.org/license-1.html.
+#
+# For more information, visit http://viewvc.org/
+#
+# -----------------------------------------------------------------------
+
+import string
+
+# note: this will raise an ImportError if it isn't available. the rcsparse
+# package will recognize this and switch over to the default parser.
+from mx import TextTools
+
+import common
+
+
+# for convenience
+_tt = TextTools
+
+_idchar_list = map(chr, range(33, 127)) + map(chr, range(160, 256))
+_idchar_list.remove('$')
+_idchar_list.remove(',')
+#_idchar_list.remove('.') # leave as part of 'num' symbol
+_idchar_list.remove(':')
+_idchar_list.remove(';')
+_idchar_list.remove('@')
+_idchar = string.join(_idchar_list, '')
+_idchar_set = _tt.set(_idchar)
+
+_onechar_token_set = _tt.set(':;')
+
+_not_at_set = _tt.invset('@')
+
+_T_TOKEN = 30
+_T_STRING_START = 40
+_T_STRING_SPAN = 60
+_T_STRING_END = 70
+
+_E_COMPLETE = 100 # ended on a complete token
+_E_TOKEN = 110 # ended mid-token
+_E_STRING_SPAN = 130 # ended within a string
+_E_STRING_END = 140 # ended with string-end ('@') (could be mid-@@)
+
+_SUCCESS = +100
+
+_EOF = 'EOF'
+_CONTINUE = 'CONTINUE'
+_UNUSED = 'UNUSED'
+
+
+# continuation of a token over a chunk boundary
+_c_token_table = (
+ (_T_TOKEN, _tt.AllInSet, _idchar_set),
+ )
+
+class _mxTokenStream:
+
+ # the algorithm is about the same speed for any CHUNK_SIZE chosen.
+ # grab a good-sized chunk, but not too large to overwhelm memory.
+ # note: we use a multiple of a standard block size
+ CHUNK_SIZE = 192 * 512 # about 100k
+
+# CHUNK_SIZE = 5 # for debugging, make the function grind...
+
+ def __init__(self, file):
+ self.rcsfile = file
+ self.tokens = [ ]
+ self.partial = None
+
+ self.string_end = None
+
+ def _parse_chunk(self, buf, start=0):
+ "Get the next token from the RCS file."
+
+ buflen = len(buf)
+
+ assert start < buflen
+
+ # construct a tag table which refers to the buffer we need to parse.
+ table = (
+ #1: ignore whitespace. with or without whitespace, move to the next rule.
+ (None, _tt.AllInSet, _tt.whitespace_set, +1),
+
+ #2
+ (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),
+
+ #3: accumulate token text and exit, or move to the next rule.
+ (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),
+
+ #4
+ (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),
+
+ #5: single character tokens exit immediately, or move to the next rule
+ (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),
+
+ #6
+ (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),
+
+ #7: if this isn't an '@' symbol, then we have a syntax error (go to a
+ # negative index to indicate that condition). otherwise, suck it up
+ # and move to the next rule.
+ (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),
+
+ #8
+ (None, _tt.Is, '@', +4, +1),
+ #9
+ (buf, _tt.Is, '@', +1, -1),
+ #10
+ (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),
+ #11
+ (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),
+
+ #12
+ (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),
+
+ #13: suck up everything that isn't an AT. go to next rule to look for EOF
+ (buf, _tt.AllInSet, _not_at_set, 0, +1),
+
+ #14: go back to look for double AT if we aren't at the end of the string
+ (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),
+ )
+
+ # Fast, texttools may be, but it's somewhat lacking in clarity.
+ # Here's an attempt to document the logic encoded in the table above:
+ #
+ # Flowchart:
+ # _____
+ # / /\
+ # 1 -> 2 -> 3 -> 5 -> 7 -> 8 -> 9 -> 10 -> 11
+ # | \/ \/ \/ /\ \/
+ # \ 4 6 12 14 /
+ # \_______/_____/ \ / /
+ # \ 13 /
+ # \__________________________________________/
+ #
+ # #1: Skip over any whitespace.
+ # #2: If now EOF, exit with code _E_COMPLETE.
+ # #3: If we have a series of characters in _idchar_set, then:
+ # #4: Output them as a token, and go back to #1.
+ # #5: If we have a character in _onechar_token_set, then:
+ # #6: Output it as a token, and go back to #1.
+ # #7: If we do not have an '@', then error.
+ # If we do, then log a _T_STRING_START and continue.
+ # #8: If we have another '@', continue on to #9. Otherwise:
+ # #12: If now EOF, exit with code _E_STRING_SPAN.
+ # #13: Record the slice up to the next '@' (or EOF).
+ # #14: If now EOF, exit with code _E_STRING_SPAN.
+ # Otherwise, go back to #8.
+ # #9: If we have another '@', then we've just seen an escaped
+ # (by doubling) '@' within an @-string. Record a slice including
+ # just one '@' character, and jump back to #8.
+ # Otherwise, we've *either* seen the terminating '@' of an @-string,
+ # *or* we've seen one half of an escaped @@ sequence that just
+ # happened to be split over a chunk boundary - in either case,
+ # we continue on to #10.
+ # #10: Log a _T_STRING_END.
+ # #11: If now EOF, exit with _E_STRING_END. Otherwise, go back to #1.
+
+ success, taglist, idx = _tt.tag(buf, table, start)
+
+ if not success:
+ ### need a better way to report this error
+ raise common.RCSIllegalCharacter()
+ assert idx == buflen
+
+ # pop off the last item
+ last_which = taglist.pop()
+
+ i = 0
+ tlen = len(taglist)
+ while i < tlen:
+ if taglist[i] == _T_STRING_START:
+ j = i + 1
+ while j < tlen:
+ if taglist[j] == _T_STRING_END:
+ s = _tt.join(taglist, '', i+1, j)
+ del taglist[i:j]
+ tlen = len(taglist)
+ taglist[i] = s
+ break
+ j = j + 1
+ else:
+ assert last_which == _E_STRING_SPAN
+ s = _tt.join(taglist, '', i+1)
+ del taglist[i:]
+ self.partial = (_T_STRING_SPAN, [ s ])
+ break
+ i = i + 1
+
+ # figure out whether we have a partial last-token
+ if last_which == _E_TOKEN:
+ self.partial = (_T_TOKEN, [ taglist.pop() ])
+ elif last_which == _E_COMPLETE:
+ pass
+ elif last_which == _E_STRING_SPAN:
+ assert self.partial
+ else:
+ assert last_which == _E_STRING_END
+ self.partial = (_T_STRING_END, [ taglist.pop() ])
+
+ taglist.reverse()
+ taglist.extend(self.tokens)
+ self.tokens = taglist
+
+ def _set_end(self, taglist, text, l, r, subtags):
+ self.string_end = l
+
+ def _handle_partial(self, buf):
+ which, chunks = self.partial
+ if which == _T_TOKEN:
+ success, taglist, idx = _tt.tag(buf, _c_token_table)
+ if not success:
+ # The start of this buffer was not a token. So the end of the
+ # prior buffer was a complete token.
+ self.tokens.insert(0, string.join(chunks, ''))
+ else:
+ assert len(taglist) == 1 and taglist[0][0] == _T_TOKEN \
+ and taglist[0][1] == 0 and taglist[0][2] == idx
+ if idx == len(buf):
+ #
+ # The whole buffer was one huge token, so we may have a
+ # partial token again.
+ #
+ # Note: this modifies the list of chunks in self.partial
+ #
+ chunks.append(buf)
+
+ # consumed the whole buffer
+ return len(buf)
+
+ # got the rest of the token.
+ chunks.append(buf[:idx])
+ self.tokens.insert(0, string.join(chunks, ''))
+
+ # no more partial token
+ self.partial = None
+
+ return idx
+
+ if which == _T_STRING_END:
+ if buf[0] != '@':
+ self.tokens.insert(0, string.join(chunks, ''))
+ return 0
+ chunks.append('@')
+ start = 1
+ else:
+ start = 0
+
+ self.string_end = None
+ string_table = (
+ (None, _tt.Is, '@', +3, +1),
+ (_UNUSED, _tt.Is + _tt.AppendMatch, '@', +1, -1),
+ (self._set_end, _tt.Skip + _tt.CallTag, 0, 0, _SUCCESS),
+
+ (None, _tt.EOF, _tt.Here, +1, _SUCCESS),
+
+ # suck up everything that isn't an AT. move to next rule to look
+ # for EOF
+ (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _not_at_set, 0, +1),
+
+ # go back to look for double AT if we aren't at the end of the string
+ (None, _tt.EOF, _tt.Here, -5, _SUCCESS),
+ )
+
+ success, unused, idx = _tt.tag(buf, string_table,
+ start, len(buf), chunks)
+
+ # must have matched at least one item
+ assert success
+
+ if self.string_end is None:
+ assert idx == len(buf)
+ self.partial = (_T_STRING_SPAN, chunks)
+ elif self.string_end < len(buf):
+ self.partial = None
+ self.tokens.insert(0, string.join(chunks, ''))
+ else:
+ self.partial = (_T_STRING_END, chunks)
+
+ return idx
+
+ def _parse_more(self):
+ buf = self.rcsfile.read(self.CHUNK_SIZE)
+ if not buf:
+ return _EOF
+
+ if self.partial:
+ idx = self._handle_partial(buf)
+ if idx is None:
+ return _CONTINUE
+ if idx < len(buf):
+ self._parse_chunk(buf, idx)
+ else:
+ self._parse_chunk(buf)
+
+ return _CONTINUE
+
+ def get(self):
+ try:
+ return self.tokens.pop()
+ except IndexError:
+ pass
+
+ while not self.tokens:
+ action = self._parse_more()
+ if action == _EOF:
+ return None
+
+ return self.tokens.pop()
+
+
+# _get = get
+# def get(self):
+# token = self._get()
+# print 'T:', `token`
+# return token
+
+ def match(self, match):
+ if self.tokens:
+ token = self.tokens.pop()
+ else:
+ token = self.get()
+
+ if token != match:
+ raise common.RCSExpected(token, match)
+
+ def unget(self, token):
+ self.tokens.append(token)
+
+ def mget(self, count):
+ "Return multiple tokens. 'next' is at the end."
+ while len(self.tokens) < count:
+ action = self._parse_more()
+ if action == _EOF:
+ ### fix this
+ raise RuntimeError, 'EOF hit while expecting tokens'
+ result = self.tokens[-count:]
+ del self.tokens[-count:]
+ return result
+
+
+class Parser(common._Parser):
+ stream_class = _mxTokenStream