about summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'cvs2svn_lib/symbol_statistics.py')
-rw-r--r-- cvs2svn_lib/symbol_statistics.py 521
1 files changed, 0 insertions, 521 deletions
diff --git a/cvs2svn_lib/symbol_statistics.py b/cvs2svn_lib/symbol_statistics.py
deleted file mode 100644
index 0d35a50..0000000
--- a/cvs2svn_lib/symbol_statistics.py
+++ /dev/null
@@ -1,521 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module gathers and processes statistics about lines of development."""
-
-import cPickle
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.common import FatalException
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.symbol import IncludedSymbol
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-from cvs2svn_lib.symbol import ExcludedSymbol
-
-
-class SymbolPlanError(FatalException):
- pass
-
-
-class SymbolPlanException(SymbolPlanError):
- def __init__(self, stats, symbol, msg):
- self.stats = stats
- self.symbol = symbol
- SymbolPlanError.__init__(
- self,
- 'Cannot convert the following symbol to %s: %s\n %s'
- % (symbol, msg, self.stats,)
- )
-
-
-class IndeterminateSymbolException(SymbolPlanException):
- def __init__(self, stats, symbol):
- SymbolPlanException.__init__(self, stats, symbol, 'Indeterminate type')
-
-
-class _Stats:
- """A summary of information about a symbol (tag or branch).
-
- Members:
-
- lod -- the LineOfDevelopment instance of the lod being described
-
- tag_create_count -- the number of files in which this lod appears
- as a tag
-
- branch_create_count -- the number of files in which this lod
- appears as a branch
-
- branch_commit_count -- the number of files in which there were
- commits on this lod
-
- trivial_import_count -- the number of files in which this branch
- was purely a non-trunk default branch containing exactly one
- revision.
-
- pure_ntdb_count -- the number of files in which this branch was
- purely a non-trunk default branch (consisting only of
- non-trunk default branch revisions).
-
- branch_blockers -- a set of Symbol instances for any symbols that
- sprout from a branch with this name.
-
- possible_parents -- a map {LineOfDevelopment : count} indicating
- in how many files each LOD could have served as the parent of
- self.lod."""
-
- def __init__(self, lod):
- self.lod = lod
- self.tag_create_count = 0
- self.branch_create_count = 0
- self.branch_commit_count = 0
- self.branch_blockers = set()
- self.trivial_import_count = 0
- self.pure_ntdb_count = 0
- self.possible_parents = { }
-
- def register_tag_creation(self):
- """Register the creation of this lod as a tag."""
-
- self.tag_create_count += 1
-
- def register_branch_creation(self):
- """Register the creation of this lod as a branch."""
-
- self.branch_create_count += 1
-
- def register_branch_commit(self):
- """Register that there were commit(s) on this branch in one file."""
-
- self.branch_commit_count += 1
-
- def register_branch_blocker(self, blocker):
- """Register BLOCKER as preventing this symbol from being deleted.
-
- BLOCKER is a tag or a branch that springs from a revision on this
- symbol."""
-
- self.branch_blockers.add(blocker)
-
- def register_trivial_import(self):
- """Register that this branch is a trivial import branch in one file."""
-
- self.trivial_import_count += 1
-
- def register_pure_ntdb(self):
- """Register that this branch is a pure import branch in one file."""
-
- self.pure_ntdb_count += 1
-
- def register_possible_parent(self, lod):
- """Register that LOD was a possible parent for SELF.lod in a file."""
-
- self.possible_parents[lod] = self.possible_parents.get(lod, 0) + 1
-
- def register_branch_possible_parents(self, cvs_branch, cvs_file_items):
- """Register any possible parents of this symbol from CVS_BRANCH."""
-
- # This routine is a bottleneck. So we define some local variables
- # to speed up access to frequently-needed variables.
- register = self.register_possible_parent
- parent_cvs_rev = cvs_file_items[cvs_branch.source_id]
-
- # The "obvious" parent of a branch is the branch holding the
- # revision where the branch is rooted:
- register(parent_cvs_rev.lod)
-
- # Any other branches that are rooted at the same revision and
- # were committed earlier than the branch are also possible
- # parents:
- symbol = cvs_branch.symbol
- for branch_id in parent_cvs_rev.branch_ids:
- parent_symbol = cvs_file_items[branch_id].symbol
- # A branch cannot be its own parent, nor can a branch's
- # parent be a branch that was created after it. So we stop
- # iterating when we reached the branch whose parents we are
- # collecting:
- if parent_symbol == symbol:
- break
- register(parent_symbol)
-
- def register_tag_possible_parents(self, cvs_tag, cvs_file_items):
- """Register any possible parents of this symbol from CVS_TAG."""
-
- # This routine is a bottleneck. So use local variables to speed
- # up access to frequently-needed objects.
- register = self.register_possible_parent
- parent_cvs_rev = cvs_file_items[cvs_tag.source_id]
-
- # The "obvious" parent of a tag is the branch holding the
- # revision where the branch is rooted:
- register(parent_cvs_rev.lod)
-
- # Branches that are rooted at the same revision are also
- # possible parents:
- for branch_id in parent_cvs_rev.branch_ids:
- parent_symbol = cvs_file_items[branch_id].symbol
- register(parent_symbol)
-
- def is_ghost(self):
- """Return True iff this lod never really existed."""
-
- return (
- not isinstance(self.lod, Trunk)
- and self.branch_commit_count == 0
- and not self.branch_blockers
- and not self.possible_parents
- )
-
- def check_valid(self, symbol):
- """Check whether SYMBOL is a valid conversion of SELF.lod.
-
- It is planned to convert SELF.lod as SYMBOL. Verify that SYMBOL
- is a TypedSymbol and that the information that it contains is
- consistent with that stored in SELF.lod. (This routine does not
- do higher-level tests of whether the chosen conversion is actually
- sensible.) If there are any problems, raise a
- SymbolPlanException."""
-
- if not isinstance(symbol, (Trunk, Branch, Tag, ExcludedSymbol)):
- raise IndeterminateSymbolException(self, symbol)
-
- if symbol.id != self.lod.id:
- raise SymbolPlanException(self, symbol, 'IDs must match')
-
- if symbol.project != self.lod.project:
- raise SymbolPlanException(self, symbol, 'Projects must match')
-
- if isinstance(symbol, IncludedSymbol) and symbol.name != self.lod.name:
- raise SymbolPlanException(self, symbol, 'Names must match')
-
- def check_preferred_parent_allowed(self, symbol):
- """Check that SYMBOL's preferred_parent_id is an allowed parent.
-
- SYMBOL is the planned conversion of SELF.lod. Verify that its
- preferred_parent_id is a possible parent of SELF.lod. If not,
- raise a SymbolPlanException describing the problem."""
-
- if isinstance(symbol, IncludedSymbol) \
- and symbol.preferred_parent_id is not None:
- for pp in self.possible_parents.keys():
- if pp.id == symbol.preferred_parent_id:
- return
- else:
- raise SymbolPlanException(
- self, symbol,
- 'The selected parent is not among the symbol\'s '
- 'possible parents.'
- )
-
- def __str__(self):
- return (
- '\'%s\' is '
- 'a tag in %d files, '
- 'a branch in %d files, '
- 'a trivial import in %d files, '
- 'a pure import in %d files, '
- 'and has commits in %d files'
- % (self.lod, self.tag_create_count, self.branch_create_count,
- self.trivial_import_count, self.pure_ntdb_count,
- self.branch_commit_count)
- )
-
- def __repr__(self):
- retval = ['%s\n possible parents:\n' % (self,)]
- parent_counts = self.possible_parents.items()
- parent_counts.sort(lambda a,b: - cmp(a[1], b[1]))
- for (symbol, count) in parent_counts:
- if isinstance(symbol, Trunk):
- retval.append(' trunk : %d\n' % count)
- else:
- retval.append(' \'%s\' : %d\n' % (symbol.name, count))
- if self.branch_blockers:
- blockers = list(self.branch_blockers)
- blockers.sort()
- retval.append(' blockers:\n')
- for blocker in blockers:
- retval.append(' \'%s\'\n' % (blocker,))
- return ''.join(retval)
-
-
-class SymbolStatisticsCollector:
- """Collect statistics about lines of development.
-
- Record a summary of information about each line of development in
- the RCS files for later storage into a database. The database is
- created in CollectRevsPass and it is used in CollateSymbolsPass (via
- the SymbolStatistics class).
-
- collect_data._SymbolDataCollector inserts information into instances
- of this class by by calling its register_*() methods.
-
- Its main purpose is to assist in the decisions about which symbols
- can be treated as branches and tags and which may be excluded.
-
- The data collected by this class can be written to the file
- config.SYMBOL_STATISTICS."""
-
- def __init__(self):
- # A map { lod -> _Stats } for all lines of development:
- self._stats = { }
-
- def __getitem__(self, lod):
- """Return the _Stats record for line of development LOD.
-
- Create and register a new one if necessary."""
-
- try:
- return self._stats[lod]
- except KeyError:
- stats = _Stats(lod)
- self._stats[lod] = stats
- return stats
-
- def register(self, cvs_file_items):
- """Register the statistics for each symbol in CVS_FILE_ITEMS."""
-
- for lod_items in cvs_file_items.iter_lods():
- if lod_items.lod is not None:
- branch_stats = self[lod_items.lod]
-
- branch_stats.register_branch_creation()
-
- if lod_items.cvs_revisions:
- branch_stats.register_branch_commit()
-
- if lod_items.is_trivial_import():
- branch_stats.register_trivial_import()
-
- if lod_items.is_pure_ntdb():
- branch_stats.register_pure_ntdb()
-
- for cvs_symbol in lod_items.iter_blockers():
- branch_stats.register_branch_blocker(cvs_symbol.symbol)
-
- if lod_items.cvs_branch is not None:
- branch_stats.register_branch_possible_parents(
- lod_items.cvs_branch, cvs_file_items
- )
-
- for cvs_tag in lod_items.cvs_tags:
- tag_stats = self[cvs_tag.symbol]
-
- tag_stats.register_tag_creation()
-
- tag_stats.register_tag_possible_parents(cvs_tag, cvs_file_items)
-
- def purge_ghost_symbols(self):
- """Purge any symbols that don't have any activity.
-
- Such ghost symbols can arise if a symbol was defined in an RCS
- file but pointed at a non-existent revision."""
-
- for stats in self._stats.values():
- if stats.is_ghost():
- Log().warn('Deleting ghost symbol: %s' % (stats.lod,))
- del self._stats[stats.lod]
-
- def close(self):
- """Store the stats database to the SYMBOL_STATISTICS file."""
-
- f = open(artifact_manager.get_temp_file(config.SYMBOL_STATISTICS), 'wb')
- cPickle.dump(self._stats.values(), f, -1)
- f.close()
- self._stats = None
-
-
-class SymbolStatistics:
- """Read and handle line of development statistics.
-
- The statistics are read from a database created by
- SymbolStatisticsCollector. This class has methods to process the
- statistics information and help with decisions about:
-
- 1. What tags and branches should be processed/excluded
-
- 2. What tags should be forced to be branches and vice versa (this
- class maintains some statistics to help the user decide)
-
- 3. Are there inconsistencies?
-
- - A symbol that is sometimes a branch and sometimes a tag
-
- - A forced branch with commit(s) on it
-
- - A non-excluded branch depends on an excluded branch
-
- The data in this class is read from a pickle file."""
-
- def __init__(self, filename):
- """Read the stats database from FILENAME."""
-
- # A map { LineOfDevelopment -> _Stats } for all lines of
- # development:
- self._stats = { }
-
- # A map { LineOfDevelopment.id -> _Stats } for all lines of
- # development:
- self._stats_by_id = { }
-
- stats_list = cPickle.load(open(filename, 'rb'))
-
- for stats in stats_list:
- self._stats[stats.lod] = stats
- self._stats_by_id[stats.lod.id] = stats
-
- def __len__(self):
- return len(self._stats)
-
- def __getitem__(self, lod_id):
- return self._stats_by_id[lod_id]
-
- def get_stats(self, lod):
- """Return the _Stats object for LineOfDevelopment instance LOD.
-
- Raise KeyError if no such lod exists."""
-
- return self._stats[lod]
-
- def __iter__(self):
- return self._stats.itervalues()
-
- def _check_blocked_excludes(self, symbol_map):
- """Check for any excluded LODs that are blocked by non-excluded symbols.
-
- If any are found, describe the problem to Log().error() and raise
- a FatalException."""
-
- # A list of (lod,[blocker,...]) tuples for excludes that are
- # blocked by the specified non-excluded blockers:
- problems = []
-
- for lod in symbol_map.itervalues():
- if isinstance(lod, ExcludedSymbol):
- # Symbol is excluded; make sure that its blockers are also
- # excluded:
- lod_blockers = []
- for blocker in self.get_stats(lod).branch_blockers:
- if isinstance(symbol_map.get(blocker, None), IncludedSymbol):
- lod_blockers.append(blocker)
- if lod_blockers:
- problems.append((lod, lod_blockers))
-
- if problems:
- s = []
- for (lod, lod_blockers) in problems:
- s.append(
- '%s: %s cannot be excluded because the following symbols '
- 'depend on it:\n'
- % (error_prefix, lod,)
- )
- for blocker in lod_blockers:
- s.append(' %s\n' % (blocker,))
- s.append('\n')
- Log().error(''.join(s))
-
- raise FatalException()
-
- def _check_invalid_tags(self, symbol_map):
- """Check for commits on any symbols that are to be converted as tags.
-
- SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
- indicating how each AbstractSymbol is to be converted. If there
- is a commit on a symbol, then it cannot be converted as a tag. If
- any tags with commits are found, output error messages describing
- the problems then raise a FatalException."""
-
- Log().quiet("Checking for forced tags with commits...")
-
- invalid_tags = [ ]
- for symbol in symbol_map.itervalues():
- if isinstance(symbol, Tag):
- stats = self.get_stats(symbol)
- if stats.branch_commit_count > 0:
- invalid_tags.append(symbol)
-
- if not invalid_tags:
- # No problems found:
- return
-
- s = []
- s.append(
- '%s: The following branches cannot be forced to be tags '
- 'because they have commits:\n'
- % (error_prefix,)
- )
- for tag in invalid_tags:
- s.append(' %s\n' % (tag.name))
- s.append('\n')
- Log().error(''.join(s))
-
- raise FatalException()
-
- def check_consistency(self, symbol_map):
- """Check the plan for how to convert symbols for consistency.
-
- SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
- indicating how each AbstractSymbol is to be converted. If any
- problems are detected, describe the problem to Log().error() and
- raise a FatalException."""
-
- # We want to do all of the consistency checks even if one of them
- # fails, so that the user gets as much feedback as possible. Set
- # this variable to True if any errors are found.
- error_found = False
-
- # Check that the planned preferred parents are OK for all
- # IncludedSymbols:
- for lod in symbol_map.itervalues():
- if isinstance(lod, IncludedSymbol):
- stats = self.get_stats(lod)
- try:
- stats.check_preferred_parent_allowed(lod)
- except SymbolPlanException, e:
- Log().error('%s\n' % (e,))
- error_found = True
-
- try:
- self._check_blocked_excludes(symbol_map)
- except FatalException:
- error_found = True
-
- try:
- self._check_invalid_tags(symbol_map)
- except FatalException:
- error_found = True
-
- if error_found:
- raise FatalException(
- 'Please fix the above errors and restart CollateSymbolsPass'
- )
-
- def exclude_symbol(self, symbol):
- """SYMBOL has been excluded; remove it from our statistics."""
-
- del self._stats[symbol]
- del self._stats_by_id[symbol.id]
-
- # Remove references to this symbol from other statistics objects:
- for stats in self._stats.itervalues():
- stats.branch_blockers.discard(symbol)
- if symbol in stats.possible_parents:
- del stats.possible_parents[symbol]
-
-