summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'distindex/read-index.pl')
-rw-r--r-- distindex/read-index.pl 43
1 files changed, 0 insertions, 43 deletions
diff --git a/distindex/read-index.pl b/distindex/read-index.pl
deleted file mode 100644
index c279750..0000000
--- a/distindex/read-index.pl
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/perl
-# Parse "File-<id>-<field>: <value>" lines from sample.out into %documents.
-use strict;
-use warnings;
-
-my $filename = "sample.out";
-
-# Three-argument open so the name is never parsed as a mode or a pipe.
-open(my $fh, '<', $filename) or die "could not open $filename: $!";
-
-# %documents maps file id => { field name => value }.
-my %documents;
-while (my $line = <$fh>) {
-    # $fileid: numeric or "dist"; $field: non-empty; $value: may be empty.
-    my ($fileid, $field, $value) =
-        $line =~ /File-([^-]+)-([^:]+): ([^\n]*)/;
-    if (!defined $fileid) {
-        # A failed match yields no captures; do not store an undef key.
-        warn "skipping malformed line $. of $filename\n";
-        next;
-    }
-    # Autovivification creates the inner hash on first use.
-    $documents{$fileid}{$field} = $value;
-}
-close($fh);
-
-
-# Fields for indexing.
-
-# our %fields = (
-# distfile => 'text',
-# filename => 'text',
-# isdist => 'UnAnalyzedField',
-# size => 'UnAnalyzedField',
-# mtime => 'UnAnalyzedField',
-# md5 => 'UnAnalyzedField',
-# sha1 => 'UnAnalyzedField',
-# );
-
-# The analyzer should simply tokenize filenames into their parts.
-# I would split on [/._-] at least; technically, using
-# (\W|_|\d) as the class of split characters might be reasonable.
-