#!/usr/bin/perl
use List::MoreUtils qw(uniq sort_by);
use LaTeX::Encode qw(latex_encode);
use Data::Dumper;
# four files:
# - mlr: input file, unsorted list of messages, format "hash:listname"
# generated by LaTeX, can contain duplicates
# - mlf: cache of message senders; format: one message per two lines
# * first line "hash:listname"
# * second line message sender
# maintained by this script, can be deleted any time but needs a.g.o
# access for recreation
# - mls: cache of message subjects; format: one message per two lines
# * first line "hash:listname"
# * second line message subject
# maintained by this script, can be deleted any time but needs a.g.o
# access for recreation
# - mlt: cache of message dates; format: one message per two lines
# * first line "hash:listname"
# * second line message date
# maintained by this script, can be deleted any time but needs a.g.o
# access for recreation
# - mld: output file, LaTeX fragment that can be sourced
# Fetch sender, subject and date of one archived mailing list message.
#
# Downloads https://archives.gentoo.org/$list/message/$hash with wget and
# scrapes the three header values from the returned HTML page.
#
# Arguments:
#   $list - mailing list name (used as a path component of the URL)
#   $hash - hash identifying the message within that list
#
# Returns the list ($from, $subject, $date).
#
# Dies if wget cannot be started, or if the page ends before all three
# headers have been found (for example because the download failed).
#
# NOTE(review): the HTML anchors used below ("<td>", "<td><strong>", and the
# "<" following "Subject:" / "Date:") were reconstructed, because the
# patterns in this file had been damaged. Confirm them against the markup
# that archives.gentoo.org currently serves.
sub getdata {
    my ($list, $hash) = @_;

    my $url = "https://archives.gentoo.org/$list/message/$hash";

    # List-form pipe open: the URL is handed to wget as a single argument
    # without going through a shell, so unusual characters in $list or
    # $hash cannot be interpreted as shell syntax.
    open(my $web, '-|', 'wget', '-O', '-', $url)
        or die "Cannot run wget for $url: $!\n";
    binmode($web, ':encoding(UTF-8)');

    # Skip forward to the first line matching $marker and return the line
    # that follows it, which holds the header's value. Stops at end of
    # input instead of looping forever when the header never shows up.
    my $value_after = sub {
        my ($marker, $name) = @_;
        while (defined(my $line = <$web>)) {
            next unless $line =~ $marker;
            my $value = <$web>;
            last unless defined $value;
            chomp $value;
            return $value;
        }
        close $web;
        die "No $name header found in $url\n";
    };

    my $from = $value_after->(qr/From:/, 'From');
    $from =~ s/^.*<td>//;
    # Keep only the display name: drop the address part, whether its opening
    # angle bracket is literal or HTML-escaped, and any quotes around the name.
    $from =~ s/ (?:<|&lt;).*$//;
    $from =~ s/"|&quot;|&#34;//g;

    my $subject = $value_after->(qr/>Subject:</, 'Subject');
    $subject =~ s/^.*<td><strong>//;
    $subject =~ s/<\/strong><\/td>.*$//;

    my $date = $value_after->(qr/>Date:</, 'Date');
    $date =~ s/^.*<td>//;
    $date =~ s/<\/td>.*$//;

    # The result of close is deliberately not checked: we stop reading before
    # the end of the page, so wget may be terminated by a broken pipe.
    close $web;

    return ($from, $subject, $date);
}
# Main code start
use strict;      # lexically scoped: in effect from here to the end of the file
use warnings;

# Sender names and subjects may contain non-ASCII characters.
binmode(STDOUT, ':encoding(UTF-8)');

# Return the lines of a text file without their line endings.
# A file that does not exist yields an empty list; any other failure to
# open the file is fatal.
sub _read_lines {
    my ($file) = @_;
    return () unless -e $file;
    open(my $fh, '<:encoding(UTF-8)', $file)
        or die "Cannot read $file: $!\n";
    my @lines = <$fh>;
    close $fh;
    chomp @lines;
    return @lines;
}

# Load a cache file (two lines per message: key line, value line) and return
# it as a flat key/value list suitable for assignment to a hash.
sub _read_cache {
    my ($file) = @_;
    my @lines = _read_lines($file);
    # A truncated file may end with a key that has no value line.
    push @lines, '' if @lines % 2;
    return @lines;
}

# Write a cache hash back to its file, sorted by key, two lines per message.
sub _write_cache {
    my ($file, $cache) = @_;
    open(my $fh, '>:encoding(UTF-8)', $file)
        or die "Cannot write $file: $!\n";
    for my $msg (sort keys %$cache) {
        my $value = $cache->{$msg} // '';
        print {$fh} "$msg\n$value\n";
    }
    close $fh or die "Cannot finish writing $file: $!\n";
    return;
}

# Emit one \renewcommand that maps a message hash (#1) to the stored value.
#   $fh       - output file handle
#   $macro    - name of the macro to redefine, including the backslash
#   $messages - array ref of "hash:listname" entries, in output order
#   $value_of - hash ref mapping such an entry to the text to print
#   $encode   - true if the text has to be passed through latex_encode
sub _print_lookup_macro {
    my ($fh, $macro, $messages, $value_of, $encode) = @_;
    print {$fh} '\renewcommand{' . $macro . '}[1]{%' . "\n";
    for my $msg (@$messages) {
        my ($msghash) = split /:/, $msg, 2;
        my $value = $value_of->{$msg} // '';
        $value = latex_encode($value) if $encode;
        print {$fh} '\ifthenelse{\equal{#1}{' . $msghash . '}}{{' . $value . '}}{}%' . "\n";
    }
    print {$fh} '}' . "\n";
    return;
}

# Read the list of messages referenced in the LaTeX file, sorted and without
# duplicates. Blank lines are dropped and entries that are not of the form
# "hash:listname" are reported and ignored, since no message can be fetched
# for them.
my @mlrlist = _read_lines('decisions.mlr');
my @messages;
for my $entry (uniq(sort @mlrlist)) {
    if ($entry =~ /\A[^:]+:.+\z/) {
        push @messages, $entry;
    }
    elsif (length $entry) {
        warn "Ignoring malformed entry \"$entry\" in decisions.mlr\n";
    }
}

# Read the caches of message senders, subjects and dates; these files can be
# deleted, but recreating or updating them requires internet access.
# All files are read and written as UTF-8 so that cached text is identical to
# freshly fetched text. Cache files written without an encoding layer may
# contain bytes that are not valid UTF-8; delete them if decoding warnings
# show up.
my %messagefrom    = _read_cache('decisions.mlf');
my %messagesubject = _read_cache('decisions.mls');
my %messagedate    = _read_cache('decisions.mlt');

# Loop through the referenced messages, check if we already have the data,
# and if not fetch and add it.
for my $msg (@messages) {
    my ($msghash, $msglist) = split /:/, $msg, 2;
    print "List $msglist, hash $msghash\n";

    my $cached = $messagesubject{$msg};
    unless (defined $cached && length $cached) {
        print " Data not yet available, fetching it\n";
        # A failed fetch must not throw away what was collected so far: report
        # it and leave the message uncached, so the next run tries again.
        my ($from, $subject, $date) = eval { getdata($msglist, $msghash) };
        if ($@) {
            warn " Fetching $msg failed: $@";
            next;
        }
        $messagefrom{$msg}    = $from    // '';
        $messagesubject{$msg} = $subject // '';
        $messagedate{$msg}    = $date    // '';
    }

    my $shown_from    = $messagefrom{$msg}    // '';
    my $shown_subject = $messagesubject{$msg} // '';
    my $shown_date    = $messagedate{$msg}    // '';
    print " Sender is \"$shown_from\"\n";
    print " Subject is \"$shown_subject\"\n";
    print " Date is \"$shown_date\"\n";
}

# Write out the from, subject and date caches again.
_write_cache('decisions.mlf', \%messagefrom);
_write_cache('decisions.mls', \%messagesubject);
_write_cache('decisions.mlt', \%messagedate);

# Write out the TeX input file.
# NOTE(review): the sender is written without latex_encode while subject and
# date are encoded; this is kept as found - confirm that it is intentional.
open(my $mld, '>:encoding(UTF-8)', 'decisions.mld')
    or die "Cannot write decisions.mld: $!\n";
_print_lookup_macro($mld, '\gentoomailfrom',    \@messages, \%messagefrom,    0);
_print_lookup_macro($mld, '\gentoomailsubject', \@messages, \%messagesubject, 1);
_print_lookup_macro($mld, '\gentoomaildate',    \@messages, \%messagedate,    1);
close $mld or die "Cannot finish writing decisions.mld: $!\n";