#!/usr/bin/perl

use strict;
use warnings;

use List::MoreUtils qw(uniq sort_by);
use LaTeX::Encode qw(latex_encode);
use Data::Dumper;

# four files:
# - mlr: input file, unsorted list of messages, format "hash:listname"
#        generated by LaTeX, can contain duplicates
# - mlf: cache of message senders; format: one message per two lines
#        * first line "hash:listname"
#        * second line message sender
#        maintained by this script, can be deleted any time but needs
#        archives.gentoo.org (a.g.o) access for recreation
# - mls: cache of message subjects; format: one message per two lines
#        * first line "hash:listname"
#        * second line message subject
#        maintained by this script, can be deleted any time but needs a.g.o
#        access for recreation
# - mlt: cache of message dates; format: one message per two lines
#        * first line "hash:listname"
#        * second line message date
#        maintained by this script, can be deleted any time but needs a.g.o
#        access for recreation
# - mld: output file, LaTeX fragment that can be sourced

# _line_after($fh, $marker)
#
# Reads forward on $fh until a line matches the regex $marker and returns the
# line that follows it (chomped). Returns nothing if the handle is exhausted
# first, so a failed download or a changed page layout cannot loop forever.
sub _line_after {
    my ($fh, $marker) = @_;

    while (defined(my $line = <$fh>)) {
        next unless $line =~ $marker;

        my $value = <$fh>;
        return unless defined $value;
        chomp $value;
        return $value;
    }
    return;
}

# getdata($list, $hash)
#
# Downloads the archive page of one mailing list message with wget and
# scrapes sender, subject and date from it.
#
#   $list - mailing list name (path component of the archive URL)
#   $hash - message hash      (path component of the archive URL)
#
# Returns the list ($from, $subject, $date). A field that could not be found
# is returned as the empty string, and a warning is printed.
sub getdata {
    my ($list, $hash) = @_;

    my $url = "https://archives.gentoo.org/$list/message/$hash";

    # List-form pipe open: no shell is involved, so whatever the input file
    # contains cannot be interpreted as shell syntax.
    my $pid = open(my $web, '-|', 'wget', '-O', '-', $url);
    unless ($pid) {
        warn "Cannot run wget for $url: $!\n";
        return ('', '', '');
    }
    binmode($web, ':encoding(UTF-8)');

    # The value of each header is on the line following its label. The three
    # labels are searched in page order on the same stream.
    my $from    = _line_after($web, qr/From:/);
    my $subject = _line_after($web, qr/>Subject:</);
    my $date    = _line_after($web, qr/>Date:</);

    # wget's exit status is not inspected; missing fields are reported below.
    close $web;

    unless (defined $from && defined $subject && defined $date) {
        warn "Incomplete message data from $url\n";
    }
    for my $field ($from, $subject, $date) {
        $field = '' unless defined $field;
    }

    # NOTE(review): the markup anchors in the substitutions below (<td>,
    # <strong>, &lt;, &quot;) were reconstructed from the closing tags that
    # are matched; confirm them against the current archive HTML.

    # Sender: drop the leading markup, the mail address and any quotes.
    $from =~ s/^.*<td[^>]*>//;
    $from =~ s/ (?:<|&lt;).*$//;
    $from =~ s/(?:"|&quot;)//g;

    # Subject: keep only the text inside <strong>...</strong>.
    $subject =~ s/^.*<strong>//;
    $subject =~ s/<\/strong><\/td>.*$//;

    # Date: keep only the text inside the table cell.
    $date =~ s/^.*<td[^>]*>//;
    $date =~ s/<\/td>.*$//;

    return ($from, $subject, $date);
}

# Main code start

# Read the list of messages referenced in the LaTeX file.
my @mlrlist;
if (open my $mlr, '<', "decisions.mlr") {
    @mlrlist = <$mlr>;
    close $mlr;
    chomp @mlrlist;
}
else {
    # Keep going with an empty list, as before, but say why nothing happens.
    warn "Cannot read decisions.mlr: $!\n";
}

# Sorted, without duplicates and without blank lines.
my @messages = uniq sort_by { $_ } grep { length } @mlrlist;

# Read the cache of message senders; this file can be deleted, but recreating or
# updating it requires internet access.
# _read_cache($file)
#
# Returns the content of a cache file as a flat (key, value, key, value, ...)
# list, one element per line. A cache file may legitimately be absent (it can
# be deleted at any time), so a missing file yields an empty list silently;
# any other open error is reported with a warning.
sub _read_cache {
    my ($file) = @_;

    my $fh;
    unless (open $fh, '<', $file) {
        warn "Cannot read $file: $!\n" unless $!{ENOENT};
        return;
    }
    my @lines = <$fh>;
    close $fh;
    chomp @lines;
    return @lines;
}

# _write_cache($file, \%cache)
#
# Writes the cache back in the two-lines-per-message format, sorted by key.
# A failure only produces a warning: the cache is an optimisation and the
# rest of the script can still do its work without it.
sub _write_cache {
    my ($file, $cache) = @_;

    my $fh;
    unless (open $fh, '>', $file) {
        warn "Cannot write $file: $!\n";
        return;
    }
    for my $key (sort keys %{$cache}) {
        my $value = defined $cache->{$key} ? $cache->{$key} : '';
        print {$fh} "$key\n$value\n";
    }
    close $fh or warn "Cannot finish writing $file: $!\n";
    return;
}

my %messagefrom = _read_cache("decisions.mlf");

# Read the cache of message subjects; this file can be deleted, but recreating or
# updating it requires internet access.
my %messagesubject = _read_cache("decisions.mls");

# Read the cache of message times; this file can be deleted, but recreating or
# updating it requires internet access.
my %messagedate = _read_cache("decisions.mlt");

# Loop through the referenced messages, check if we already have the data,
# and if not fetch and add it.
for my $msg (@messages) {
    my ($msghash, $msglist) = split /:/, $msg, 2;
    unless (defined $msglist) {
        warn "Skipping malformed entry \"$msg\" (expected \"hash:listname\")\n";
        next;
    }
    print "List $msglist, hash $msghash\n";

    # Every cache file can be deleted independently of the others, so the
    # message counts as cached only if all three values are present.
    my $have_all = 1;
    for my $cache (\%messagefrom, \%messagesubject, \%messagedate) {
        my $value = $cache->{$msg};
        $have_all = 0 unless defined $value && length $value;
    }

    unless ($have_all) {
        print " Data not yet available, fetching it\n";
        # NOTE(review): getdata() reads through an :encoding(UTF-8) layer,
        # while the cache files are read and written without a layer --
        # confirm that the resulting mix of strings is intended.
        my ($from, $subject, $date) = getdata($msglist, $msghash);
        $messagefrom{$msg}    = defined $from    ? $from    : '';
        $messagesubject{$msg} = defined $subject ? $subject : '';
        $messagedate{$msg}    = defined $date    ? $date    : '';
    }

    print " Sender is \"$messagefrom{$msg}\"\n";
    print " Subject is \"$messagesubject{$msg}\"\n";
    print " Date is \"$messagedate{$msg}\"\n";
}

# Write out the from cache again.
_write_cache("decisions.mlf", \%messagefrom);

# Write out the subject cache again.
_write_cache("decisions.mls", \%messagesubject);

# Write out the date cache again.
{
    # Two lines per message, sorted by key: "hash:listname", then the date.
    # A failure is only a warning; the cache can be recreated later.
    my $cache_file = "decisions.mlt";
    if (open my $mlt_out, '>', $cache_file) {
        for my $key (sort keys %messagedate) {
            my $value = defined $messagedate{$key} ? $messagedate{$key} : '';
            print {$mlt_out} "$key\n$value\n";
        }
        close $mlt_out or warn "Cannot finish writing $cache_file: $!\n";
    }
    else {
        warn "Cannot write $cache_file: $!\n";
    }
}

# _print_tex_lookup($out, $macro, $value_of, @entries)
#
# Prints the redefinition of one LaTeX lookup macro to the handle $out:
# a \renewcommand whose body has one \ifthenelse per entry, comparing the
# macro argument with the message hash and expanding to the entry's value.
#
#   $out      - output file handle
#   $macro    - macro name including the leading backslash
#   $value_of - code reference; called with the "hash:listname" entry, returns
#               the text to insert (already escaped for LaTeX if necessary)
#   @entries  - the "hash:listname" entries to emit, in output order
sub _print_tex_lookup {
    my ($out, $macro, $value_of, @entries) = @_;

    print {$out} '\renewcommand{' . $macro . '}[1]{%' . "\n";
    for my $entry (@entries) {
        my ($msghash) = split /:/, $entry, 2;
        my $value = $value_of->($entry);
        $msghash = '' unless defined $msghash;
        $value   = '' unless defined $value;
        print {$out} '\ifthenelse{\equal{#1}{' . $msghash . '}}{{' . $value . '}}{}%' . "\n";
    }
    print {$out} '}' . "\n";
    return;
}

# Write out the TeX input file
my $mld_file = "decisions.mld";
open my $mld, '>', $mld_file
    or die "Cannot write $mld_file: $!\n";

# NOTE(review): the sender is written without latex_encode(), unlike subject
# and date -- confirm that this is intended.
_print_tex_lookup($mld, '\gentoomailfrom',
    sub { $messagefrom{ $_[0] } },
    @messages);

_print_tex_lookup($mld, '\gentoomailsubject',
    sub { defined $messagesubject{ $_[0] } ? latex_encode($messagesubject{ $_[0] }) : '' },
    @messages);

_print_tex_lookup($mld, '\gentoomaildate',
    sub { defined $messagedate{ $_[0] } ? latex_encode($messagedate{ $_[0] }) : '' },
    @messages);

# Buffered write errors only show up at close, so check it.
close $mld
    or die "Cannot finish writing $mld_file: $!\n";