commit de557185bc5f862ef773205dd8fb0f659bb4c1c2 from: Omar Polo date: Fri Aug 26 22:03:55 2022 UTC refactor the parsing into the module change the mscan format string to simplify the parsing, and refactor the parsing code into the module for reuse. pe is an exception in that it doesn't care about the format string, it just needs to decide if the current line starts a new thread or not, that's why it doesn't use the newly introduced `parse'. The new format simplifies pe too though. commit - 8d17053dd8b71f5e7ba5547ebd2e758a0a317c7d commit + de557185bc5f862ef773205dd8fb0f659bb4c1c2 blob - d622e78e8616b12ca64ef45ad3f817d1dc4c524c blob + 4647a8cdcd53ce3902d0d9dee9a35eccda10a227 --- GotMArc.pm +++ GotMArc.pm @@ -5,7 +5,7 @@ use v5.32; use Exporter; our @ISA = qw(Exporter); -our @EXPORT_OK = qw($logo san mid2path initpage endpage); +our @EXPORT_OK = qw($logo san parse initpage endpage); our $logo = <<'EOF'; @@ -23,6 +23,13 @@ sub san { return $str; } +sub ssan { + my $str = shift; + $str =~ s/\s+/ /g; + $str =~ s/\s+$//; + return san($str); +} + sub mid2path { my $mid = shift; $mid =~ s,_,__,g; @@ -30,6 +37,26 @@ sub mid2path { return $mid; } +sub parse { + my ($indent, $fname, $mid, $date, $from, $subj) = m{ + ^([^-]*)- # the indent level + ([^ ]+)\s # filename + <([^>]+)> # message id + (\d{4}-\d\d-\d\d[ ]\d\d:\d\d) # date + <([^>]+)> # from + (.*) # subject + }x or die "can't parse: $_"; + + my $level = length($indent); + $level = 10 if $indent =~ m/\.\.\d+\.\./; + + $mid = mid2path($mid); + $from = ssan($from); + $subj = ssan($subj); + + return ($level, $fname, $mid, $date, $from, $subj); +} + my $hdr = do { local $/ = undef; open my $fh, "<", "head.html" blob - 5814ad82e296cf9df0ffd397c339e695d1db7f5b blob + 7f12b242345ac2f79761c623f68e0678b800c617 --- gotmarc +++ gotmarc @@ -8,5 +8,5 @@ export OUTDIR="${OUTDIR:-/var/www/marc}" : ${MDIR:?not defined} -fmt='%R %I %i %16D <%64f> %128S' +fmt='%i-%R %I%16D<%64f>%128S' mlist "${MDIR}" | mthread -r | mscan -f "$fmt" | ./pe | 
./mkindex blob - 9fabbaad140e0cab1ec14af09b8ee4773040d49a blob + cada50b35df5cddd491c36eecb6a107ede0c199f --- mexp +++ mexp @@ -10,7 +10,7 @@ use OpenBSD::Pledge; use OpenBSD::Unveil; use lib "."; -use GotMArc qw(san $logo mid2path initpage endpage); +use GotMArc qw(parse san $logo initpage endpage); my $outdir = $ENV{'OUTDIR'}; die 'Set $OUTDIR' unless defined $outdir; @@ -22,20 +22,8 @@ pledge("stdio rpath wpath cpath proc exec") or die "pl my $tid; while (<>) { - chomp; + my ($level, $fname, $mid, $date, $from, $subj) = parse; - m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/; - die "can't parse: $_" unless defined $1; - my ($fname, $mid, $indent, $date, $from, $subj) = ($1, $2, $3, $4, $5, $6); - $subj = san($subj); - $subj =~ s/\s+/ /g; - $subj =~ s/\s+$//; - - $mid = mid2path($mid); - - my $level = length($indent) - 1; - $level = 10 if $indent =~ m/\.\.\d{2}\.\./; - $tid = $mid if $level == 0; die "unknown tid" unless defined $tid; blob - 167d41611cd231cec63d2ab50fd0ec932f914ad6 blob + 81bfc636e650fd068a9da16d825ea5928170c6a4 --- mkindex +++ mkindex @@ -10,7 +10,7 @@ use OpenBSD::Pledge; use OpenBSD::Unveil; use lib "."; -use GotMArc qw($logo san mid2path initpage endpage); +use GotMArc qw($logo parse san initpage endpage); my $outdir = $ENV{'OUTDIR'}; die 'Set $OUTDIR' unless defined $outdir; @@ -173,23 +173,8 @@ pledge("stdio rpath wpath cpath") or die "pledge: $!"; nextfile; while (<>) { - chomp; - m/^[^ ]+ <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/; - die "woops; $_\n" unless defined $1; + my ($level, $fname, $mid, $date, $from, $subj) = parse; - my ($mid, $indent, $date, $from, $subj) = ($1, $2, $3, $4, $5); - $from =~ s/\s+$//; - $from = san($from); - $subj = san($subj); - - my $level = length($indent) - 1; - $level = 10 if $indent =~ m/\.\.\d{2}\.\./; - - $mid = mid2path($mid); - - $subj =~ s/\s+/ /g; - $subj =~ s/\s+$//g; - if ($level == 0) { nextthread $mid, $subj; blob - 
594f95b5d36a53df83fcebda3b7b8e6a9a41e2d9 blob + 72fd7a73aca75acbdf27126a43654e1dc12825db --- pe +++ pe @@ -21,7 +21,7 @@ sub process { die "poll: $!" if $poll->poll() == -1; my @handles = $poll->handles(POLLOUT) or die "no procs ready?"; my $handle = $handles[int(rand(@handles))]; - say $handle $_ foreach @_; + print $handle $_ foreach @_; } unveil("./mexp", "rx") or die "unveil mexp: $!"; @@ -30,15 +30,9 @@ pledge("stdio proc exec") or die "pledge: $!"; my @thread; while (<>) { print; # continue the pipeline - chomp; - m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/; - die "can't parse: $_" unless defined $1; - - my $level = length($3) - 1; - $level = 10 if $3 =~ m/\.\.\d{2}\.\./; - - if ($level == 0 && @thread) { + my $new_thread = m/^-/; + if ($new_thread && @thread) { process @thread; @thread = (); }