Commit Diff
Commit:
de557185bc5f862ef773205dd8fb0f659bb4c1c2
From:
Omar Polo <op@omarpolo.com>
Date:
Fri Aug 26 22:03:55 2022 UTC
Message:
refactor the parsing into the module. change the mscan format string to simplify the parsing, and refactor the parsing code into the module for reuse. pe is an exception in that it doesn't care about the format string, it just needs to decide if the current line starts a new thread or not, that's why it doesn't use the newly introduced `parse'. The new format simplifies pe too though.
commit - 8d17053dd8b71f5e7ba5547ebd2e758a0a317c7d
commit + de557185bc5f862ef773205dd8fb0f659bb4c1c2
blob - d622e78e8616b12ca64ef45ad3f817d1dc4c524c
blob + 4647a8cdcd53ce3902d0d9dee9a35eccda10a227
--- GotMArc.pm
+++ GotMArc.pm
@@ -5,7 +5,7 @@ our @EXPORT_OK = qw($logo san mid2path initpage endpag
use Exporter;
our @ISA = qw(Exporter);
-our @EXPORT_OK = qw($logo san mid2path initpage endpage);
+our @EXPORT_OK = qw($logo san parse initpage endpage);
our $logo = <<'EOF';
<a href="https://gameoftrees.org" target="_blank">
@@ -23,6 +23,13 @@ sub mid2path {
return $str;
}
+sub ssan {
+ my $str = shift;
+ $str =~ s/\s+/ /g;
+ $str =~ s/\s+$//;
+ return san($str);
+}
+
sub mid2path {
my $mid = shift;
$mid =~ s,_,__,g;
@@ -30,6 +37,26 @@ my $hdr = do {
return $mid;
}
+sub parse {
+ my ($indent, $fname, $mid, $date, $from, $subj) = m{
+ ^([^-]*)- # the indent level
+ ([^ ]+)\s # filename
+ <([^>]+)> # message id
+ (\d{4}-\d\d-\d\d[ ]\d\d:\d\d) # date
+ <([^>]+)> # from
+ (.*) # subject
+ }x or die "can't parse: $_";
+
+ my $level = length($indent);
+ $level = 10 if $indent =~ m/\.\.\d+\.\./;
+
+ $mid = mid2path($mid);
+ $from = ssan($from);
+ $subj = ssan($subj);
+
+ return ($level, $fname, $mid, $date, $from, $subj);
+}
+
my $hdr = do {
local $/ = undef;
open my $fh, "<", "head.html"
blob - 5814ad82e296cf9df0ffd397c339e695d1db7f5b
blob + 7f12b242345ac2f79761c623f68e0678b800c617
--- gotmarc
+++ gotmarc
@@ -8,5 +8,5 @@ fmt='%R %I %i %16D <%64f> %128S'
: ${MDIR:?not defined}
-fmt='%R %I %i %16D <%64f> %128S'
+fmt='%i-%R %I%16D<%64f>%128S'
mlist "${MDIR}" | mthread -r | mscan -f "$fmt" | ./pe | ./mkindex
blob - 9fabbaad140e0cab1ec14af09b8ee4773040d49a
blob + cada50b35df5cddd491c36eecb6a107ede0c199f
--- mexp
+++ mexp
@@ -10,7 +10,7 @@ use GotMArc qw(san $logo mid2path initpage endpage);
use OpenBSD::Unveil;
use lib ".";
-use GotMArc qw(san $logo mid2path initpage endpage);
+use GotMArc qw(parse san $logo initpage endpage);
my $outdir = $ENV{'OUTDIR'};
die 'Set $OUTDIR' unless defined $outdir;
@@ -22,20 +22,8 @@ while (<>) {
my $tid;
while (<>) {
- chomp;
+ my ($level, $fname, $mid, $date, $from, $subj) = parse;
- m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
- die "can't parse: $_" unless defined $1;
- my ($fname, $mid, $indent, $date, $from, $subj) = ($1, $2, $3, $4, $5, $6);
- $subj = san($subj);
- $subj =~ s/\s+/ /g;
- $subj =~ s/\s+$//;
-
- $mid = mid2path($mid);
-
- my $level = length($indent) - 1;
- $level = 10 if $indent =~ m/\.\.\d{2}\.\./;
-
$tid = $mid if $level == 0;
die "unknown tid" unless defined $tid;
blob - 167d41611cd231cec63d2ab50fd0ec932f914ad6
blob + 81bfc636e650fd068a9da16d825ea5928170c6a4
--- mkindex
+++ mkindex
@@ -10,7 +10,7 @@ use GotMArc qw($logo san mid2path initpage endpage);
use OpenBSD::Unveil;
use lib ".";
-use GotMArc qw($logo san mid2path initpage endpage);
+use GotMArc qw($logo parse san initpage endpage);
my $outdir = $ENV{'OUTDIR'};
die 'Set $OUTDIR' unless defined $outdir;
@@ -173,23 +173,8 @@ while (<>) {
nextfile;
while (<>) {
- chomp;
- m/^[^ ]+ <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
- die "woops; $_\n" unless defined $1;
+ my ($level, $fname, $mid, $date, $from, $subj) = parse;
- my ($mid, $indent, $date, $from, $subj) = ($1, $2, $3, $4, $5);
- $from =~ s/\s+$//;
- $from = san($from);
- $subj = san($subj);
-
- my $level = length($indent) - 1;
- $level = 10 if $indent =~ m/\.\.\d{2}\.\./;
-
- $mid = mid2path($mid);
-
- $subj =~ s/\s+/ /g;
- $subj =~ s/\s+$//g;
-
if ($level == 0) {
nextthread $mid, $subj;
blob - 594f95b5d36a53df83fcebda3b7b8e6a9a41e2d9
blob + 72fd7a73aca75acbdf27126a43654e1dc12825db
--- pe
+++ pe
@@ -21,7 +21,7 @@ sub process {
die "poll: $!" if $poll->poll() == -1;
my @handles = $poll->handles(POLLOUT) or die "no procs ready?";
my $handle = $handles[int(rand(@handles))];
- say $handle $_ foreach @_;
+ print $handle $_ foreach @_;
}
unveil("./mexp", "rx") or die "unveil mexp: $!";
@@ -30,15 +30,9 @@ while (<>) {
my @thread;
while (<>) {
print; # continue the pipeline
- chomp;
- m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
- die "can't parse: $_" unless defined $1;
-
- my $level = length($3) - 1;
- $level = 10 if $3 =~ m/\.\.\d{2}\.\./;
-
- if ($level == 0 && @thread) {
+ my $new_thread = m/^-/;
+ if ($new_thread && @thread) {
process @thread;
@thread = ();
}
Omar Polo