Commit Diff
Commit:
de557185bc5f862ef773205dd8fb0f659bb4c1c2
Date:
Fri Aug 26 22:03:55 2022
UTC
Message:
refactor the parsing into the module
change the mscan format string to simplify the parsing, and refactor
the parsing code into the module for reuse.
pe is an exception in that it doesn't care about the format string, it
just needs to decide if the current line starts a new thread or not,
that's why it doesn't use the newly introduced `parse'. The new
format simplifies pe too though.
--- GotMArc.pm
+++ GotMArc.pm
@@ -5,7 +5,7 @@ our @EXPORT_OK = qw($logo san mid2path initpage endpag
use Exporter;
our @ISA = qw(Exporter);
-our @EXPORT_OK = qw($logo san mid2path initpage endpage);
+our @EXPORT_OK = qw($logo san parse initpage endpage);
our $logo = <<'EOF';
<a href="https://gameoftrees.org" target="_blank">
@@ -23,6 +23,13 @@ sub mid2path {
return $str;
}
+sub ssan {
+ my $str = shift;
+ $str =~ s/\s+/ /g;
+ $str =~ s/\s+$//;
+ return san($str);
+}
+
sub mid2path {
my $mid = shift;
$mid =~ s,_,__,g;
@@ -30,6 +37,26 @@ my $hdr = do {
return $mid;
}
+sub parse {
+ my ($indent, $fname, $mid, $date, $from, $subj) = m{
+ ^([^-]*)- # the indent level
+ ([^ ]+)\s # filename
+ <([^>]+)> # message id
+ (\d{4}-\d\d-\d\d[ ]\d\d:\d\d) # date
+ <([^>]+)> # from
+ (.*) # subject
+ }x or die "can't parse: $_";
+
+ my $level = length($indent);
+ $level = 10 if $indent =~ m/\.\.\d+\.\./;
+
+ $mid = mid2path($mid);
+ $from = ssan($from);
+ $subj = ssan($subj);
+
+ return ($level, $fname, $mid, $date, $from, $subj);
+}
+
my $hdr = do {
local $/ = undef;
open my $fh, "<", "head.html"
--- gotmarc
+++ gotmarc
@@ -8,5 +8,5 @@ fmt='%R %I %i %16D <%64f> %128S'
: ${MDIR:?not defined}
-fmt='%R %I %i %16D <%64f> %128S'
+fmt='%i-%R %I%16D<%64f>%128S'
mlist "${MDIR}" | mthread -r | mscan -f "$fmt" | ./pe | ./mkindex
--- mexp
+++ mexp
@@ -10,7 +10,7 @@ use GotMArc qw(san $logo mid2path initpage endpage);
use OpenBSD::Unveil;
use lib ".";
-use GotMArc qw(san $logo mid2path initpage endpage);
+use GotMArc qw(parse san $logo initpage endpage);
my $outdir = $ENV{'OUTDIR'};
die 'Set $OUTDIR' unless defined $outdir;
@@ -22,20 +22,8 @@ while (<>) {
my $tid;
while (<>) {
- chomp;
+ my ($level, $fname, $mid, $date, $from, $subj) = parse;
- m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
- die "can't parse: $_" unless defined $1;
- my ($fname, $mid, $indent, $date, $from, $subj) = ($1, $2, $3, $4, $5, $6);
- $subj = san($subj);
- $subj =~ s/\s+/ /g;
- $subj =~ s/\s+$//;
-
- $mid = mid2path($mid);
-
- my $level = length($indent) - 1;
- $level = 10 if $indent =~ m/\.\.\d{2}\.\./;
-
$tid = $mid if $level == 0;
die "unknown tid" unless defined $tid;
--- mkindex
+++ mkindex
@@ -10,7 +10,7 @@ use GotMArc qw($logo san mid2path initpage endpage);
use OpenBSD::Unveil;
use lib ".";
-use GotMArc qw($logo san mid2path initpage endpage);
+use GotMArc qw($logo parse san initpage endpage);
my $outdir = $ENV{'OUTDIR'};
die 'Set $OUTDIR' unless defined $outdir;
@@ -173,23 +173,8 @@ while (<>) {
nextfile;
while (<>) {
- chomp;
- m/^[^ ]+ <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
- die "woops; $_\n" unless defined $1;
+ my ($level, $fname, $mid, $date, $from, $subj) = parse;
- my ($mid, $indent, $date, $from, $subj) = ($1, $2, $3, $4, $5);
- $from =~ s/\s+$//;
- $from = san($from);
- $subj = san($subj);
-
- my $level = length($indent) - 1;
- $level = 10 if $indent =~ m/\.\.\d{2}\.\./;
-
- $mid = mid2path($mid);
-
- $subj =~ s/\s+/ /g;
- $subj =~ s/\s+$//g;
-
if ($level == 0) {
nextthread $mid, $subj;
--- pe
+++ pe
@@ -21,7 +21,7 @@ sub process {
die "poll: $!" if $poll->poll() == -1;
my @handles = $poll->handles(POLLOUT) or die "no procs ready?";
my $handle = $handles[int(rand(@handles))];
- say $handle $_ foreach @_;
+ print $handle $_ foreach @_;
}
unveil("./mexp", "rx") or die "unveil mexp: $!";
@@ -30,15 +30,9 @@ while (<>) {
my @thread;
while (<>) {
print; # continue the pipeline
- chomp;
- m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
- die "can't parse: $_" unless defined $1;
-
- my $level = length($3) - 1;
- $level = 10 if $3 =~ m/\.\.\d{2}\.\./;
-
- if ($level == 0 && @thread) {
+ my $new_thread = m/^-/;
+ if ($new_thread && @thread) {
process @thread;
@thread = ();
}
Omar Polo