Commit Diff
Commit:
27363023e413886fbe0851562fd1f02be2467d98
Date:
Thu Aug 25 08:58:12 2022
UTC
Message:
parallelize exporting
mexp is the slowest part of the pipeline. Try to speed it up by
parallelizing it.
this adds a `pe' script that sits in front of N mexp children and
dispatches threads to them, while still outputting the unchanged lines
for mkindex.
Unscientific testing has shown that this effectively reduces the time
for a full export, even if not linearly. (4 jobs cut the time in half,
8 jobs was just barely faster)
--- Makefile
+++ Makefile
@@ -8,7 +8,7 @@ all: .mblaze dirs assets
all: .mblaze dirs assets
mlist '${MDIR}' | mthread -r | \
${ENV} mscan -f '%R %I %i %16D <%64f> %128S' | \
- ${ENV} ./mexp | ${ENV} ./mkindex
+ ${ENV} ./pe | ${ENV} ./mkindex
gzip:
gzip -fkr ${OUTDIR}/
--- mexp
+++ mexp
@@ -60,7 +60,6 @@ while (<>) {
my $tid;
while (<>) {
chomp;
- say; # continue the pipeline
m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
die "can't parse: $_" unless defined $1;
--- /dev/null
+++ pe
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+
+use open ":std", ":encoding(UTF-8)";
+use strict;
+use warnings;
+use v5.32;
+use IO::Poll qw(POLLOUT);
+
+ my $jobs = $ENV{'MAKE_JOBS'} // 1; # number of mexp children to spawn; 1 when MAKE_JOBS is undefined
+
+ my $poll = IO::Poll->new(); # poll set holding the write end of every child's pipe
+ for (1..$jobs) { # start one ./mexp child per job, each fed through its own pipe
+ say STDERR "pe: spawning job #$_";
+ open(my $kid, '|-', './mexp') or die "can't exec ./mexp: $!";
+ $poll->mask($kid => POLLOUT); # register the pipe so we can ask when it is writable
+}
+
+ sub process { # send every line in @_ (one whole thread) to a single mexp child
+ die "poll: $!" if $poll->poll() == -1; # poll the child pipes; -1 signals a poll error
+ my @handles = $poll->handles(POLLOUT) or die "no procs ready?"; # pipes that reported POLLOUT
+ my $handle = $handles[int(rand(@handles))]; # pick one of the writable children at random
+ say $handle $_ foreach @_; # all lines go to the same child, one per line
+}
+
+ my @thread; # lines of the thread currently being collected
+ while (<>) {
+ print; # continue the pipeline
+ chomp;
+
+ m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/;
+ die "can't parse: $_" unless defined $1;
+
+ my $level = length($3) - 1; # depth = width of the text between the first <...> field and the date, minus one
+ $level = 10 if $3 =~ m/\.\.\d{2}\.\./; # a "..NN.." marker in that text is treated as depth 10
+
+ if ($level == 0 && @thread) { # a depth-0 line starts a new thread: hand off the one collected so far
+ process @thread;
+ @thread = ();
+ }
+
+ push @thread, $_; # the chomped line joins the current thread
+}
+
+ process @thread if @thread; # flush the final thread once input is exhausted
Omar Polo