commit 27363023e413886fbe0851562fd1f02be2467d98 from: Omar Polo date: Thu Aug 25 08:58:12 2022 UTC parallelize exporting mexp is the slowest part of the pipeline. Try to speed it up by parallelizing it. This adds a `pe' script that sits in front of N mexp children and dispatches threads to them, while still outputting the unchanged lines for mkindex. Unscientific testing has shown that this effectively reduces the time for a full export, even if not linearly. (4 jobs cut the time in half, 8 jobs was just barely faster) commit - 627b3fd1799e508f8851aa7b1fa7ca765b07c7db commit + 27363023e413886fbe0851562fd1f02be2467d98 blob - d3504699261e0d94e0c572c7362da74971676e5e blob + 9e32bdbe3a5343243a7ce7a0574317151adfc551 --- Makefile +++ Makefile @@ -8,7 +8,7 @@ OUTDIR = www all: .mblaze dirs assets mlist '${MDIR}' | mthread -r | \ ${ENV} mscan -f '%R %I %i %16D <%64f> %128S' | \ - ${ENV} ./mexp | ${ENV} ./mkindex + ${ENV} ./pe | ${ENV} ./mkindex gzip: gzip -fkr ${OUTDIR}/ blob - cbfc692f602abdaa0d18953060ed2ade7554b340 blob + 96aa35ea2788149a31b580bd13591450f2c74af4 --- mexp +++ mexp @@ -60,7 +60,6 @@ sub genmbox { my $tid; while (<>) { chomp; - say; # continue the pipeline m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/; die "can't parse: $_" unless defined $1; blob - /dev/null blob + 0344ba47d0efb157d63ce5dd59bd7e72258d0a17 (mode 755) --- /dev/null +++ pe @@ -0,0 +1,44 @@ +#!/usr/bin/env perl + +use open ":std", ":encoding(UTF-8)"; +use strict; +use warnings; +use v5.32; +use IO::Poll qw(POLLOUT); + +my $jobs = $ENV{'MAKE_JOBS'} // 1; + +my $poll = IO::Poll->new(); +for (1..$jobs) { + say STDERR "pe: spawning job #$_"; + open(my $kid, '|-', './mexp') or die "can't exec ./mexp: $!"; + $poll->mask($kid => POLLOUT); +} + +sub process { + die "poll: $!" 
if $poll->poll() == -1; + my @handles = $poll->handles(POLLOUT) or die "no procs ready?"; + my $handle = $handles[int(rand(@handles))]; + say $handle $_ foreach @_; +} + +my @thread; +while (<>) { + print; # continue the pipeline + chomp; + + m/^([^ ]+) <([^>]+)> (.+)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) <([^>]+)> (.*)/; + die "can't parse: $_" unless defined $1; + + my $level = length($3) - 1; + $level = 10 if $3 =~ m/\.\.\d{2}\.\./; + + if ($level == 0 && @thread) { + process @thread; + @thread = (); + } + + push @thread, $_; +} + +process @thread if @thread;