hollick-mfu.pl
#! perl
#
# Master File Update
# Arg1: file to update
# Arg2: keys to update
use 5.040;
use Getopt::Declare;
use File::Slurp;
use Data::Dump qw/pp/;
our $MasterFile;
our $verbose;
$verbose = 0;
our ($range_start, $range_end);
my $specification = q(
-i <ext> Process in place, back up to originalfilename.ext
Unimplemented
-O <file> Output file
Unimplemented
-M <file> Masterfile to update
{ our $MasterFile; $MasterFile=$file;}
-F <file>... Process named file(s)
{ defer {for (@file) {load()}} }
--verbose Give more info
{ our $verbose; $verbose++ ; }
--range-start <pattern> Regular expression of line to start processing after
{ our $range_start; $range_start = qr/$pattern/ or die; }
--range-end <pattern> Regular expression of line to end processing before
{ our $range_end; $range_end = qr/$pattern/ or die; }
);
## could have load() take one of 'C', 'R', 'U', 'D' and have
## transaction files be CSV for generality
my $args = Getopt::Declare->new($specification);
say "MasterFile='$MasterFile'" if $MasterFile;
# Save keys in transaction files
# rule: if multiple txns for same key, order provided across multiple files rules
# therefor needs order preserving sort.
use sort 'stable'; # guarantee stability; it's the default anyway
our @Transactions ; # [ { action => '(C|R|U|D)', data => { key=>val, ...} } ]
sub load () {
my $filename=$_ or die "no filename on callback";
say "load(){$filename}" if $verbose > 0;
my @Lines = read_file($filename);
chomp for @Lines;
chomp for @Lines;
push @Transactions, make_txn($_,$filename) for @Lines;
}
say "Verbosity=$verbose" if $verbose;
if ($verbose > 1 ){
say pp($_) for @Transactions;
}
########
# Sort Transactions for sanity. Uses Stability of Sort to prefer first seen.
@Transactions = sort { key_of_txn($a) cmp key_of_txn($b) } @Transactions;
if ($verbose > 0 ){
say "## sorted##";
say pp($_) for @Transactions;
}
#### MAIN LOOP
#
open(my $fh, "<", "$MasterFile")
or die "Can't open < $MasterFile: $!";
# TODO open output file per -O; or tempfile if -i
###########################
#### these get modified for the files in question
#
# perl bin/hollick-mfu.pl -M New-Englanders-in-the-1600s.md -F hollick-used-names.lst --range-start '^## Hollick Match Extracts' --range-end '^## END HOLLICK MATCH EXTRACTS'
# should range start/end be encoded here? IDK.
sub make_txn($line, $filename) { # do any necessary parsing and semantics of transaction file lines here
return {action =>'U', data=>{ name => $line}, src=> $filename, } ; }
sub key_of_txn($txn) { return '' unless $txn; return uc $txn->{data}->{name};}
sub key_of_line($str) { return uc $1 if $str =~ m{^ [*] \s+ [*]{0,3} ( [A-Z]+ (?:, \s* [A-Za-z]+ \b )) }x; return ''; };
sub apply_update($ref_record, $action){ # reference for mutation
# for current action, any Update is append a checkmark.
$ref_record->$* =~ s/$/ ✓/;
}
# sub do_create { make new CSV hash by output input columns if using CSV ...}
#################
#
say "range /$range_start/../$range_end/" if $verbose;
sub peek_txn() {return ($Transactions[0] or undef);}
my $LastMasterKey='';
my $CurrentMasterKey='';
my $n=1;
MASTER: while (<$fh>) {
### redo always comes here, which would shatter $n so it's in Continue.
chomp; chomp;
if (/$range_start/../$range_end/){
my $CurrentMasterKey = key_of_line($_);
warn "# $n: '$CurrentMasterKey': '$_'\n" if $verbose;
# warn "# =?='",(key_of_txn(peek_txn())||''),"'\n" if $verbose;
# warn "# -> ",pp($Transactions[0]),"\n" if $verbose;
# warn "## NO KEY ## $n: '$CurrentMasterKey': '$_'";
next unless $CurrentMasterKey; # just keep comment lines etc; which might not always be correct
say "($LastMasterKey) > ($CurrentMasterKey)" if 1 == ($LastMasterKey cmp $CurrentMasterKey);
# We have a Key. Does it compare to current transaction key?
next unless peek_txn(); # if out of transactions, the rest of master is good to go
warn "# comparison '",(key_of_txn(peek_txn())), "' cmp '", $CurrentMasterKey , "'\n" if $verbose;
warn "# comparison:= ", ((key_of_txn(peek_txn()) cmp $CurrentMasterKey) ), "\n" if $verbose;
if (key_of_txn(peek_txn()) lt $CurrentMasterKey){
# if ->{action} 'C'reate, with CSV, could insert new record with mapping of columns. __DEFER__
# else error can't Read Update, Delete a non-matching record
warn "unmatched Txn:", pp(peek_txn()), "\n";
shift @Transactions;
redo MASTER;
}
while (key_of_txn(peek_txn()) eq $CurrentMasterKey) {
# matches, consume transaction (if two Masters with equal key, arbitrarily apply all txn to first.
my $txn = shift @Transactions;
if ('U' eq $txn->{action}){
warn "# $n: Applying U\n" if $verbose;
apply_update(\$_,$txn);
say "# $n: U: $_" if $verbose;
}
elsif ($txn->{action} =~ /[CRD]/){
warn "Txn. unsupported:", pp($txn);
}
else {
# no action if next transaction is greater than current key
}
} # end WHILE TXNs
} # end in range
else {
# not in range, nothing to do, pass thru ok
}
}
continue {
### next always comes here
### Last of this
say $_ if 1; # unless D ...
$LastMasterKey = $CurrentMasterKey;
$n++;
# do_something_else;
# then back to the top to re-check EXPR
}
## last always comes here
# TODO if success, close input, output, then if -i, mv input to input.ext and mov tmp to input
# vim: ai:ts=8:sw=8:ic:noexpandtab: