hollick-mfu.pl


#! perl
#
# Master File Update 
# Arg1: file to update
# Arg2: keys to update

use 5.040;

use Getopt::Declare;
use File::Slurp;
use Data::Dump qw/pp/;

our $MasterFile;
our $verbose;
$verbose = 0;
our ($range_start, $range_end);

my $specification = q(
		-i <ext>	Process in place, back up to originalfilename.ext
						Unimplemented
		-O <file>	Output file
						Unimplemented
		-M <file>	Masterfile to update
						{ our $MasterFile; $MasterFile=$file;}
		-F <file>...	Process named file(s)
						{ defer {for (@file) {load()}} }
		--verbose  	Give more info
						{ our $verbose; $verbose++ ; }
		--range-start <pattern>	Regular expression of line to start processing after
						{ our $range_start; $range_start = qr/$pattern/ or die; }
		--range-end <pattern>	Regular expression of line to end processing before
						{ our $range_end; $range_end = qr/$pattern/ or die; }
);

## could have load() take one of 'C', 'R', 'U', 'D' and have 
## transaction files be CSV for generality

my $args = Getopt::Declare->new($specification);

say "MasterFile='$MasterFile'" if $MasterFile;

# Save keys in transaction files 
# rule: if multiple txns for same key, order provided across multiple files rules
# therefor needs order preserving sort.
use sort 'stable';              # guarantee stability; it's the default anyway
our @Transactions ; # [ { action => '(C|R|U|D)', data => { key=>val, ...} } ]

sub load () {
	my $filename=$_ or die "no filename on callback";
	say "load(){$filename}" if $verbose > 0;
	my @Lines = read_file($filename);
	chomp for @Lines;
	chomp for @Lines;
	push @Transactions, make_txn($_,$filename) for @Lines;
	}

say "Verbosity=$verbose" if $verbose;

if ($verbose > 1 ){
	say pp($_) for @Transactions;
}
########

# Sort Transactions for sanity. Uses Stability of Sort to prefer first seen.
@Transactions = sort { key_of_txn($a) cmp key_of_txn($b) } @Transactions;
if ($verbose > 0 ){
	say "## sorted##";
	say pp($_) for @Transactions;
}

#### MAIN LOOP
#
open(my $fh, "<", "$MasterFile")
    or die "Can't open < $MasterFile: $!";
# TODO open output file per -O; or tempfile if -i 

###########################
#### these get modified for the files in question
#
# perl bin/hollick-mfu.pl -M New-Englanders-in-the-1600s.md -F hollick-used-names.lst  --range-start '^## Hollick Match Extracts' --range-end '^## END HOLLICK MATCH EXTRACTS' 
# should range start/end be encoded here? IDK.
sub make_txn($line, $filename) {  # do any necessary parsing and semantics of transaction file lines here
		return {action =>'U', data=>{ name => $line}, src=> $filename, } ; }
sub key_of_txn($txn) { return '' unless $txn; return uc $txn->{data}->{name};}
sub key_of_line($str) { return uc $1 if $str =~ m{^ [*] \s+ [*]{0,3} ( [A-Z]+ (?:, \s* [A-Za-z]+ \b ))  }x; return ''; };
sub apply_update($ref_record, $action){  # reference for mutation
	# for current action, any Update is append a checkmark.
	$ref_record->$* =~ s/$/ ✓/;
}
# sub do_create { make new CSV hash by output input columns if using CSV ...}
#################

#
say "range /$range_start/../$range_end/" if $verbose;


sub peek_txn() {return  ($Transactions[0] or undef);}
my $LastMasterKey='';
my $CurrentMasterKey='';
my $n=1;
MASTER: while (<$fh>) {
    ### redo always comes here, which would shatter $n so it's in Continue.
    chomp; chomp;
    if (/$range_start/../$range_end/){
	my $CurrentMasterKey = key_of_line($_);
	warn "# $n: '$CurrentMasterKey': '$_'\n" if $verbose;
	# warn "#  =?='",(key_of_txn(peek_txn())||''),"'\n" if $verbose;
	# warn "#  -> ",pp($Transactions[0]),"\n" if $verbose;
	# warn "## NO KEY ## $n: '$CurrentMasterKey': '$_'";
	next unless $CurrentMasterKey; # just keep comment lines etc; which might not always be correct

	say "($LastMasterKey) > ($CurrentMasterKey)" if 1 == ($LastMasterKey cmp $CurrentMasterKey);
	# We have a Key. Does it compare to current transaction key?
	next unless peek_txn();  # if out of transactions, the rest of master is good to go
	warn "# comparison '",(key_of_txn(peek_txn())), "' cmp '", $CurrentMasterKey , "'\n" if $verbose;
	warn "# comparison:= ", ((key_of_txn(peek_txn()) cmp $CurrentMasterKey) ), "\n" if $verbose;
	if (key_of_txn(peek_txn()) lt $CurrentMasterKey){
		# if ->{action} 'C'reate, with CSV, could insert new record with mapping of columns. __DEFER__
		# else error can't Read Update, Delete a non-matching record
		warn "unmatched Txn:", pp(peek_txn()), "\n";
		shift @Transactions;
		redo MASTER;
	} 
	while (key_of_txn(peek_txn()) eq $CurrentMasterKey) {
		# matches, consume transaction (if two Masters with equal key, arbitrarily apply all txn to first.
		my $txn = shift @Transactions; 
		if ('U' eq $txn->{action}){
			warn "# $n: Applying U\n" if $verbose;
			apply_update(\$_,$txn);
			say "# $n: U: $_" if $verbose;
		}
		elsif ($txn->{action} =~ /[CRD]/){
			warn "Txn. unsupported:", pp($txn);
		}
		else {
			# no action if next transaction is greater than current key
		} 
	} # end WHILE TXNs



    } # end in range
    else {
	    # not in range, nothing to do, pass thru ok
    }

} 
continue {
	### next always comes here
	### Last of this
	say $_ if 1; # unless D ... 
	$LastMasterKey = $CurrentMasterKey;
	$n++;
	# do_something_else;
	# then back to the top to re-check EXPR
}
## last always comes here

# TODO if success, close input, output, then if -i, mv input to input.ext and mov tmp to input

# vim: ai:ts=8:sw=8:ic:noexpandtab: