#!/usr/bin/env perl
# License: Public Domain or CC0
# See https://creativecommons.org/publicdomain/zero/1.0/
# The author, Jim Avera (jim.avera at gmail) has waived all copyright and
# related or neighboring rights.  Attribution is requested but is not required.
use strict; use warnings FATAL => 'all'; use 5.010;

{ no strict 'refs'; ${__PACKAGE__."::VER"."SION"} = "dev"; }
our $VERSION = "0.002"; # Future: V ERSION from Dist::Zilla::Plugin::OurPkgVersion
our $DATE = '2023-10-31'; # DATE from Dist::Zilla::Plugin::OurDate

#use 5.12; # for unicode_strings
use v5.18; # for lexical_subs

use strict; use warnings;
use feature qw(switch state say lexical_subs current_sub fc);
use feature qw(unicode_strings unicode_eval evalbytes);
no warnings "experimental::lexical_subs";

use open ':encoding(UTF-8)';
{ use open ':locale'; } # Encode messages for the user's console
STDOUT->autoflush; STDERR->autoflush;

use Carp; $Carp::MaxArgNums = 0;
use Data::Dumper::Interp 6.009 qw/visnew dvis ivis dvisq visq qsh qshlist vis visq/;
use Path::Tiny 0.144;
use File::Spec::Functions qw(tmpdir);
use Text::ParseWords 3.31 qw/parse_line shellwords/;
use Encode qw(decode encode :fallback_all);
use Encode::Locale qw/decode_argv/; # defines encs 'locale_fs' 'console_out'
use FindBin qw/$Bin $Script/;
use Guard qw(guard scope_guard);
use List::Util qw/min max any first/;
use List::MoreUtils qw/indexes firstidx/;
use List::MoreUtils qw/indexes/;
use DateTime;

use Getopt::Long 2.37 qw/GetOptions GetOptionsFromArray/;
use Pod::Usage qw/pod2usage/;

use Spreadsheet::Edit 1000.006 qw(title2ident);
use Spreadsheet::Edit::IO qw/convert_spreadsheet
             sheetname_from_spec filepath_from_spec form_spec_with_sheetname/;
use Spreadsheet::Edit::Log qw/oops btw btwN/;

use ODF::lpOD;
use ODF::lpOD_Helper qw/:DEFAULT arraytostring/;
use ODF::MailMerge qw/replace_tokens MM_SUBST/;

sub _eval_and_preen_exception(&) {
  # Run some code which may fail because of a user error (file not found etc.)
  # Warnings are suppressed, but $@ is set if an exception occurred,
  # with " at /path/to/our/code.pm line xxx" stripped from $@ so it
  # is suitable to be shown to the user.
  state $FILE = __FILE__;
  local $SIG{__WARN__} = sub {};
  my ($r, @r);
  if (wantarray) { @r = eval{ $_[0]->() } } else { $r = eval{ $_[0]->() } };
  if ($@) {
    $@ =~ s/\s* at \S*\Q${FILE}\E line.*//; # strip reference to our code
  }
  wantarray ? @r : $r
}

sub _bracketize($) {
  local $_ = shift;
  return undef unless defined;
  /\A\{.*\}\z/s ? $_ : "{${_}}"
}
sub _debracketize($) {
  local $_ = shift;
  return undef unless defined;
  /\A\{(.*)\}\z/s or confess dvis 'no brackets in $_';
  $1
}

my $exitstatus;

my ($debug, $verbose);
my %sheets; # path => opened sheet
my $doc;
my $body;

sub run_command($@) {
  my ($sourceinfo, $op, @args) = @_;
  my sub mydie { die "(${sourceinfo}) ",qsh($op)," : ", @_,"\n" }
  my sub compile_if_regex($) {
    local $_ = shift;
    if (m#\A(/.*/[a-z]*)\z#a) {
      my $re = eval "qr".$1;  #  "qr/blahblah/msix"
      mydie ivis 'Invalid regex $_ : ',$@ if $@;
      die "bug ",vis(ref $re) unless ref($re) eq "Regexp";
      return $re;
    }
    $_
  }
  my sub eval_if_styled_content($) {
    local $_ = shift;
    if (/^\[\[.*\],\s*["'].*\]$/) { # looks like [Styled content] Perl expr
      my $c = eval $_;
      mydie ivis 'Value $_ looks like a [Styled content] expression but something is wrong:\n',$@ if $@;
      return $c;
    }
    $_
  }
  warn "> ", qshlist($op, @args),"\n" if $verbose;
  if ($op eq 'print') {
    local $^O = 'linux';  # Force quoting for /bin/sh, the style used by
                          # our command scripts on all platforms.
    say qshlist(@args);
  }
  elsif ($op eq 'skeleton') {
    my $skelpath = $args[0] // mydie "No path specified";
    @args == 1 or mydie ivis 'Extraneous argument: @args[1..$#args]';
    warn "Opening ",qsh($skelpath),"\n" if $verbose;
    _eval_and_preen_exception {
      # Not certain whether this dies on error or just warns and returns undef
      $doc = odf_new_document_from_template($skelpath) // mydie $!;
    };
    mydie $@ if $@;
    $body = $doc->get_body;
  }
  elsif ($op eq 'save') {
    my $opt_force;
    GetOptionsFromArray(\@args, 'f' => \$opt_force) or mydie "invalid argument";
    mydie "No desitnation specified" unless @args;
    @args == 1 or mydie ivis 'Extraneous argument: @args[1..$#args]';
    my $dest = path($args[0]);
    if ($opt_force) {
      if ($dest->exists && ! -w $dest->canonpath) {
        warn "> Attempting to give write permission to read-only $dest\n";
        $dest->chmod("u+w");
      }
    } else {
      mydie "$args[0] ALREADY EXISTS (use -f to force)"
        if $dest->exists;
    }
    $doc->save(target => $args[0]);
  }
  elsif ($op eq '_eval_perlcode') {
    # Undocumented, used in tests
    eval join(" ",@args);
    mydie ivis "eval failed: $@" if $@;
  }
  elsif ($op eq 'subst-value') {
    mydie "Odd number arguments.  TOKEN VALUE pairs are expcted"
      unless (scalar(@args) % 2) == 0;
    mydie "A skeleton must be read first" unless $body;
    my %hash;
    while (@args) {
      my $key = compile_if_regex(_debracketize(_bracketize(shift @args)));
      my $value = eval_if_styled_content(shift @args);
      $hash{$key} = $value;
    }
    warn dvis '  %hash\n' if $debug;
    my $count = replace_tokens($body, \%hash);
    warn "  $count token(s) replaced\n" if $debug;
  }
  elsif ($op eq 'mail-merge') {
    my $proto_tag = _bracketize(shift @args) // mydie "missing arguments";
    mydie "A skeleton must be read first" unless $body;
    my $engine = ODF::MailMerge::Engine->new(context => $body,
                                             proto_tag => $proto_tag);
    # Parse the remaining arguments, which are any number of sets of
    #    -a aliasname=colspec ... -k COLUMN ... SPREADSHEET
    # (COLUMNs only apply to secondary spreadsheets)
    # Each spreadsheet is opened, aliases applied, and any COLUMNs verified.
    my @sheets; #  [sheet, \%key2rx, [colspeclist]]
    { Getopt::Long::Configure ("default", "permute");
      my (@colspecs, %aliases);
      GetOptionsFromArray( \@args,
        'k=s' => \@colspecs,
        'a=s' => \%aliases,
        '<>'  => sub {
          my $ss = $_[0];
          my $sheet = Spreadsheet::Edit->new();
          (_eval_and_preen_exception {$sheet->read_spreadsheet($ss)}) // mydie $@;
          while (my ($key,$val) = each %aliases) {
            $sheet->alias($key => compile_if_regex($val));
          }
          btw dvis '$ss %aliases $sheet->colx()' if $debug;
          push @sheets, [$sheet];
          if (@colspecs) {
            mydie "-k COLSPEC args are only applicable to secondary spreadsheets"
              if @sheets == 1;
            foreach (@colspecs) { $_ = compile_if_regex($_) }
            my @cxlist = map{ (_eval_and_preen_exception { $sheet->spectocx($_) })
                                // mydie ivis '$_ : ',$@ } @colspecs;
            # Build index of [seconday's column values] => rx
            my %key2rx;
            $sheet->apply(sub{
              my $rx = $sheet->rx;
              my $row = $sheet->[$rx];
              my @values = map{ $row->[$_] } @cxlist;
              my $key = arraytostring(\@values);
              mydie 'Key(s) ',avis(@colspecs),
                    " do not uniquely identify a record in ",
                    $sheet->data_source(),"\n  (same value(s) in row ",
                    ($rx+1)," and ", ($key2rx{$key}+1),")"
                if exists $key2rx{$key};
              $key2rx{$key} = $rx;
            });
            push @{ $sheets[-1] }, \%key2rx, [@colspecs];
          } else {
            mydie '-k COLSPEC arg(s) must be specified for ',
                  qsh($sheet->data_source()) if @sheets > 1;
          }
          @colspecs = ();
          %aliases = ();
        },
      ) // mydie "Unknown option";
      mydie "No spreadsheet specified" unless @sheets;
      mydie "-a options must preceed the applicable SPREADSHEET" if keys %aliases;
      mydie "-k options must preceed the applicable SPREADSHEET" if @colspecs;
    }

    btw dvis '###AAA @sheets' if $debug;

    # Execute the mail-merge
    my $mainsheet = $sheets[0][0];
    my $maincolx = $mainsheet->colx();
    $mainsheet->apply(sub{
      $engine->add_record({ '*' => sub{
        # wildcard callback
        my ($tokname, $token) = @_;
        my @rowstack = ( $mainsheet->crow() ); # current row in apply
        for (my $i = 1 ; ; $i++) {
          my $row = $rowstack[-1];
          if (exists $row->{$tokname}) {
            return(MM_SUBST,
                   eval_if_styled_content($row->{$tokname} // oops));
          }
          mydie ivis 'Spreadsheet column $tokname does not exist'
            if $i > $#sheets;
          # Locate a record in the next secondary spreadsheet
          my ($sh2, $key2cx, $colspecs) = @{ $sheets[$i] };
          my @primary_values = map{ $rowstack[-1]->[$sh2->spectocx($_)] }
                                  @$colspecs;
          my $key = arraytostring(\@primary_values);
          my $sec_rx = $key2cx->{$key}
            // mydie "No record in secondary has ",
                     avis(map{ $_->[1] } @$colspecs), " with values ",
                     avis(@primary_values);
          push @rowstack, $sheets[$i][0]->[$sec_rx];
        }
      }#'*' wildcard callback
      }); #calling engine->add_record
    });#mainsheet apply
    $engine->finish;
  }
  else {
    mydie 'Unknown command ';
  }
}

sub run_script($$) {
  my ($fh, $sourcename) = @_;
  my sub mydie { die "(${sourcename} line $.) ", @_,"\n" }

  while (<$fh>) {
    while (s/\\\n\z//s) {
      mydie "EOF following \\<newline>" if eof($fh);
      $_ .= <$fh>;
    }
    chomp;
btw dvis 'LINE $_' if $debug;
    # Split ;-seprated commands and #tail comments
    #   N.B. parse_line generates undef if string contains only delimiters!
    #   https://rt.cpan.org/Public/Bug/Display.html?id=50753
    # This preserves all quotes, backslashes and delimiters but
    # isolates delimiters into separate words -- in this case including # and ;
    my @parts = grep{defined} parse_line(qr/(\s+|[\#;])/, "delimiters", $_);
    if ((my $ix = firstidx{ $_ eq '#' } @parts) >= 0) {
btw dvis 'CHOPPING OFF #COMMENT at $ix in @parts' if $debug;
      splice @parts, $ix;
    }
btw dvis '@parts' if $debug;
    my @cmds;
    while ((my $len = firstidx{ $_ eq ';' } @parts) >= 0) {
      push @cmds, [ @parts[0..$len-1] ];
      splice @parts, 0, $len+1;
    }
    push @cmds, \@parts;
    foreach (@cmds) {
      # Re-parse to eliminate quotes, backslashes, etc. to get final data words
      my $cmdstr = join("", @$_);
      my @words = grep{defined} shellwords($cmdstr);
      if (@words) {
        #####################
        run_command("$sourcename line $.", @words)
        #####################
      } else {
        mydie ivis 'Syntax error in $cmdstr\n(probably incorrect quoting)'
          if $cmdstr =~ /\S/;
      }
    }
  }
}

##################################################################
#  MAIN BODY
##################################################################

### PARSE OPTIONS ###
sub badargs_exit(@) { pod2usage(-output => \*STDERR, -exitval => 2, @_) }

my $script;
Getopt::Long::Configure ("default", "gnu_getopt", "auto_version");
decode_argv(Encode::FB_CROAK); # @ARGV now contains *characters*
GetOptions(
   "d|debug"                   => \$debug,
   "v|verbose"                 => \$verbose,
   "h|help"                    => sub{
      pod2usage(-verbose => 2, -output => \*STDOUT, -exitval => 0)
   },
   "e=s"                       => sub{ ($script//="") .= $_[1]."\n"; },
) or badargs_exit(-msg => "Invalid argument");
if ($debug) {
  $verbose = 1;
  $Carp::Verbose = 1;
}
if (@ARGV==0 && !defined $script) {
  if (-t STDIN) {
    badargs_exit(-msg => "No script specified!");
  }
}

### EXECUTE THE SCRIPT(S) ###
if (!defined($script) && @ARGV==0) {
  run_script(*STDIN, "<stdin>");
} else {
  if (defined $script) {
    btw dvis '### $script' if $debug;
    # $script contains decoded characters due to decode_argv() call above;
    # un-do this so the script can be read as a file
    $script = encode("utf8", $script);
    open my $fh, "<", \$script or oops $!;
    binmode($fh, ":utf8") or oops $!;
    run_script($fh, "-e");
  }
  foreach (@ARGV) {
    my $encoded_path = encode("locale_fs", $_, FB_CROAK|LEAVE_SRC);
    open my $fh, "<", $encoded_path or die "$encoded_path : $!\n";
    run_script($fh, $_);
  }
}

### EXIT ###
$exitstatus //= 0;
warn "> Exiting with status $exitstatus\n" if $debug;
exit $exitstatus;

__END__

=encoding utf8

=head1 NAME

odfedit - replace tokens and do mail-merge in an Open Document file

=head1 SYNOPSIS

  odfedit [-v] <scriptfile
  odfedit [-v] [-e 'script'] scriptfiles...

Run C<odfedit --help> for details.

=head1 DESCRIPTION

This command-line tool allows L<ODF::MailMerge> to to be used in simple
ways without writing Perl code.

Place-holder tokens of the form "{key}" are replaced with real content,
either individually or as part of a "mail merge" function where
records from a spreadsheet are used to instantiate multiple copies of
a table.

A command script may be specified with B<-e> option(s) and/or
read from B<scriptfile>(s),
or if neither of those are given, read from I<stdin>.

=head1 SCRIPT SYNTAX

Scripts are parsed similarly to Unix shell scripts:

=over 2

#comments and blank lines are ignored

Command names and arguments are delimited by white space.
"double quotes", 'single quotes' or \-escapes may be used
for arguments with embedded spaces or special characters.

Commands must be on a single logical line. Use \<newline> to fold
long lines and semicolon (;) to separate commands on the same line.

=back

=head1 DEFINITIONS

Any B<TOKEN> argument has brackets added to form "{TOKEN}"
unless the argument already contains the { and }.
Literal { } or : characters may be used with \-escapes
(such arguments should be 'single quoted' in the script).

B<COLSPEC> means a spreadsheet column specifier: It may be an actual
title, an identifier derived from a title by replacing offending characters
with underscores, an absolute column letter ("A", "B" etc.)
or a regular expression written as "/.../" which matches one title.

B<SPREADSHEET> means the path of
a .csv file or a spreadsheet (.ods, .xlsx, etc.)
If a multi-sheet spreadsheet workbook is used, SPREADSHEET must be of
the form "PATH!SHEETNAME" to specify which sheet to use.
Spreadsheets may be used only if a modern version of Libre Office is
installed; and, due to an LO bug, LO may not be open interactively even
for unrelated purposes.   .csv files may always be used without issue.

=head1 COMMANDS

In general, scripts should first specify a I<skeleton> .odt file,
then replace {tokens} and/or perform mail-merge operations,
and finally I<save> to a different filename.

=head2 skeleton I<INPUTPATH>  # .odt, .ods etc. file

Read a document into memory.

It should contain static content and "{token}" strings
(including the curly brackets) where content should be substituted.

=head2 save [-f] I<OUTPUTPATH>  # .odt etc.

Write the document to the specified path, which must not already exist
unless the B<-f> (force) option is specified.

=head2 subst-value I<TOKEN> I<VALUE> ...

Replace "{TOKEN}" with VALUE wherever it appears.

The result will have the same formatting as {TOKEN} in the skeleton
(specifically, the format of the '{' character).

However if VALUE looks like a "[Styled content]" specifier as described in
L<ODF::MailMerge> then it is evaluated as such and may
specify format overrides.

=head2 mail-merge I<PROTO-TOKEN> I<SPREADSHEET>

=head2 mail-merge I<PROTO-TOKEN> B<-a> I<aliasname=COLSPEC> ... <SPREADSHEET>

First, a prototype Table is located which contains "{PROTO-TOKEN}"
somewhere within it.  The "{PROTO-TOKEN}" string is used only to
locate the table in the skeleton and is immediately deleted so it will not
affect the final result.

Then the prototype table is replicated once for each record in SPREADSHEET.
Within each replicate, "{token}" fields are replaced with values
from the corresponding spreadsheet record, using "token" (without the
brackets) as a COLSPEC to identify which column value to use from the record.

=over

If B<-a> option(s) are specified, then each I<aliasname> becomes an
alternate COLSPEC for the indicated column.
For example the prototype table may contain "{aliasname}" instaed
of "{The Actual Column Title}".
I<aliasname> must be an identifier, i.e. must start with a letter and contain
only letters, digits, and underscores.

=back

An error occurs if a "token" does not indicate any column in the spreadsheet.

=head2 mail-merge I<PROTO-TOKEN> I<SPREADSHEET> B<-k> I<COLSPEC> ... I<SPREADSHEET2>

(Although not shown, each SPREADSHEET* arg may also be preceeded
by B<-a> options to define column aliases.)

In this form, some {token} values may be found in a specific record from a
secondary spreadsheet; that record is selected using the values in
the primary sheet's record of columns indicated by the I<COLSPEC>s.

For example, to produce a company directory of department contacts for
Payroll, Personnel, etc., the prototype table might contain tokens
{Dept}, {Name}, {Email} and {Phone}.

However the primary spreadsheet could contain only "Dept" and "Name" columns,
with "Email" and "Phone" values looked up in a secondary spreadsheet
using the value of "Name" as the key.  Here is how it works:

  For each record (i.e. row) in the primary spreadsheet:
    For each {token} in the prototype table:
      If "token" indicates a column in the current record:
        Substitute the value
      Else:
        Locate the record in the secondary spreadsheet which has values
        in the indicated COLSPECs ("Name" in the example) which match the
        corresponding values in the primary record.  An error occurs unless
        exactly one matching record is found.

        If "token" indicates a column in the selected secondary record
        then substitute the value, otherwise fail with an error.

Note that B<-a> options may be used to alias uniquely-named columns
so that the I<COLSPEC>s given with B<-k> options will work to match
the appropriate column in either spreadsheet.

Tertiary, etc. spreadsheets may be specified in additional command arguments;
if "token" is not found in a secondary record, then the record in the
next spreadsheet is selected which has values in columns indicated
by (the next spreadsheet's) -k COLSPEC arguments which match those in
the current secondary spreadsheet or, if not present, a predecessor spreadsheet.

=head2 print [I<argument>s...]

Print the arguments to stdout, separated by spaces, quoting arguments
which contain spaces or special characters.

=head1 SEE ALSO

It may be helpful to refer to the following to understand how all this works:

L<ODF::MailMerge>

L<Spreadsheet::Edit>

=head1 AUTHOR

Jim Avera (jim.avera at gmail)

=head1 LICENSE

CC0 1.0 / Public Domain.   However this requires ODF::lpOD to function so
as a practical matter you must comply with ODF::lpOD's license.

=cut
