#!/usr/bin/env perl
# License: Public Domain or CC0
# See https://creativecommons.org/publicdomain/zero/1.0/
# The author, Jim Avera (jim.avera at gmail) has waived all copyright and
# related or neighboring rights.  Attribution is requested but is not required.
use strict; use warnings FATAL => 'all'; use 5.010;

our $VERSION = '1.007'; # VERSION
our $DATE = '2023-05-30'; # DATE

######################################################################
# POD HAS BEEN MOVED TO THE STUB LIBRARY App::diff_spreadsheets.pm
######################################################################

use strict; use warnings;
use feature qw(switch state say lexical_subs fc);
no warnings "experimental::lexical_subs";

use Carp; $Carp::MaxArgNums = 0;
use Data::Dumper::Interp;
use File::Basename qw(basename dirname fileparse);
use File::Path qw(make_path remove_tree);
use File::Temp qw(tempfile tempdir);
use File::Copy ();
use File::Spec::Functions qw(canonpath catfile catdir splitpath splitdir rootdir tmpdir);
use FindBin qw/$Bin $Script/;
use Getopt::Long qw/GetOptions/;
use Guard qw(guard scope_guard);
use List::Util qw/min max any first/;
use List::MoreUtils qw/indexes/;
use Scalar::Util qw/openhandle/;
use Pod::Find qw/pod_where/;
use Pod::Usage qw/pod2usage/;
use Spreadsheet::Edit qw(title2ident);
use Spreadsheet::Edit::IO qw/convert_spreadsheet
             sheetname_from_spec filepath_from_spec form_spec_with_sheetname/;
use Term::ReadKey ();
sub oops(@) { unshift @_, "oops "; require Carp; goto &Carp::confess; }
#$SIG{__WARN__} = sub{ Carp::cluck @_ };

sub main::Differ::compile_if_regex(@); #forward

# Replace invalid/undesirable filename characters with underscore
sub sanitize_filename(_) { local $_ = shift; s/[^-._[:word:]]/_/g; s/_$//; $_ }

use utf8;
use open ':std', ':locale';
select STDERR; $| = 1; select STDOUT; $| = 1;

use Encode qw/decode/;
require PerlIO;
my ($stdout_encoding) = map{ /encoding\((.+)\)/ ? ($1) : () }
                        PerlIO::get_layers(*STDOUT);
sub decoded_readlink(_) {
  my $r = readlink(shift);
  defined($r) && defined($stdout_encoding)
    ? decode($stdout_encoding, $r) : $r
}

#-------- Get Arguments ---------------
sub call_pod2usage {
  confess "bug" if (scalar(@_) % 2) != 0;
  my %opts = @_;
  if (! $opts{-msg}) {
    if (my $podpath = pod_where({-inc => 1},"App::diff_spreadsheets")) {
      $opts{-input} = $podpath;
    } else {
      warn "Could not find App::diff_spreadsheets in \@INC\n",
           "\@INC:\n", join("\n   ",@INC), "\n";
    }
  }
  pod2usage(\%opts);
}

=head1 SYNOPSYS

Documentation is not here in the executable script!

If you are seeing this, most likely you ran `perldoc diff_spreadsheets`
or something equivalent.
To see the documentation, please run

=over

B<diff_spreadsheets -h>

=back

or

=over

B<perldoc [options] App::diff_spreadsheets>

=back

=cut

sub badargs_exit(@) {
  call_pod2usage(-output => \*STDERR, -exitval => 2, @_);
}

# We could use Term::Encoding to detect the terminal's encoding
# but that would create a possibly-undesirable dependency.
# We just assume UTF-8, which these days is probably correct.
#my $rightarrow = '->';
#my $rightarrow = "\N{RIGHTWARDS ARROW}\N{THIN SPACE}";
#my $rightarrow = "\N{RIGHTWARDS ARROW}\N{NARROW NO-BREAK SPACE}";
my $rightarrow = "\N{RIGHTWARDS ARROW}\N{HAIR SPACE}";

# 0 or undef to not wrap long lines
my $maxwidth = do{
  u($ENV{COLUMNS}) =~ /^[1-9]\d*$/
  ? # Overrides actual terminal width; needed for testing
    $ENV{COLUMNS}
  : do{
      my $wmsg = ""; # Suppress hard-coded "didn't work" from Term::ReadKey
      local $SIG{'__WARN__'} = sub { $wmsg .= $_[0] };
      my ($width, $height) = Term::ReadKey::GetTerminalSize(*STDOUT);
      warn $wmsg if $wmsg && $wmsg !~ /did.*n.*work/i;
      $width // 80
    }
};

my %opts = (
  sort_rows       => 1,
  quote_char      => '"',
  sep_char        => ',',
  encoding        => 'UTF-8',
  trunc_title_width => $maxwidth > 60 ? int($maxwidth/3) : 20,
);
my $method = "native";
my @diff_opts;
my $help;

badargs_exit(-msg => "Usage: $Script file1 file2\n   -h for help") if @ARGV==0;
Getopt::Long::Configure ("default", "gnu_getopt", "auto_version");
GetOptions(
   "always-show-columns=s"     => sub{ push @{$opts{always_show_columns}}, $_[1] },
   "a|text"                    => sub{ push @diff_opts, "-$_[0]"; },
   "B|ignore-blank-lines"      => sub{ push @diff_opts, "-$_[0]"; },
   "b|ignore-space-change"     => sub{ push @diff_opts, "-$_[0]"; },

   # --color is valid only for 'git diff'
   "color=s"                   => sub{ push @diff_opts, "-$_[0]=$_[1]"; },

   "columns=s"                 => sub{ push @{$opts{columns}}, $_[1] },
   "c|C|context=i"             => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "debug"                     => sub{ $opts{debug} = $_[1] },
   "D|ifdef=s"                 => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "d|minimal"                 => sub{ push @diff_opts, "-$_[0]"; },
   "encoding=s"                => sub{ $opts{encoding} = $_[1] },
   "e|ed"                      => sub{ push @diff_opts, "-$_[0]"; },
   "E|ignore-tab-expansion"    => sub{ push @diff_opts, "-$_[0]"; },
   "first-data-row=i"          => sub{ $opts{first_data_row} = $_[1] },
   "F|show-function-line=s"    => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "GTYPE-group-format=s"      => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "hash-func=s"               => sub{ $opts{hash_func} = $_[1] },
   "hashid-func=s"             => sub{ $opts{hashid_func} = $_[1] },
   "horizon-lines=s"           => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "h|help"                    => \$help,
   "id-columns=s"              => sub{ push @{$opts{id_columns}}, $_[1] },
   "i|ignore-case"             => sub{ push @diff_opts, "-$_[0]"; },
   "I|ignore-matching-lines=s" => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "keep-temps!"               => sub{ $opts{keep_temps} = $_[1] },
   "label=s"                   => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "line-format=s"             => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "LTYPE-group-format=s"      => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "l|paginate"                => sub{ push @diff_opts, "-$_[0]"; },
   "m|method=s"                => \$method,
   "n|rcs"                     => sub{ push @diff_opts, "-$_[0]"; },
   "p|show-c-function"         => sub{ push @diff_opts, "-$_[0]"; },
   "quiet"                     => sub{ $opts{quiet} = $_[1] },
   "quote-char=s"              => sub{ $opts{quote_char} = $_[1] },
   "q|brief"                   => sub{ push @diff_opts, "-$_[0]"; },
   "sep-char=s"                => sub{ $opts{sep_char} = $_[1] },
   "setup-code=s"              => sub{ $opts{setup_code} = $_[1] },
   "sheets=s"                  => sub{ push @{$opts{sheet_names}}, $_[1] },
   "show-empties"              => sub{ $opts{show_empties} = $_[1] },
   "silent"                    => sub{ $opts{silent} = $_[1] },
   "sort-rows!"                => sub{ $opts{sort_rows} = $_[1] }, # allow --no-sort
   "speed-large-files"         => sub{ push @diff_opts, "-$_[0]"; },
   "strip-trailing-cr"         => sub{ push @diff_opts, "-$_[0]"; },
   "suppress-common-lines"     => sub{ $opts{suppress_common_lines} = 1; push @diff_opts, "-$_[0]"; },
   "s|report-identical-files"  => sub{ push @diff_opts, "-$_[0]"; },
   "tabsize=i"                 => sub{ push @diff_opts, "-$_[0]", $_[1]; },
   "title-row=i"               => sub{ $opts{title_row} = $_[1] },
   "t|expand-tabs"             => sub{ push @diff_opts, "-$_[0]"; },
   "T|initial-tab"             => sub{ push @diff_opts, "-$_[0]"; },
   "u|U|unified:i"             => sub{ push @diff_opts, "-$_[0]", $_[1]?($_[1]):(); },
   "v|verbose"                 => sub{ $opts{verbose} = $_[1] },

   # valid only for 'git diff':
   "word-diff-regex=s"         => sub{ push @diff_opts, "-$_[0]=$_[1]"; },
   "word-diff=s"               => sub{ push @diff_opts, "-$_[0]=$_[1]"; },
   "ws-error-highlight=s"      => sub{ push @diff_opts, "-$_[0]=$_[1]"; },

   "w|ignore-all-space"        => sub{ push @diff_opts, "-$_[0]"; },
   "W|width=i"                 => sub{ $maxwidth = $_[1]; push @diff_opts, "-$_[0]", $_[1]; },
   "y|side-by-side"            => sub{ push @diff_opts, "-$_[0]"; },
   "Z|ignore-trailing-space"   => sub{ $opts{ign_trailing_spaces} = 1; push @diff_opts, "-$_[0]"; },
   # Options valid only for 'git diff':
) or badargs_exit(-msg => "$Script -h for help");

call_pod2usage(-verbose => 2, -output => \*STDOUT) if $help;

foreach (@diff_opts) { s/^-(\w\w+)/--$1/ }  # change -longopt to --longopt

foreach my $key (qw/columns id_columns always_show_columns sheet_names/) {
  next unless $opts{$key};
  @{$opts{$key}} = map{ split/(?<!\\),/ } @{$opts{$key}}
}
$opts{verbose} //= $opts{debug};


our ($hash_func_code, $hashid_func_code, $setup_code_code);
foreach my $argname (qw(hashid-func hash-func setup-code)) {
  (my $optskey = $argname) =~ s/-/_/g;
  next unless defined $opts{$optskey};
  my $source = "package Usercode;" . $opts{$optskey};
  no strict 'refs';
  ${"${optskey}_code"} = eval $source;
  die "Syntax error in Perl code for --$argname option:\n$@" if $@;
  die "--$argname did not produce a sub ref (got ",u(${"${optskey}_code"}),"\n"
    unless ref(${"${optskey}_code"}) eq "CODE";
}
$hashid_func_code //= sub{ Carp::cluck "bug" if grep{! defined} @{$_[0]}; join ",", @{$_[0]} };
$hash_func_code   //= sub{ Carp::cluck "bug" if grep{! defined} @{$_[0]}; join ",", @{$_[0]} };
$setup_code_code  //= sub{};

badargs_exit(-msg => "Two files must be specified") if @ARGV != 2;
$opts{origpath1} = $ARGV[0];
$opts{origpath2} = $ARGV[1];

if ($opts{keep_temps}) {
  my $dir = File::Spec->catfile(tmpdir(), "dstmp");  # /tmp/dstmp
  remove_tree($dir);
  mkdir $dir or die "mkdir $dir : $!";
  warn "> Created ",qsh($dir),"\n";
  $opts{tempdir} = $dir;
} else {
  $opts{tempdir} = tempdir("diffspread_XXXXX", DIR=>tmpdir(), CLEANUP=>1);
}

# Extract the possibly-multiple "sheets" from each spreadsheet into
# separate .csv files and process the corresponding pairs.
# If a specific sheet was specified, then only that sheet will be extracted.
foreach my $N (1,2) {
  my $origpath = $opts{"origpath$N"};
  my $sheet_from_path = sheetname_from_spec($origpath);
  if (defined $sheet_from_path) {
    die "--sheets argument not allowed because filename specifies a sheet:\n",
        "  ", qsh($origpath), "\n"
      if $opts{sheet_names};
  }
  #my $dir = File::Spec->catdir($opts{tempdir},title2ident($origpath));
  my $dir = File::Spec->catdir($opts{tempdir},"«INFILE$N»");
  remove_tree $dir; make_path $dir || die;
  my $h = convert_spreadsheet($origpath, cvt_to=>"csv",
                                         allsheets => !$sheet_from_path,
                                         outpath => $dir,
                                         debug => $opts{debug},
                             );
  $opts{cvt_from} = $h->{cvt_from};
  if ($h->{cvt_from} =~ /csv/i && !openhandle($origpath)) {
    # Input was already a CSV (possibly detected by peeking at actual content).
    # The "converted" file has a bogus name; use the original path instead.
    $opts{"csvpaths$N"} = [ $origpath ];
  } else {
    opendir(my $dh, $dir) or die "$dir : $!\n", dvis('$h');
    my @csvpaths = sort map{ File::Spec->catdir($dir,$_) }
                        File::Spec->no_upwards(readdir $dh) ;
    if (0 and @csvpaths == 1) { ## FIXME WHY IS THIS DISABLED ???
                                ##Answer: Because we want to see the sheetname even if user did not specify it
      # Only one sheet in this file: Rename the csv to just show the
      # spreadsheet file name without a redundant [sheetname] suffix.
      my $new_tmpfname = File::Spec->catdir($dir, basename($origpath)).".csv";
      rename $csvpaths[0], $new_tmpfname or die;
      $opts{"csvpaths$N"} = [ $new_tmpfname ];
    } else {
      $opts{"csvpaths$N"} = \@csvpaths;
    }
  }
  my %sheet2csvpath;  # sheetname => csvpath
  foreach (@{ $opts{"csvpaths$N"} }) {
    my ($sheetname,undef,undef) = fileparse($_, qr/\.csv/i);
    die if exists $sheet2csvpath{$sheetname};
    $sheet2csvpath{$sheetname} = $_;
  };
  $opts{"sheet2csvpath$N"} = \%sheet2csvpath;
}

my $onlyone = @{ $opts{"csvpaths1"} } == 1 && @{ $opts{"csvpaths2"} } == 1;

# If --sheets arg(s) were given, diff only pairs of so-named sheets.
# Otherwise if each file contains only one sheet then diff them regardless
# of their names, but with a multi-sheet file diff all (and only) pairs of
# same-named sheets.

my $status = 0;
$opts{sheetname_header} = "";
my @pairs;  # { n1 n2 label1 label2 sheetname_header }
if (@{$opts{sheet_names}//[]}) {
  # Sheet names were specified; corresponding sheets might have different
  # names if the user specified a regex.
  foreach (@{$opts{sheet_names}}) {
    my $spec = main::Differ::compile_if_regex($_);
    if (ref($spec)) {
      my @names1 = grep{ /$spec/s } keys %{ $opts{sheet2csvpath1} };
      die "$spec matches no sheet name in $opts{origpath1}\n"
        unless @names1 > 0;
      die "'$spec' matches multiple sheet names in $opts{origpath1}\n"
        if @names1 > 1;
      my @names2 = grep{ /$spec/s } keys %{ $opts{sheet2csvpath2} };
      die "$spec matches no sheet name in $opts{origpath2}\n"
        unless @names2 > 0;
      die "'$spec' matches multiple sheet names in $opts{origpath2}\n"
        if @names2 > 1;
      push @pairs, { n1 => $names1[0], n2 => $names2[0],
                     sheetname_header => $names1[0] eq $names2[0]
                                 ? "*** sheet ".qsh($names1[0])." ***\n"
                                 : "*** sheet ".qsh($names1[0])."/".qsh($names2[0])." ***\n"
                   };
    } else {
      die "sheet '$spec' does not exist in $opts{origpath1}\n"
        unless $opts{sheet2csvpath1}{$spec};
      die "sheet '$spec' does not exist in $opts{origpath2}\n"
        unless $opts{sheet2csvpath2}{$spec};
      push @pairs, { n1 => $spec, n2 => $spec,
                     sheetname_header => "*** sheet ".qsh($spec)." ***\n" };
    }
  }
}
elsif ($onlyone) {
  # User did not specity a sheet name, but there is only one in each file.
  # Compare them regardless of their names.
  my ($name1, $bug1) = keys %{ $opts{sheet2csvpath1} }; die "bug1" if $bug1;
  $opts{path1} = $opts{sheet2csvpath1}{$name1};
  my ($name2, $bug2) = keys %{ $opts{sheet2csvpath2} }; die "bug2" if $bug2;
  # set the displayed names to just the spreadsheet paths sans sheetnames.
  push @pairs, { n1 => $name1, n2 => $name2,
                 label1 => $opts{origpath1}, label2 => $opts{origpath2} };
}
else {
  # User did not specity which sheets, but there are several
  foreach my $name (sort keys %{ $opts{sheet2csvpath1} }) {
    unless ($opts{sheet2csvpath2}{$name}) {
      say "*** sheet '$name' exists ONLY in ",qsh($opts{origpath1}),"\n";
      $status = max($status, 2);
      next;
    }
    push @pairs, { n1 => $name, n2 => $name,
                   sheetname_header => "*** ".qsh($name)." ***\n" };
  }
  foreach my $name (sort keys %{ $opts{sheet2csvpath2} }) {
    unless ($opts{sheet2csvpath1}{$name}) {
      say "*** sheet '$name' exists ONLY in ",qsh($opts{origpath2}),"\n";
      $status = max($status, 2);
      next;
    }
  }
}

foreach my $h (@pairs) {
  my ($name1, $name2, $label1, $label2, $sheetname_header)
              = @$h{qw/n1 n2 label1 label2 sheetname_header/};
  # If no labels provided, default to "/path/to/file.xls[sheetname]"
  $label1 //= form_spec_with_sheetname($opts{origpath1}, $name1);
  $label2 //= form_spec_with_sheetname($opts{origpath2}, $name2);

  $opts{path1} = $opts{sheet2csvpath1}{$name1} // die;
  $opts{path2} = $opts{sheet2csvpath2}{$name2} // die;
  $opts{label1} = $label1;
  $opts{label2} = $label2;
  $opts{sheetname_header} = $sheetname_header // "";
  $status = max($status, &process_pair);
}

exit $status;

sub process_pair {
  # N.B. %opts includes "origpathN" and "pathN" (the latter refers to a csv),
  # "sheetnameN", and "labelN".
  # "sheetname_header" is "" or "title...\n" to print before the first diff.
  # ---- Read data into memory, delete ignored columns, etc. ----
  my $obj = main::Differ->new( %opts );

  # Set data_source() to a unique human-readable representation of each
  # sheet.   This is the caller-specified $opts{nameN}, typically
  # "/path/to/file.xls[sheetname]" or just "/path/to/file",
  # modified to omit the common ancestor directory parts.
  my @splits;
  for my $N (1, 2) {
    push @splits, [ splitpath($obj->{"label$N"} // oops) ]; # [volume,dirs,fname]
  }
  if ($splits[0]->[0] eq $splits[1]->[0]) { # same volume
    $splits[0]->[0] = $splits[1]->[0] = "";
    my @dirs1 = File::Spec->splitdir($splits[0]->[1]);
    my @dirs2 = File::Spec->splitdir($splits[1]->[1]);
    while (@dirs1 && @dirs2 && $dirs1[0] eq $dirs2[0]) {
      shift @dirs1; shift @dirs2;
    }
    $splits[0]->[1] = catdir(@dirs1);
    $splits[1]->[1] = catdir(@dirs2);
  }
  for my $N (1, 2) {
    my $i = $N - 1;
    $obj->{"sheet$N"}->data_source(
           catdir($splits[$i]->[0], $splits[$i]->[1], $splits[$i]->[2]) );
  }

  # Preliminary raw comparison
  PRECHECK: {
    my $rows1 = $obj->{sheet1}->rows();
    my $rows2 = $obj->{sheet2}->rows();
    my $num_cols = $obj->{sheet1}->num_cols();
    last PRECHECK unless $obj->{sheet2}->num_cols() == $num_cols
                      && @$rows1 == @$rows2;
    for my $rx (0..$#$rows1) {
      my $r1 = $rows1->[$rx];
      my $r2 = $rows2->[$rx];
      last PRECHECK if first { $r1->[$_] ne $r2->[$_] } 0..($num_cols-1)
    }
    warn "> Paired sheets contain identical data, skipping fancier algos\n"
      if $opts{debug};
    return 0
  }

  if ($method ne "native") {
    foreach my $key (qw/id_columns always_show_columns hashid_func hash_func/) {
      if ($opts{$key}) {
        (my $optname = $key) =~ s/_/-/g;
        die "--${optname} option applies only to the 'native' method\n"
      }
    }
  }
  if ($method eq "diff") {
    $obj->compare_using_diff(['diff', '-u', @diff_opts]);
    return $obj->exit_status;
  }
  if ($method eq "tkdiff") {
    $obj->compare_using_diff([$method, @diff_opts]);
    return $obj->exit_status;
  }
  elsif ($method eq "git") {
    $obj->compare_using_diff([qw/git diff --no-index --color-words/, @diff_opts]);
    return $obj->exit_status;
  }
  elsif ($method eq "gitchars") {
    $obj->compare_using_diff([qw/git diff --no-index --word-diff=color --word-diff-regex=./, @diff_opts]);
    return $obj->exit_status;
  }
  elsif ($method eq "native") {
    $obj->compare_native();
    return $obj->exit_status;
  }
  else {
    die "Don't know comparison method '$method'\n";
  }
}


########################################
#
#
#
########################################
package main::Differ;

use Carp;
use File::Temp qw(tempfile tempdir);
use List::Util qw(first any min max any sum0);
use File::Basename qw(basename dirname fileparse);
use File::Path qw(make_path remove_tree);
use File::Spec::Functions qw(canonpath catfile catdir splitpath splitdir rootdir tmpdir);
use List::Util qw(min max any first);
use Spreadsheet::Edit qw(:DEFAULT logmsg cx2let let2cx);
use Data::Dumper::Interp;
sub oops(@) { unshift @_, "oops "; require Carp; goto &Carp::confess; }

sub _visualize($);
sub _title_or_origABC($$);

my $seen = {};
sub warnonce(@) {
  my $msg = join "",@_;
  return if $seen->{$msg}++;
  warn $msg;
}

sub cx2origcx($$) {
  my ($sheet, $currcx) = @_;
  #  0 1 2 3 [4] [5] 6 7 [8] 9 10  original
  #  0 1 2 3         4 5     6  7  after deletes
  my $deleted_cxs = $sheet->attributes->{DELETED_CXS} // confess("bug");
  my $nskipped = 0;
  my $oldcx = $currcx;
  for my $dcx (@$deleted_cxs) {
    return($currcx + $nskipped) if $dcx >= $oldcx;
    $oldcx = $dcx;
    ++$nskipped;
  }
  return($currcx + $nskipped);
}
sub cx2origlet($$) { cx2let(&cx2origcx) }

sub compile_if_regex(@) { # compile "/.../msix" strings to qr/.../msix
  my @specs = @_;
  foreach (@specs) {
    if (m#^(/.*/[a-z]*)\z|^m([/\[\{\(\<].*)\z#s) {
      my $regex = eval "qr${1}" // do{
        $@ =~ s/ at \(eval.*//mg;
        die "$@ in $_\n";
      };
      $_ = $regex
    }
  }
  wantarray ? @specs :
  @specs > 1 ? confess("multiple results") :
  $specs[0]
}

sub __truncate($$) {
  my ($aref, $maxwid) = @_;
  my $changed;
  foreach (@$aref) {
    if (length($_) > $maxwid) {
      $changed++;
      # cut before the first actual newline or after visualized newline
      s/\A.+?(?:\\n|(?=\n))\K.*/.../s;
      if (length($_) > $maxwid) {
        my $numdots = max(3, length($_)-$maxwid);
        substr($_, $maxwid - $numdots) = ("." x $numdots);
      }
      #warn dvis '##TT __truncated $_ (len=',length($_),")\n";
    }
  }
  $changed
}

#use constant VISIBLE_SPACE => "\N{SYMBOL FOR SPACE}";
use constant VISIBLE_SPACE => "\N{MIDDLE DOT}";

sub __visualize($) {
  local $_ = shift;
  s/\n/\\n/sg;
  s/\t/\\t/g;
  s/([^[:print:]])/ sprintf "\\x{%02x}", ord($1) /eg;
  if ($opts{ign_leading_spaces}) {
    s/^( +)//;
  } else {
    # make leading spaces visible
    s/^( +)/VISIBLE_SPACE x length($1)/e;
  }
  if ($opts{ign_trailing_spaces}) {
    s/( +)$//;
  } else {
    # make trailing spaces visible
    s/( +)$/VISIBLE_SPACE x length($1)/e;
  }
  $_
}

# Read the two files (only one sheet each) into memory and delete
# columns to be ignored.  In each Spreadsheet::Edit sheet :
#
# To find corresponding columns after deleting ignored columns, even if
# the user specified absolute column letters:
#
#  For each in {always_show_columns} and {id_columns} :
#    alias <uniqueident> to the column and set
#      $sheet->attributes->{TRACKING}{ spec => [list of <uniqueident>] }
#      (each 'spec' might be a regex matching multiple values).
#
#  Set attributes->{DELETED_CXS} => [ original cxs ]
#
sub new {
  my $class = shift;
  my %hash = @_;
  warn __PACKAGE__,"->new",hvis(%hash),"\n" if $hash{debug};

  $hash{always_show_columns} //= $hash{id_columns};

  my @columns = @{ $hash{columns} // [] }; # copy; will be mutated
  my $always_show_columns = $hash{always_show_columns} // [];
  my $id_columns = $hash{id_columns} // [];
  my $negated;
  if (@columns) {
    $negated = $columns[0] =~ s/^-//;
    foreach (@columns[1..$#columns]) {
      die "You can not mix negated and non-negated --columns COLSPECS (",
          vis("$columns[0]")," and ",vis($_),")\n"
        unless s/^-// == $negated;
    }
  }
  ##my @sheet_new_opts = ($hash{debug} ? (verbose => 1) : (silent => 1)); # no alias warns
  my @sheet_new_opts = (silent => 1); # no alias warns
  for my $N (1, 2) {
    # Read into memory
    my $path = $hash{"path$N"} // confess "'path$N' option is required";
    my $sh = Spreadsheet::Edit->new(@sheet_new_opts);

    my $title_row_spec = $hash{title_row}; # TODO: Allow separate specification

    $sh->read_spreadsheet({
                           use_gnumeric => 1,
                           quote_char   => $hash{quote_char},
                           sep_char     => $hash{sep_char},
                           encoding     => $hash{encoding},
                           silent       => $hash{silent},
                           (defined($title_row_spec)
                             ? $title_row_spec == 0
                               ? (title_rx => undef)
                               : (title_rx => ($title_row_spec-1),
                                  required => [compile_if_regex
                                   @$always_show_columns,@$id_columns,@columns],
                                 )
                             : () # allow auto-detect if not specified
                           ),
                          }, $path
                         );

    $hash{"sheet$N"} = $sh;
    $sh->attributes->{ORIG_PATH} = $path;
  }

  my $ncols = 0;
  for my $N (1,2) {
    my $sh = $hash{"sheet$N"};

    $ncols = max($ncols, $sh->num_cols);

    # Create aliases for columns which might move when non-compared
    # columns are deleted later.  The aliases will automatically track,
    # relevant when the user specified absolute column letters.
    # Each <spec> might be a regex which matches multiple columns!
    my %cx2seq;
    foreach my $spec (@$always_show_columns, @$id_columns,
                      @columns #unnecessary???
                     ) {
      my @cxlist = $sh->spectocx(compile_if_regex($spec));

      if (defined(my $prev_aliases = $sh->attributes->{TRACKING}->{ $spec })) {
        # This same spec was seen before (perhaps in both always-show & id),
        # and we already processed the cx(s) the spec specifies, i.e. @cxlist.

        my %seen_cx = map{($_ => 1)} @cxlist; # Sanity check...
        for (@$prev_aliases) {
          oops unless defined delete $seen_cx{ $sh->colx->{$_} };
        }
        oops if keys %seen_cx;
      } else {
        my @aliaslist =
                  map { "__origcx${_}_".$cx2seq{$_}++."_".title2ident($spec) }
                      @cxlist;
        $sh->attributes->{TRACKING}->{ $spec } = \@aliaslist;
        for (0..$#aliaslist) {
          $sh->alias($aliaslist[$_] => $cxlist[$_]);
          oops unless $sh->colx->{$aliaslist[$_]} == $cxlist[$_];
        }
      }
    }

    # Pre-sort the files
    if (@$id_columns && $hash{sort_rows}) {

      my $title_rx = $sh->title_rx;
      $sh->first_data_rx(defined($hash{first_data_row})
                          ? ($hash{first_data_row}-1)
                          : (($title_rx // -1) + 1));

      # FIXME? Can the "Translate --always-show and --id-columns to cx..."
      # be hoisted up here from compare_native()?  Maybe not because
      # that has to be done after deleting ignored columns (otoh maybe we can
      # sort after deleting?)  For now, repeating code from there...
      my @key_cxs = map{
                      map{ $sh->colx->{$_} // oops }
                      @{ $sh->attributes->{TRACKING}->{ $_ } // oops }
                    }
                    @{$hash{id_columns}} ;

      $sh->attributes->{ORIGINAL_RXS} = [
        $sh->sort_rows(sub{
          foreach my $cx (@key_cxs) {
            my $r = $a->[$cx] cmp $b->[$cx];
            return $r if $r;
          }
          0
        })
      ];
    }

    # Remove columns not being compared
    if (@columns) {
      my @cxlist = $sh->spectocx(compile_if_regex(@columns));
      my %delete_cxs;
      if ($negated) {
        %delete_cxs = map{ $_ => 1 } @cxlist;
      } else {
        %delete_cxs = map{ $_ => 1 } 0..$sh->num_cols-1;
        delete @delete_cxs{ @cxlist };
      }
      my @deleted_cxs = sort {$a <=> $b} keys %delete_cxs;
      warn "negated=",vis($negated),
           " delete cxs:",avis(@deleted_cxs),
           " abcs:",avis(map{cx2let} @deleted_cxs),"\n"
         if $hash{debug};
      $sh->delete_cols(@deleted_cxs);
      $sh->attributes->{DELETED_CXS} = \@deleted_cxs;
    } else {
      $sh->attributes->{DELETED_CXS} = [];
    }

    # Change newline etc. and non-printing to visible escapes
    $sh->apply_all(sub{
      foreach (@{ $sh->crow }) {
        $_ = __visualize($_);
      }
    });

    # Truncate overly-long titles
    if (defined(my $title_row = $sh->title_row)) {
      if (__truncate($title_row, $hash{trunc_title_width})) {
        # Is this necessary (or wise) ???
        $sh->title_rx($sh->title_rx); # re-compute %colx
      }
    }
  }

  # Append empty columns if needed to make both sheets the same width
  for my $N (1,2) {
    my $sh = $hash{"sheet$N"};
    while ($sh->num_cols < $ncols) { $sh->insert_col('>$', "") }
  }
  bless \%hash, $class;
}

sub _getncols($$) {
  my ($sh, $rx) = @_;
  $rx >= ($sh->title_rx//0) ? $sh->attributes->{NUMCOLS_USED} : $sh->num_cols
}

sub _title_or_origABC($$) {
  my ($sh, $cx) = @_;
  my $titlerow = $sh->title_row;
  (defined($titlerow) && $titlerow->[$cx] ne "")
    ? $titlerow->[$cx]
    : cx2origlet($sh,$cx)
}

sub _native_output {
  my ($self, $cxlist1, $cxlist2, $diff, $dumbrun) = @_; oops unless @_==5;
  my $restricted_keycols = @{$self->{id_columns}} > 0;
  my $changed;
  $diff->Reset();
  while( $diff->Next() ) {
    my @items1 = $diff->Items(1);
    my @items2 = $diff->Items(2);
    if ($diff->Same() && $restricted_keycols) {
      # Diff looked only at @id_columns and found no changes; there
      # may be other changes, which we manually detect here.
      die "bug" unless @items1 == @items2;
      for my $i (0 .. $#items1) {
        # Each item is [sheet#, rowref, original_rx]
        my ($N1, $row1, $rx1) = @{$items1[$i]};
        my ($N2, $row2, $rx2) = @{$items2[$i]};
        my $hash1 = $hash_func_code->([@{$row1}[ @$cxlist1 ]], $rx1)
             // die "ERROR: hash-func returned undef for row ".($rx1+1)." in ".$self->{"sheet$N1"}->data_source()."\n";
        my $hash2 = $hash_func_code->([@{$row2}[ @$cxlist2 ]], $rx2)
             // die "ERROR: hash-func returned undef for row ".($rx2+1)." in ".$self->{"sheet$N2"}->data_source()."\n";
        if ($hash1 ne $hash2) {
          $self->diff_rows($rx1, $rx2, "CHANGED", $dumbrun);
          $changed = 1;
        } else {
          #print "NOT-CHANGED: ", verbrow_str($rx1,$rx2), "\n"
          #  if $debug && !$dumbrun;
        }
      }
    }
    next
      if $diff->Same();
    $changed = 1;

    if (@items2 == 0) {
      # A hunk of rows deleted from self->{sheet2}
      for my $i (0 .. $#items1) {
        my ($N, $row, $orig_rx) = @{ $items1[$i] };
        $self->show_row($N, $row, $orig_rx, "DELETED", $dumbrun);
      }
    }
    elsif (@items1 == 0) {
      # A hunk of rows added to self->{sheet2}
      for my $i (0 .. $#items2) {
        my ($N, $row, $orig_rx) = @{ $items2[$i] };
        $self->show_row($N, $row, $orig_rx, "ADDED", $dumbrun);
      }
    }
    else {
      # A range replaced
      my $lim = min($#items1, $#items2);
      for my $i (0 .. $lim) {
        if ($restricted_keycols) {
          # Did not match in ID columns, so they must be unrelated
          # records; show as deletes and adds.
          my ($N1, $row1, $rx1) = @{$items1[$i]};
          my ($N2, $row2, $rx2) = @{$items2[$i]};
          #??? When is it not the case that $N1==1 and $N2==2 ???
          $self->show_row($N1, $row1, $rx1, "*Deleted", $dumbrun);
          $self->show_row($N2, $row2, $rx2, "*Added", $dumbrun);
        } else {
          my (undef, undef, $rx1) = @{$items1[$i]};
          my (undef, undef, $rx2) = @{$items2[$i]};
          $self->diff_rows($rx1, $rx2, "Changed", $dumbrun)
        }
      }
      for my $i ($lim+1 .. $#items1) {
        my ($N1, $row1, $rx1) = @{$items1[$i]};
        oops unless $N1==1;
        $self->show_row(1, $row1, $rx1, "Deleted*", $dumbrun);
      }
      for my $i ($lim+1 .. $#items2) {
        my ($N2, $row2, $rx2) = @{$items2[$i]};
        oops unless $N2==2;
        $self->show_row(2, $row2, $rx2, "Added*", $dumbrun);
      }
    }
  }
  return $changed
}#_native_output

sub _check_title_rows {
  my $self = shift;
  my ($sh1, $sh2) = @$self{qw/sheet1 sheet2/};

  if (!defined($self->{title_row})) {
    my sub _titlerow_msg {
      my $sh = shift;
      my $rx = $sh->title_rx;
      defined($rx) ? "--title-row ".($rx+1) : "NO --title-row"
    }
    if ($self->{verbose}) {
      if (u($sh1->title_rx) eq u($sh2->title_rx)) {
        warn "> Auto-detected ", _titlerow_msg($sh1), " in both\n";
      } else {
        warn "> Auto-detected DIFFERENT title rows:\n";
        warn "    ",_titlerow_msg($sh1), " in ", $sh1->data_source, "\n";
        warn "    ",_titlerow_msg($sh2), " in ", $sh2->data_source, "\n";
      }
    }
  } else {
    confess "bug"
      ,vis($sh1->attributes())
      if $self->{title_row}==0 && defined( $sh1->title_rx );
  }
}#_check_title_rows

# Find corresponding columns with the same title or fuzzy-matched titles
sub _pairup_columns {
  my $self = shift;
  my ($sh1, $sh2) = @$self{qw/sheet1 sheet2/};
  # Match titles; if not all matched, try fuzzy alternatives, exhausting all
  # possibilities at each "level of fuzzyness" before trying the next level.
  #
  # Matching by levels avoids wrongly matching a fuzzed title when the same
  # string is a non-fuzzed (or less-fuzzed) title in the other file.
  # For example, with
  #    f1=(Foo, Bar)  f2=(Foo, foo, bar)
  # "Foo" must be matched before trying fuzzed versions; otherwise
  # f1-cx0 might be fuzzed to "foo" and wrongly match f2-cx1 when it
  # should match f2-cx0.
  #
  # Note: Fuzzing can make mistakes.  For example with
  #   f1=(F_oo,Bar) and f2=(Foo, f_oo), no titles match without fuzzing
  # but it is unclear whether f1's "F_oo" should match "Foo" or "f_oo" in f2.
  # Nevertheless, we will pick one and if wrong then the user will get a
  # "change" in every row.

  my sub add_titles($$$) {
    my ($hash, $level, $sh) = @_;
    # Add titles (fuzzed to exactly the specified degree) to a hash unless
    # already present, i.e. never trying a fuzzed title which matches
    # a non-fuzzed (or less-fuzzed) title in the same file.
    my $title_row = $sh->title_row
         // return;
    foreach my $cx (0..$sh->num_cols-1) {
      my $title = $title_row->[$cx];
      my $t = $title;
      {                last if $level==0;
        $t = fc($t);   last if $level==1;
        $t =~ s/[_. ]|${\VISIBLE_SPACE}/-/g;
                       last if $level==2;
        ($t = fc $title) =~ s/\s+|${\VISIBLE_SPACE}//sg;
                       last if $level==3;
        $t =~ s/\W//g; last if $level==4;
        oops
      }
      $hash->{$t} = $cx unless exists $hash->{$t};
    }
  }

  my (@corresp_titles1, @corresp_titles2);
  my (@corresp_cxlist1, @corresp_cxlist2);
  my ($title_row1, $title_row2) = ($sh1->title_row, $sh2->title_row);
  my ($hash1, $hash2) = ({}, {});
  my (%cx1found, %cx2found);
  for my $level (0..4) {
    add_titles($hash1, $level, $sh1);
    add_titles($hash2, $level, $sh2);
    while (my ($ft,$cx1) = each %$hash1) {
      next if $cx1found{$cx1};
      if (defined(my $cx2 = delete $hash2->{$ft})) { # "matching" titles
        oops(dvis '$cx1 $cx2 $ft\n$hash1\n$hash2\%cx1found\n%cx2found')
          if $cx2found{$cx2};
        $cx1found{$cx1} = $cx2found{$cx2} = 1;
        delete $hash1->{$ft};
        push @corresp_cxlist1, $cx1;
        push @corresp_cxlist2, $cx2;
        push @corresp_titles1, $title_row1->[$cx1] // oops;
        push @corresp_titles2, $title_row2->[$cx2] // oops;
      }
    }
    while (my ($ft,$cx2) = each %$hash2) {
      next if $cx2found{$cx2};
      if (defined(my $cx1 = delete $hash1->{$ft})) {
        oops(dvis '$cx2 $cx1 $ft\n$hash2\n$hash1\%cx2found\n%cx1found')
          if $cx1found{$cx1};
        $cx2found{$cx2} = $cx1found{$cx1} = 2;
        delete $hash2->{$ft};
        push @corresp_cxlist1, $cx1;
        push @corresp_cxlist2, $cx2;
        push @corresp_titles1, $title_row1->[$cx1] // oops;
        push @corresp_titles2, $title_row2->[$cx2] // oops;
      }
    }
    last unless keys %$hash1; # stop if original titles all matched
  }
  oops unless @corresp_cxlist1 == @corresp_cxlist2;
  oops unless @corresp_cxlist1 == @corresp_titles1;
  oops unless @corresp_cxlist1 == @corresp_titles2;
  { my @order = sort {$corresp_cxlist1[$a] <=> $corresp_cxlist1[$b]}
                     0..$#corresp_cxlist1;
    @corresp_cxlist1 = @corresp_cxlist1[@order];
    @corresp_cxlist2 = @corresp_cxlist2[@order];
    @corresp_titles1 = @corresp_titles1[@order];
    @corresp_titles2 = @corresp_titles2[@order];
  }
  # Assume un-matched titles *in the same column* correspond
  # (if there are no titles, then all columns correspond!)
  # A collective title is synthesized for each pair.
  # Otherwise unmatched titles are considered unpaired.
  my (@unpaired_cxlist1, @unpaired_cxlist2);
  for my $cx (0..($sh1->num_cols-1)) {
    next if defined $cx1found{$cx};
    if (!defined $cx2found{$cx} && $cx < $sh2->num_cols) {
      $cx1found{$cx} = $cx2found{$cx} = 1;
      push @corresp_cxlist1, $cx;
      push @corresp_cxlist2, $cx;
      my $synthetic_title = "(col ".cx2let($cx).")";
      push @corresp_titles1, $synthetic_title;
      push @corresp_titles2, $synthetic_title;
    } else {
      push @unpaired_cxlist1, $cx;
    }
  }
  for my $cx (0..($sh2->num_cols-2)) {
    next if defined $cx2found{$cx};
    push @unpaired_cxlist2, $cx;
  }
  oops unless @corresp_cxlist1 + @unpaired_cxlist1 == $sh1->num_cols;
  oops unless @corresp_cxlist1 + @unpaired_cxlist2 == $sh2->num_cols;

  $sh1->attributes->{CORRESP_TITLES  } = \@corresp_titles1;
  $sh1->attributes->{CORRESP_CXLIST  } = \@corresp_cxlist1;
  $sh1->attributes->{UNPAIRED_CXLIST } = \@unpaired_cxlist1;

  $sh2->attributes->{CORRESP_TITLES  } = \@corresp_titles2;
  $sh2->attributes->{CORRESP_CXLIST  } = \@corresp_cxlist2;
  $sh2->attributes->{UNPAIRED_CXLIST } = \@unpaired_cxlist2;

  warn dvis '$title_row1\n$title_row2\n'
           .'@corresp_titles1\n@corresp_titles2\n'
           .'@corresp_cxlist1\n@corresp_cxlist2\n'
           .'@unpaired_cxlist1\n@unpaired_cxlist2\n'
    if $opts{debug};

}#_pairup_columns

sub compare_native {
  my $self = shift;
  require Algorithm::Diff;

  for my $N (1, 2) {
    my $sh = $self->{"sheet$N"};
    # Determine widest row in rows not preceeding title row (if any),
    # excluding empty cells on the right end of a row.
    my $ndcols = 0;
    my $title_rx = $sh->title_rx;
    $sh->apply_all(sub{
      return if defined($title_rx) && $sh->rx < $title_rx;
      my $n = $sh->num_cols;
      while ($n > 0 && $sh->crow->[$n-1] eq "") { --$n }
      $ndcols = max($ndcols, $n);
    });
    $sh->attributes->{NUMCOLS_USED} = $ndcols;

    # Translate --always-show and --id columns to cx values for each sheet
    foreach(['always_show_columns','ALWAYS_SHOW'],
            ['id_columns','ID_CXS']) {
      my ($option_key, $attr_key) = @$_;
      foreach my $spec (@{$self->{$option_key}}) {
        my $identlist = $sh->attributes->{TRACKING}->{ $spec } // confess;
        foreach (@$identlist) {
          my $cx = $sh->colx->{$_}
            // die "Can not use ignored column '$_' ($spec) for $option_key",
                   " (",$sh->data_source,")\n";

          $sh->attributes->{$attr_key}->{$cx} = 1; # e.g. attributes->{ID_CXS}
        }
      }
    }
  }

  $self->_check_title_rows();

  # Set attributes CORRESP_TITLES, CORRESP_CXLIST, and UNPAIRED_CXLIST
  $self->_pairup_columns();

  my $restricted_keycols = @{$self->{id_columns}} > 0;

  my @cxlist1 = ( @{ $self->{sheet1}->attributes->{CORRESP_CXLIST} // [] },
                  @{ $self->{sheet1}->attributes->{UNPAIRED_CXLIST}     }
                );
  my @cxlist2 = ( @{ $self->{sheet2}->attributes->{CORRESP_CXLIST} // [] },
                  @{ $self->{sheet2}->attributes->{UNPAIRED_CXLIST}     }
                );

  # FIXME: What is difference between unmatched-in-same-columm
  #  pairs in CORRESP_CXLIST and UNPAIRED_CXLIST ??
  #  If *all* unmatched were just put into UNPAIRED_CXLIST in cx order,
  #  I think they would line up exactly the same way as we get here...

  # Corresponding columns, limited to --id-columns if specified, otherwise all
  my (@idcxlist1, @idcxlist2);
  if ($restricted_keycols) {
    @idcxlist1 = sort {$a <=> $b}
                 keys %{ $self->{sheet1}->attributes->{ID_CXS} };
    @idcxlist2 = sort {$a <=> $b}
                 keys %{ $self->{sheet2}->attributes->{ID_CXS} };
  } else {
    @idcxlist1 = @cxlist1;
    @idcxlist2 = @cxlist2;
  }
  my $max_idcxlist_max = max($#idcxlist1, $#idcxlist2);
  my @idpadding1 = ("") x ($max_idcxlist_max - $#idcxlist1);
  my @idpadding2 = ("") x ($max_idcxlist_max - $#idcxlist2);

  # Call user-specified global setup code (defaults to nop).
  # This could edit the Spreadsheet::Edit sheet contents and/or
  # save information in globals (declared with "our") for later use
  # by hashid-func or hash-func.  All these user-defined functions
  # are compiled into package Usercode.
  # FIXME: Probably too late for this to be useful. UNDOCUMENTING for now...
  $setup_code_code->( $self->{"sheet1"},
                      $self->{"sheet2"},
                      \@idcxlist1,
                      \@idcxlist2,
                    );

  my $keygen = sub{
    # Form a hash of the contents of the "id columns" in a given row
    # (or all columns in the absence of --id-column arguments).
    #
    # The Diff algorithm is run using only these hash values, and so rows
    # which represent the same record (i.e. same 'id' values) will
    # be reported as "unchanged" even though they may differ in other columns,
    # and other kinds of hunks represent additions or deletions of identities
    # to the data.
    #
    # OTOH, if there ARE NO ID columns, then the hash covers all columns
    # and only completely-identical rows are reported as "unchanged"
    # and changes may be reported as add, delete, or replace in a somewhat
    # unpredictable way (which is why using --id-columns is helpful).
    #
    # This processing occurs in _native_output()
    #
    # If one file has fewer columns than the other, the narrower rows are
    # padded with empty strings.
    my ($N, $row, $orig_rx) = @{ $_[0] };
    my $sh = $self->{"sheet$N"};
    if ($orig_rx eq u($sh->title_rx)) {
      # Return a special token to force title rows to "match".  This makes
      # changes in titles appear as a "change" and not "delete" + "add".
      return " __TITLE_ROW__ ";
    }
    my $idcxlist  = ($N==1 ? \@idcxlist1  : \@idcxlist2);
    my $idpadding = ($N==1 ? \@idpadding1 : \@idpadding2);

    my $s = $hashid_func_code->([@{$row}[ @$idcxlist ], @$idpadding], $orig_rx);
    $s;
  };

  # Pre-screen all records, ignoring any for which hashid-func returns
  # undef, but remembering the original rx values for display to the user.
  #
  # Note: An alternate implementation would be to pass the hash strings
  # directly to Algorithm::Diff with no keyGen for faster Diff operation
  # (Diff is optimized for this case), remembering original rx values in
  # a side hash.  However this would double memory usage and performance
  # probably matters only with very large spreadsheets when conserving memory
  # is arguably more important than conserving CPU.

  # Two arrays of [sheet#, rowref, original_rx], one for each sheet:
  my @diffargs = ([], []);
  foreach my $N (1,2) {
    my $sh = $self->{"sheet$N"};
    my $rows = $sh->rows;
    for (my $rx=0; $rx <= $#$rows; $rx++) {
      my $row = $rows->[$rx];
      my $diffitem = [$N, $row, $rx];   # [sheet#, rowref, original_rx]
      push @{ $diffargs[$N-1] }, $diffitem if defined($keygen->($diffitem));
    }
  }

  warn dvis '$diffargs[0]\n$diffargs[1]\n' if $self->{debug};

  my $diff = Algorithm::Diff->new(
    $diffargs[0],
    $diffargs[1],
    { keyGen => $keygen }
  );

  # dumbrun==true to determine the width of the widest displayed title
  # without actually printing anything
  $self->{widest_title_width} = 0;
  my $changed = $self->_native_output(\@cxlist1, \@cxlist2, $diff, 1);

  if ($changed) {
    print $self->{sheetname_header};  # possibly ""
    $diff->Reset;
    my $changed2 = $self->_native_output(\@cxlist1, \@cxlist2, $diff, 0);
    oops if $changed2 != $changed;
  }
  else {
    if ($opts{debug}) {
      ###TEMP FIXME remove this after verified
      $diff->Reset;
      my $changed2 = $self->_native_output(\@cxlist1, \@cxlist2, $diff, 0);
      oops if $changed2;
    }
  }
  $self->{exit_status} = ($changed ? 1 : 0);
}

# Format a value for display as an indented block.
# Newlines in the input are already converted to visible "\n".
# Actual newlines are appended to these markers and indentation
# inserted before second and subsequent lines.  Quotes are not
# included in the result.
# Usage:
#   printf "%*s: '%s'\n", $twid, $title, fmt_value($valstr,$twid+3);
sub fmt_value($$) {
  my ($str, $indent_width) = @_;
  oops if $str =~ /\n/s;
  my $indent = " " x $indent_width;
  $str =~ s/\\n/\\n\n${indent}/gs;
  if ($maxwidth) { # fold
    my $first_mw = $maxwidth - $indent_width;
    if (length($str) > $first_mw) {
      oops "maxwidth $maxwidth is too narrow\nfmw=$first_mw iw=$indent_width <<$str>>"
        if $first_mw < 20;  # sanity
      $str =~ s/\A([^\n]{$first_mw})([^\n]+)/$1\n${indent}$2/m;
      while ($str =~ s/^([^\n]{$maxwidth})([^\n]+)/$1\n${indent}$2/m) { }
    }
  }
  $str
}
#BEGIN {
#$maxwidth = 30; for my $w (0..$maxwidth*3) {
#  my ($i, $NextL) = (0, "A");
#  my $s = "";
#  for (0..$w-1) {
#    my $d = ($i++ % 10);
#    if ($d==0 && $i > 1) { $d = $NextL++ }
#    $s .= $d;
#    if (($_ % 62)==9) { $s .= "\\n" }
#  }
#  print "Input : $s\n";
#  print "Output: ", fmt_value($s,8), "\n";
#}
#die "Test exit";
#}

sub show_cell {
  my ($self, $title, $v1, $v2, $dumbrun) = @_; oops unless @_ == 5;

  my $widest_title_width = $self->{widest_title_width};
  if ($dumbrun) {
    # $title has been visualized with newline replaced by literal '\n',
    # and they will be displayed just that way, without any wrapping.
    my $tw = length($title);
    if ($tw > $widest_title_width) {
      $self->{widest_title_width} = $tw;
    }
    return
  }

  my $twidth = (2+$widest_title_width);  # 2 for indent

  my $v1str = fmt_value($v1, $twidth+3); # +3 for the ": '"

  my $v2str = fmt_value($v2, $twidth+3)
                if defined($v2) && $v1 ne $v2;

  if (! defined $v2) {
    printf "%*s: '%s'\n", $twidth, $title, $v1str;
  }
  elsif ($v1 eq $v2) {
    printf "%*s: '%s' (unchanged)\n", $twidth, $title, $v1str;
  }
  else {
    my $v1_nl = 1 + @{[ $v1str =~ /\n/sg ]};
    my $v2_nl = 1 + @{[ $v2str =~ /\n/sg ]};
    if ($v2_nl <= 1) {
      my $s = sprintf "%*s: '%s'", $twidth, $title, $v1str;
      if ($v2_nl==1
          && (!$maxwidth
              || length($s)+4+length($rightarrow)+length($v2str) <= $maxwidth)
         ) {
        # COL TITLE: 'old text' -> 'new text' (on one line)
        $s .= " $rightarrow '$v2str'";
#$s .= "\n<<<". dvis '$v1 $v2 $v1str $v2str>>>';
      }
      else {
        #  COL TITLE: 'old text'
        #          -> 'new text'
        $s .= sprintf "\n%*s $rightarrow '%s'",
                      $twidth-length($rightarrow), "",
                      $v2str;
      }
      print $s, "\n";
    } else {
      # Long multi-line blocks -- show ala 'diff -u infinity' :
      #  COL TITLE:  unchanged line
      #            + added line
      #            - deleted line
      #              unchanged line
      my @lines1 = split /\\n/, $v1;
      my @lines2 = split /\\n/, $v2;
      my $diff = Algorithm::Diff->new(\@lines1, \@lines2);
      my $title_str  = sprintf "%*s:", $twidth, $title;
      my $indent_str = sprintf "%*s ", $twidth, "";
      my $pfx = $title_str;
      while(my $posn = $diff->Next()) {
        my $d = $diff->Diff;
        next if $d==0 && $opts{suppress_common_lines};
        foreach ($diff->Items(1)) {
          print $pfx, (($d&1) ? "- " : "  "), fmt_value($_, $twidth+3), "\n";
          $pfx = $indent_str;
        }
        next if $d==0; # both lists are the same
        foreach ($diff->Items(2)) {
          print $pfx, (($d&2) ? "+ " : "  "), fmt_value($_, $twidth+3), "\n";
          $pfx = $indent_str;
        }
      }
    }
  }
} #show_cell
sub row_header($$;$) {
  my ($verb, $rx1, $rx2) = @_;
  my $s = "$verb row ".($rx1+1);
  $s .= " (row ".($rx2+1)." in 2nd file)" if defined($rx2) && $rx1 != $rx2;
  sprintf "\n-------- %s %s\n", $s, ('-' x ($maxwidth-10-length($s)));
}
sub show_row {
  #my ($self, $N, $rx, $verb, $dumbrun) = @_; oops if @_ != 5;
  my ($self, $N, $row, $curr_rx, $verb, $dumbrun) = @_; oops if @_ != 6;
  my $sh = $self->{"sheet$N"};
  my $orig_rx = $sh->attributes->{ORIGINAL_RXS}->[$curr_rx] // $curr_rx;
  unless ($dumbrun) {
    print row_header($verb, $orig_rx);
  }
  my sub ABC_label {
    my $cx = shift;
    defined($sh->title_rx()) ? "(".cx2origlet($sh,$cx).")" : cx2let($cx);
  }
  my $title_rx = $sh->title_rx;
  my $corresp_titles  = $sh->attributes->{CORRESP_TITLES};
  my $always_show     = $sh->attributes->{ALWAYS_SHOW};
  my %notseen = map{ ($_=>1) } (0.._getncols($sh,$curr_rx)-1);
  my $all_empty = 1;

  for my $show_always_shown (1, 0) { # Show --id-columns etc. first
    if (defined $corresp_titles) {
      my $corresp_cxlist = $sh->attributes->{CORRESP_CXLIST};
      for my $i (0 .. $#$corresp_cxlist) {
        my $cx = $corresp_cxlist->[$i];
        next if ((!$always_show->{$cx}) ^ (!$show_always_shown));
        my $value = $row->[$cx];
        if ($value ne "" || $self->{show_empties} || $always_show->{ $cx }) {
          my $label = $curr_rx <= ($title_rx//-1)
               ? ABC_label($cx) : $corresp_titles->[$i];
          $self->show_cell($label, $value, undef, $dumbrun);
          $all_empty = 0;
        }
        delete $notseen{$cx};
      }
    }
    for my $cx (@{ $sh->attributes->{UNPAIRED_CXLIST} }) {
      next if (!!$always_show->{ $cx } ^ !!$show_always_shown);
      my $value = $row->[$cx];
      if ($value ne "" || $self->{show_empties} || $always_show->{ $cx }) {
        $self->show_cell(ABC_label($cx), $value, undef, $dumbrun);
        $all_empty = 0;
      }
      delete $notseen{$cx};
    }
  }
  die dvis 'bug\n  %notseen\n  $sh->attributes()\n ' if %notseen;
  print "    (empty row)\n" if $all_empty && !$dumbrun;
}
sub diff_rows {
  my ($self, $rx1, $rx2, $verb, $dumbrun) = @_;
  oops unless @_ == 5;
  my ($sheet1, $sheet2) = @$self{qw/sheet1 sheet2/};
  my $orig_rx1 = $sheet1->attributes->{ORIGINAL_RXS}->[$rx1] // $rx1;
  my $orig_rx2 = $sheet2->attributes->{ORIGINAL_RXS}->[$rx2] // $rx2;
  my $headerline = row_header($verb, $orig_rx1, $orig_rx2);
  my $header_printed = $dumbrun; #suppress during dumb run
  my $row1 = $sheet1->[$rx1] // die "BUG: Undef row in sheet1 at rx1=$rx1\n";
  my $row2 = $sheet2->[$rx2] // die "BUG: Undef row in sheet2 at rx2=$rx2\n";
  my $ncols1 = _getncols($sheet1, $rx1);
  my $ncols2 = _getncols($sheet2, $rx2);
  my %notseen1 = map{ ($_=>1) } (0..$ncols1-1);
  my %notseen2 = map{ ($_=>1) } (0..$ncols2-1);

  my $corresp_titles  = $sheet1->attributes->{CORRESP_TITLES};
  if (defined $corresp_titles) {
    my $corresp_cxlist1 = $sheet1->attributes->{CORRESP_CXLIST};
    my $corresp_cxlist2 = $sheet2->attributes->{CORRESP_CXLIST};
    for my $show_always_shown (1, 0) { # Show --id-columns etc. first
      for my $i (0 .. $#$corresp_cxlist1) {
        my $cx1 = $corresp_cxlist1->[$i];
        next if ((!$sheet1->attributes->{ALWAYS_SHOW}->{$cx1}) ^ (!$show_always_shown));
        my $cx2 = $corresp_cxlist2->[$i];
        delete $notseen1{$cx1};
        delete $notseen2{$cx2};
        if (($row1->[$cx1] ne $row2->[$cx2])
              || $sheet1->attributes->{ALWAYS_SHOW}->{ $cx1 }) {
          print $headerline unless $header_printed++;
          $self->show_cell($corresp_titles->[$i], $row1->[$cx1], $row2->[$cx2], $dumbrun);
        }
      }
    }
  }
  # "Singleton" columns have no corresponding column in the other sheet.
  # Show their cells only if not empty because they don't actually represent
  # different content except when they contain something.
  my $title1_row = $sheet1->title_row;
  for my $cx1 (@{ $sheet1->attributes->{UNPAIRED_CXLIST} }) {
    if ((my $content = $row1->[$cx1]) ne "") {
      print $headerline unless $header_printed++;
      $self->show_cell("f1."._title_or_origABC($sheet1,$cx1), $content, undef, $dumbrun);
    }
    delete $notseen1{$cx1};
  }
  my $title2_row = $sheet2->title_row;
  for my $cx2 (@{ $sheet2->attributes->{UNPAIRED_CXLIST} }) {
    if ((my $content = $row2->[$cx2]) ne "") {
      print $headerline unless $header_printed++;
      $self->show_cell("f2."._title_or_origABC($sheet2,$cx2), $content, undef, $dumbrun);
    }
    delete $notseen2{$cx2};
  }
  die dvis 'bug $rx1 $rx2 $verb %notseen1 %notseen2\n$sheet1->attributes()\n$sheet2->attributes()'
    if keys(%notseen1) or keys(%notseen2);
}

sub compare_using_diff {
  my ($self, $cmd) = @_;
  confess "usage: obj->compare_using_diff([command, args...])"
    unless ref($cmd) eq "ARRAY" && $cmd->[0] =~ /diff|git/i;

#  # With standard diff, we have no way to suppress the header if there are
#  # no diffs, so always print it (it will be "" if no header is ever
#  # appropraite).
#  confess "bug" unless defined $self->{sheetname_header};
#  print $self->{sheetname_header};

  # Write the possibly-edited-for-visibiliity data to temp .csv files,
  # and create symlinks to these csv files for using human-friendly names
  # "labelN" (optionally in subdirs if the labels include "directories").
  # These are used when running [tk]diff for nice output.
  # N.B. this is necessary because tkdiff does not support the --label option!
  # 4/1/23: Ditto for git diff.
  my $tdirdepth = 0;
  foreach my $N (1,2) {
    my $sh = $self->{"sheet$N"};
    ($self->{"linkrpath$N"} = $sh->data_source()) =~ s/^${ \rootdir() }//;
    my @dotdots = $self->{"linkrpath$N"} =~ /(?:^|\/)\.\.(?=$|\/)/g;
    $tdirdepth = max($tdirdepth, $#dotdots+1);
    #FIXME: Is the +1 needed?   Verify with actual cases!
  }
  my $tdir = $self->{tempdir} // tempdir(CLEANUP => 1);
  # If linkrpaths contain .. put them in a sufficiently-deep subdir
  # that they will still be within our temp directory structure
  $tdir = catdir($tdir, ("sub" x $tdirdepth));
  make_path($tdir);

  foreach my $N (1,2) {
    $self->{"tmpcsv$N"} = catfile($tdir, "«TEMPCSV$N».csv");
    $self->{"sheet$N"}->write_csv( $self->{"tmpcsv$N"} );
  }

  foreach my $N (1,2) {
    my $sh = $self->{"sheet$N"};
    (my $rpath = $sh->data_source()) =~ s/^${ \rootdir() }//;
    $self->{"linkrpath$N"} = $rpath;
    $self->{"link$N"} = canonpath(
            catfile(catdir($tdir,dirname($rpath)), basename($rpath)) );
    warn "### link$N = ",qsh($self->{"link$N"}),
         " rpath=",qsh($rpath),
         " linkpath$N = ",qsh($self->{"linkrpath$N"}), "\n" if $self->{debug};
    make_path( dirname($self->{"link$N"}) );
    # Arrgh! git diff never follows symlinks
    if ($cmd->[0] =~ /git/) {
      File::Copy::copy($self->{"tmpcsv$N"}, $self->{"link$N"})
        or die "copy ",$self->{"tmpcsv$N"}," ",$self->{"link$N"}," : $!";
    } else {
      symlink $self->{"tmpcsv$N"}, $self->{"link$N"}
        or die "symlink ",$self->{"tmpcsv$N"}," ",$self->{"link$N"}," : $!";
    }
  }

  my @cmd = (@$cmd, $self->{"linkrpath1"}, $self->{"linkrpath2"});
  my $pid = fork;
  if ($pid == 0) { #CHILD
    warn "> cd ", qsh($tdir), "\n" if $self->{verbose};
    chdir $tdir or die "chdir $tdir : $!";
    warn "> ",join(" ",map{qsh} @cmd),"\n" if $self->{verbose};
    if ($self->{debug}) {
      warn ">    : ",qsh($cmd[-2])," -> ",main::decoded_readlink($cmd[-2]),"\n"
        if -l $cmd[-2];
      warn ">    : ",qsh($cmd[-1])," -> ",main::decoded_readlink($cmd[-1]),"\n"
        if -l $cmd[-1];
      warn "> @cmd\n";
    }
    exec @cmd;
    die "exec failed: $!";
  }
  waitpid($pid,0);
  $self->{exit_status} = (($? >> 8) | ($? & 0xFF));
}

sub exit_status { $_[0]->{exit_status} }

#(end)
