#!/usr/bin/env perl

# The non-destructive substitution modifier (s///r), used right below and
# all over this script, was introduced in perl 5.14. Requiring just 5.12
# would make an older perl die with a syntax error instead of a clear
# "Perl v5.14.0 required" message.
use 5.14.0;
use warnings;

# Version shown by --version
our $VERSION = "0.20 - 20180815";
# Name of this script without the leading directories, used in messages
our $CMD     = $0 =~ s{.*/}{}r;

# Show the command line synopsis and terminate.
#
#   usage ($exit_code);
#
# When $exit_code is true, STDERR is select'ed first, so the text ends up
# on STDERR. The process then exits with $exit_code.
sub usage {
    my $err = shift;
    $err and select STDERR;
    say for
	"usage: $CMD [options] file1 [file2]",
	"       $CMD --man | --info",
	"	file1 or file2 can be - (but not both)",
	"   -V    --version      Show version and exit",
	"   -v[1] --verbose[=1]  Set verbosity",
	"  Diff options:",
	"   -U    --utf-8                 Input is in UTF-8",
	"   -u[3] --unified=3             Show a unified diff",
	"   -I    --index        Add indices to the change chunks",
	"   -I n  --index=4      Only show chunk n",
	"   -w    --ignore-all-space      Ignore all whitespace",
	"   -b    --ignore-space-change   Ignore horizontal whitespace changes",
	"   -Z    --ignore-trailing-space Ignore whitespace at line ending",
	"   -B    --ignore-blank-lines    Ignore changes where lines are all blank",
	"   -i    --ignore-case           Ignore case changes",
	"  Color options:",
	"         --no-color     Do not use colors",
	"         --list-colors  List available colors and exit",
	"         --old=red      Color to indicate removed content",
	"         --new=green    Color to indicate added   content",
	"         --bg=white     Background color for colored indicators",
	"   -p    --pink         Shortcut for --old=magenta",
	"   -f    --fancy        Use Unicode indicators instead of ^",
	"   -r    --reverse      Reverse the colors of the indicators";
    exit $err;
    } # usage

# Defaults from the user's configuration file(s). The values in %rc are
# used below as the initial value of the matching command line option.
my %rc = read_rc ();

use charnames ();
use Algorithm::Diff;
use Term::ANSIColor qw(:constants color);
use Getopt::Long qw(:config bundling);
# --reverse has no negated form: each -r on the command line flips the
# value that came from the configuration (0 when not configured).
my $opt_r = $rc{reverse} // 0;
# Most option variables are declared right here inside the option list and
# are used as file-scoped lexicals by all code that follows.
GetOptions (
    # Options that show information and leave immediately
    "help|?"		=> sub { usage (0); },
    "V|version"		=> sub { say "$CMD [$VERSION]"; exit 0; },
      "man"		=> sub { exec "pod2man $0 | nroff -man"; },
      "info"		=> sub { exec "pod2text $0"; },

    "U|utf-8!"		=> \(my $opt_U = $rc{utf8} // 0),

#   "C|context:3"	=> \ my $opt_C,	# implement context-diff?
    # Undefined unless -u was given; the value is the number of context
    # lines, 3 when -u was passed without a value
    "u|unified:3"	=> \ my $opt_u,
    # -1 when passed without a value (show index on all chunks), a positive
    # value selects a single chunk in the main loop
    "I|idx|index:-1"	=> \(my $opt_I = $rc{index} // 0),

    "c|color!"		=> \(my $opt_c = $rc{color} // 1),
    "p|pink!"		=> \ my $opt_p,
    "r|reverse"		=> sub { $opt_r ^= 1 },
    "f|fancy!"		=> \ my $opt_f,

    "i|ignore-case!"			=> \ my $opt_i,
    "w|ignore-all-space!"		=> \ my $opt_w,
    "b|ignore-ws|ignore-space-change!"	=> \ my $opt_b,
    "Z|ignore-trailing-space!"		=> \ my $opt_Z,
    "E|ignore-tab-expansion!"		=> \ my $opt_E, # NYI: not yet implemented
    "B|ignore-blank-lines!"		=> \ my $opt_B, # NYI: only partly implemented

      "old=s"		=> \(my $old_color = $rc{old} // "red"  ),
      "new=s"		=> \(my $new_color = $rc{new} // "green"),
      "bg=s"		=> \(my $rev_color = $rc{bg}  // "white"),
      "list-colors!"	=> \ my $list_colors,

    "v|verbose:1"	=> \(my $opt_v = 0),
    ) or usage (1);
# -w is implemented as the combination of all other white-space options
$opt_w and $opt_b = $opt_Z = $opt_E = $opt_B = 1;

# Color initialization
# --pink is a shortcut that overrules whatever was chosen for --old
$opt_p and $old_color = "magenta";
# Bring the requested colors in the form that is used as key in %clr:
# all lower case, with an optional "bold" as space separated prefix.
# That way "Red", "red_bold", "red bold", "bold_red" and "BOLD red" all
# find their entry instead of silently ending up without a color.
for ($old_color, $new_color, $rev_color) {
    $_ = lc;
    s/^(.*)[ _]bold$/bold $1/;
    s/^bold[ _]/bold /;
    }
# Escape sequences for every supported color: as plain foreground color,
# as background color ("on_...") and as bold foreground color ("bold ...")
my %clr = map { $_ => color ($_) }
	  map { $_, "on_$_", "bold $_" }
    qw( red green blue black white cyan magenta yellow );
my $clr_red = $clr{$old_color};
my $clr_grn = $clr{$new_color};
my $clr_rev = $clr{$rev_color};
my $clr_dgb = $clr{$rc{verbose} || "cyan"};
my $reset   = RESET;
if ($list_colors) {
    # Sorted, so the list is the same on every invocation
    say $clr{$_}, $_, $reset for sort keys %clr;
    exit;
    }

# Color sequences for changed text. A background color has no bold variant,
# so "bold " is removed before the "on_..." lookup. Under --reverse the
# roles of foreground and background color are swapped.
my $clr_old = $opt_r ? $clr_rev . $clr{"on_$old_color" =~ s/bold //ir}
                     : $clr_red . $clr{"on_$rev_color" =~ s/bold //ir};
my $clr_new = $opt_r ? $clr_rev . $clr{"on_$new_color" =~ s/bold //ir}
                     : $clr_grn . $clr{"on_$rev_color" =~ s/bold //ir};
# Indicators
# Markers shown under changed characters when colors are not in use:
# colored triangles with --fancy, a plain ^ otherwise
my $chr_old = $opt_f ? $clr_old . "\x{25bc}$reset" : "^";
my $chr_new = $opt_f ? $clr_new . "\x{25b2}$reset" : "^";
my $chr_eql = " ";
# Line prefixes: -/+ for unified diff, </> for traditional diff
my $chr_lft = defined $opt_u ? "-" : "<";
my $chr_rgt = defined $opt_u ? "+" : ">";

# Output is always written as UTF-8, as the --fancy indicators are outside
# of ASCII. This layer must be pushed exactly once: pushing :encoding on
# STDOUT a second time (as was done under --utf-8) stacks a second layer
# and encodes all output twice.
binmode STDOUT, ":encoding(utf-8)";

# The first file is required. Test for a non-empty value instead of for
# truth, so a file that is named "0" is accepted too.
my $f1 = shift;
defined $f1 && length $f1 or usage (1);
my $f2 = shift // "-";

# Only one of the two can be read from standard input
$f1 eq "-" && $f2 eq "-" and usage (1);

$opt_U and binmode STDIN,  ":encoding(utf-8)";

# Return all lines of a file, or of standard input for "-". STDIN is read
# explicitly: <> would read files named in left-over arguments instead.
my $read_lines = sub {
    my $file = shift;
    $file eq "-" and return <STDIN>;
    open my $fh, "<", $file or die "$file: $!\n";
    $opt_U and binmode $fh, ":encoding(utf-8)";
    return <$fh>;
    };
my @d1 = $read_lines->($f1);
my @d2 = $read_lines->($f2);
# A unified diff starts with two header lines that name the old and the new
# file with their modification time. Anything that is not a plain file is
# reported as *STDIN with the current time.
# The test is for defined-ness, like everywhere else in this script: -u0
# (no context lines) is a unified diff too and still gets its header.
if (defined $opt_u) {
    for ([ "---", $f1 ], [ "+++", $f2 ]) {
	my ($tag, $file) = @$_;
	if (-f $file) {
	    say $tag, " $file\t", scalar localtime ((stat $file)[9]);
	    }
	else {
	    say $tag, " *STDIN\t",  scalar localtime;
	    }
	}
    }

# Options for Algorithm::Diff. Only when one of -i, -b or -Z is active,
# lines are compared by a normalized key instead of by their content:
# lower-cased for -i, without trailing blanks for -Z and with every run of
# blanks replaced by a single space for -b.
my $cmp_sub;
if ($opt_i || $opt_b || $opt_Z) {
    $cmp_sub = { keyGen => sub {
	my ($key) = @_;
	if ($opt_i) { $key = lc $key;          }
	if ($opt_Z) { $key =~ s/[ \t]+$//g;    }
	if ($opt_b) { $key =~ s/[ \t]+/ /g;    }
	$key;
	}};
    }

# Line based diff of both inputs, reporting 1-based line numbers
my $diff = Algorithm::Diff->new (\@d1, \@d2, $cmp_sub);
$diff->Base (1);

# Main loop: walk all hunks of the line based diff and print the changes.
#   $N    counts all hunks (equal and changed)
#   $idx  counts the changed hunks, for --index
#   @s    holds the lines of the most recent equal hunk, used as context
#         for a unified diff
my ($N, $idx, @s) = (0, 0);
# The index tag is only colored when colors are allowed: with --no-color
# no escape sequences should show up in front of the position indication.
my ($tag_on, $tag_off) = $opt_c ? ($clr_dgb, $reset) : ("", "");
while ($diff->Next) {
    $N++;
    if ($diff->Same) {
	if (defined $opt_u) {
	    @s = $diff->Items (1);
	    # Trailing context for the change before this hunk. The very
	    # first hunk has no change in front of it.
	    $N > 1 and print "  $_" for grep { defined } @s[0..($opt_u - 1)];
	    # Pad at the front, so the last $opt_u entries always exist
	    # when the leading context of the next change is selected
	    unshift @s, undef while @s < $opt_u;
	    }
	next;
	}
    my $sep = "";
    # $d[0] has the old lines of this hunk, $d[1] the new lines
    my @d = map {[ $diff->Items ($_) ]} 1, 2;

    # --ignore-blank-lines: skip hunks where both sides are just white-space
    if ($opt_B and "@{$d[0]}" !~ m/\S/ && "@{$d[1]}" !~ m/\S/) {
	# Modify @s for -u?
	next;
	}
    if ($opt_I) {
	$idx++;
	# A positive index selects just that one chunk
	$opt_I > 0 && $idx != $opt_I and next;
	printf "%s[%03d]%s ", $tag_on, $idx, $tag_off;
	}

    if (!@{$d[1]}) {
	# Lines were deleted: color the complete lines. The reset is put
	# in front of the line ending.
	printf "%d,%dd%d\n", $diff->Get (qw( Min1 Max1 Max2 ));
	if ($opt_c) {
	    $_ = $clr_old . (s/$/$reset/r) for @{$d[0]}
	    }
	}
    elsif (!@{$d[0]}) {
	# Lines were added: color the complete lines
	printf "%da%d,%d\n", $diff->Get (qw( Max1 Min2 Max2 ));
	if ($opt_c) {
	    $_ = $clr_new . (s/$/$reset/r) for @{$d[1]}
	    }
	}
    else {
	# Lines were changed: mark the changes per character
	$sep = "---\n" unless defined $opt_u;
	printf "%d,%dc%d,%d\n", $diff->Get (qw( Min1 Max1 Min2 Max2 ));
	@d = subdiff (@d);
	}
    if ($opt_u and @s) {
	# Leading context: the last $opt_u lines of the previous equal hunk
	print "  $_" for grep { defined } map { $s[$#s - $opt_u + $_] } 1..$opt_u;
	}
    print "$chr_lft $_" for @{$d[0]};
    print $sep;
    print "$chr_rgt $_" for @{$d[1]};
    }

# Character based diff of the two sides of a single change chunk.
#
#   my ($old, $new) = subdiff (\@old_lines, \@new_lines);
#
# Returns two array references with the lines to print for the old and for
# the new side. With colors, the changed characters are wrapped in the
# color sequences. Without colors, every line is followed by a line with
# indicators under the changed characters. Under --verbose, a line that
# describes all changed characters is added to each side that has changes.
sub subdiff {
    my $d = Algorithm::Diff->new (map { [ map { split m// } @$_ ] } @_);
    # $d1/$d2: text of the old/new side, $x1/$x2: the matching indicators,
    # @h1/@h2: descriptions of the changed characters for --verbose
    my ($d1, $d2, $x1, $x2, @h1, @h2) = ("", "", "", "");
    # Color sequences are empty under --no-color, so the verbose lines are
    # free of escape sequences too
    my ($on_old, $on_new, $on_dbg, $off) = $opt_c
	? ($clr_old, $clr_new, $clr_dgb, $reset)
	: ("", "", "", "");
    # Describe a single character: its Unicode name under --utf-8 (code
    # point when it has no name), its hexadecimal value otherwise
    my $describe = sub {
	my $chr = shift;
	$opt_U or return unpack "H*", $chr;
	return charnames::viacode (ord $chr) // sprintf "U+%04X", ord $chr;
	};
    while ($d->Next) {
	my @c = map {[ $d->Items ($_) ]} 1, 2;
	if ($d->Same) {
	    # Unchanged: copy as-is. In the indicator lines white-space is
	    # kept, so TABs and line endings stay aligned with the text.
	    # Each side is built from its own items.
	    $d1 .= join "" => @{$c[0]};
	    $d2 .= join "" => @{$c[1]};
	    $x1 .= join "" => map { s/\S/$chr_eql/gr } @{$c[0]};
	    $x2 .= join "" => map { s/\S/$chr_eql/gr } @{$c[1]};
	    next;
	    }
	if (@{$c[0]}) {
	    $d1 .= join "" => $on_old, @{$c[0]}, $off;
	    $x1 .= join "" => map { s/\S/$chr_old/gr } @{$c[0]};
	    $opt_v and push @h1, map { $describe->($_) } @{$c[0]};
	    }
	if (@{$c[1]}) {
	    $d2 .= join "" => $on_new, @{$c[1]}, $off;
	    $x2 .= join "" => map { s/\S/$chr_new/gr } @{$c[1]};
	    $opt_v and push @h2, map { $describe->($_) } @{$c[1]};
	    }
	}
    # Back from one string per side to a list of lines per side
    my @d = map { [ split m/(?<=\n)/ => $_ ] } $d1, $d2;
    unless ($opt_c) {
	# No colors: put the indicator line right after each text line
	my @x = map { [ split m/(?<=\n)/ => $_ ] } $x1, $x2;
	foreach my $n (0, 1) {
	    $d[$n] = [ map { $d[$n][$_], $x[$n][$_] // "" } 0 .. (scalar @{$d[$n]} - 1) ];
	    }
	}
    if ($opt_v) {
	@h1 and push @{$d[0]}, sprintf " -- ${on_dbg}verbose$off : %s\n",
	    join ", " => map { $on_old.$_.$off } @h1;
	@h2 and push @{$d[1]}, sprintf " -- ${on_dbg}verbose$off : %s\n",
	    join ", " => map { $on_new.$_.$off } @h2;
	}
    return @d;
    } # subdiff

# Read the defaults from the user's configuration files.
#
#   my %rc = read_rc ();
#
# The files ccdiff.rc, .ccdiffrc and .config/ccdiff in $HOME are read in
# that order; a later file overrules the options of an earlier one. Files
# that are empty, writable by group or others, or that cannot be opened
# are skipped. Returns the options as a hash (empty if nothing was read):
# keys are the lower-cased option names in their short form (bg, new, old,
# utf8, ...), the values no/false are returned as 0 and yes/true as -1.
sub read_rc {
    my %rc;
    # Without a home directory there is nothing to look for. This also
    # prevents warnings and a lookup in the root directory.
    my $home = $ENV{HOME};
    defined $home && length $home or return %rc;
    foreach my $rcf (
	    "$home/ccdiff.rc",
	    "$home/.ccdiffrc",
	    "$home/.config/ccdiff",
	    ) {
	-s $rcf or next;
	# Do not trust a file that others were able to modify
	(stat $rcf)[2] & 022 and next;
	open my $fh, "<", $rcf or next;
	while (my $line = <$fh>) {
	    # option = value  or  option : value, anything else is ignored
	    my ($k, $v) = $line =~ m/^\s*([-\w]+)\s*[:=]\s*(.*\S)/ or next;
	    # Accept the documented aliases: foo-color, foo_colour,
	    # background*, unicode, utf, utf-8
	    my $key = lc $k
	        =~ s{[-_]colou?r$}{}ir
	        =~ s{background}{bg}ir
	        =~ s{^(?:unicode|utf-?8?)$}{utf8}ir;
	    $rc{$key} = $v
		=~ s{^(?:no|false)$}{0}ir
		=~ s{^(?:yes|true)$}{-1}ir; # -1 is still true
	    }
	close $fh;
	}
    return %rc;
    } # read_rc

__END__

=head1 NAME

ccdiff - Colored character diff

=head1 SYNOPSIS

 ccdiff [options] file1|- file2|-

 ccdiff --help
 ccdiff --man
 ccdiff --info

=head1 DESCRIPTION

=head1 OPTIONS

=head2 Command line options

=over 2

=item --help -?

Show a summary of the available command-line options and exit.

=item --version -V

Show the version and exit.

=item --man

Show this manual using pod2man and nroff.

=item --info

Show this manual using pod2text.

=item --utf-8 -U

All I/O (streams to compare and standard out) are in UTF-8.

=item --unified[=3] -u [3]

Generate a unified diff. The number of context lines is optional. When omitted
it defaults to 3. Currently there is no provision for dealing with overlapping
diff chunks. If the common part between two diff chunks is shorter than twice
the number of context lines, some lines may show twice.

The default is to use traditional diff:

 5,5c5,5
 < Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
 ---
 > Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539

a unified diff (-u1) would be

 5,5c5,5
   Tue Sep  6 05:43:59 2005,B.O.Q.S.,,1125978239,1943341
 - Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
 + Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
   Mon Feb 23 10:37:02 2004,R.X.K.S.,van,1077529022,1654127

=item --verbose[=1] -v[1]

Show an additional line for each old or new section in a change chunk (not for
added or deleted lines) that shows the hexadecimal value of each character. If
C<--utf-8> is in effect, it will show the Unicode character name(s).

This is a debugging option, so invisible characters can still be "seen".

C<--verbose> accepts an optional verbosity-level, but no specific behavior for
these levels has been defined yet.

=item --color -c

Use colors to show differences. As this is the default, use C<--no-color> or
C<--fancy> to use markers instead of color indicators.

C<--no-color> is especially useful if the terminal does not support colors,
or if you want to copy/paste the output to (ASCII) mail.

=item --pink -p

Change the default C<red> for deleted text to the color closest to pink that
is supported by L<Term::ANSIColor>: C<magenta>.

=item --fancy -f

Use (colored) Unicode indicators under changed text instead of the default C<^>.

I did consider using U+034e (COMBINING UPWARDS ARROW BELOW), but as most
terminals are probably unable to show it, I did not pursue the idea.

=item --reverse -r

Reverse the foreground and background for the colored indicators.

If the foreground color has C<bold>, it will be stripped from the new background
color.

=item --list-colors

List available colors and exit.

=item --old=color

Define the foreground color for deleted text.

=item --new=color

Define the foreground color for added text.

=item --bg=color

Define the background color for changed text.

=item --index --idx -I

Prefix position indicators with an index.

If a positive number is passed (C<--index=4> or C<-I 4>), display just the
chunk with that index. This is useful in combination with C<--verbose>.

=item --ignore-case -i

Ignore case on comparison.

=item --ignore-all-space -w 

Ignore all white-space changes. This will set all options C<-b>, C<-Z>, C<-E>,
and C<-B>.

=item --ignore-trailing-space -Z 

Ignore changes in trailing white-space (TAB's and spaces).

=item --ignore-ws|ignore-space-change -b 

Ignore changes in horizontal white-space (TAB's and spaces). This does not
include white-space changes that split non-white-space or remove white-space
between two non-white-space elements.

=item --ignore-tab-expansion -E 

NYI

=item --ignore-blank-lines -B 

B<Just Partly Implemented> (WIP)

=back

=head2 Configuration files

In order to be able to overrule the defaults set in C<ccdiff>, one can set
options specific for this login. The following option files are looked for
in this order:

 - $HOME/ccdiff.rc
 - $HOME/.ccdiffrc
 - $HOME/.config/ccdiff

and evaluated in that order. Any options specified in a file later in that
chain will overwrite previously set options.

An option file is only read and evaluated if it is not empty and not writeable
by anyone other than the owner.

The syntax of the file is one option per line, where leading and trailing
white-space is ignored. If that line then starts with one of the options
listed below, followed by optional white-space followed by either an C<=> or
a C<:>, followed by optional white-space and the values, the value is assigned
to the option. The values C<no> and C<false> (case insensitive) are aliases
for C<0>. The values C<yes> and C<true> are aliases to C<-1> (C<-1> being a
true value).

Between parens is the corresponding command-line option.

=over 2

=item color (-c)

 color   : true

defines if colors should be used. The default is to use colors. The C<-c>
(C<--color>) and C<--no-color> command line options overrule the value that
was set from a configuration file.

=item reverse (-r)

 reverse : false

defines if changes are displayed as foreground-color over background-color
or background-color over foreground-color. The default is C<false>, so it will
color the changes with the appropriate color (C<new> or C<old>) over the
default background color.

=item new (--new)

 new     : green

defines the color to be used for added text. The default is C<green>.

any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. This option can include C<bold> either as prefix or
as suffix.

This option may also be specified as

 new-color
 new_color
 new-colour
 new_colour

=item old (--old)

 old     : red

defines the color to be used for deleted text. The default is C<red>.

any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. This option can include C<bold> either as prefix or
as suffix.

This option may also be specified as

 old-color
 old_color
 old-colour
 old_colour

=item bg (--bg)

 bg      : white

defines the color to be used as background for changed text. The default is
C<white>.

any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. The C<bold> attribute is not allowed.

This option may also be specified as

 bg-color
 bg_color
 bg-colour
 bg_colour
 background
 background-color
 background_color
 background-colour
 background_colour

=item verbose

 verbose : cyan

defines the color to be used as color for the verbose tag. The default is
C<cyan>. This color will only be used under C<--verbose>.

any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning.

This option may also be specified as

 verbose-color
 verbose_color
 verbose-colour
 verbose_colour

=item utf8 (-U)

 utf8    : yes

defines whether all I/O is to be interpreted as UTF-8. The default is C<no>.

This option may also be specified as

 unicode
 utf
 utf-8

=item index (-I)

 index   : no

defines if the position indication for a change chunk is prefixed with an
index number. The default is C<no>. The index is 1-based.

Without this option, the position indication would be like

 5,5c5,5
 19,19d18
 42a42,42

with this option, it would be

 [001] 5,5c5,5
 [002] 19,19d18
 [003] 42a42,42

When this option contains a positive integer, C<ccdiff> will only show
the diff chunk with that index.

=back

=head1 SEE ALSO

L<Algorithm::Diff>, L<Text::Diff>

=head1 AUTHOR

H.Merijn Brand

=head1 COPYRIGHT AND LICENSE

 Copyright (C) 2018-2018 H.Merijn Brand.  All rights reserved.

This library is free software;  you can redistribute and/or modify it under
the same terms as Perl itself.

=for elvis
:ex:se gw=75|color guide #ff0000:

=cut
