#!perl

# Release metadata; both values are printed by the --version option.
our $DATE = '2015-01-17'; # DATE
our $VERSION = '0.09'; # VERSION

use 5.010001;
use strict;
use warnings;

use Getopt::Long;

# Command-line settings, filled in by parse_cmdline() and read by run().
my %Opts = (
    # Name of the set operation to perform; stays undef until chosen.
    'op'               => undef,

    # Comparison modifiers, both off by default.
    'ignore_case'      => 0,
    'ignore_all_space' => 0,
);

# Parse the command line into %Opts and validate it.
#
# Options are consumed from @ARGV by Getopt::Long; whatever remains in @ARGV
# afterwards is the list of input files. --version and --help print their text
# and exit 0. Any problem (bad option, missing or unknown operation, no input
# file) ends the program with exit status 99.
sub parse_cmdline {
    # Print a complaint on STDERR and leave with the usage-error status.
    my $bail = sub { warn $_[0]; exit 99 };

    my @known_ops = qw(union intersect diff symdiff);

    my @spec = (
        'ignore-case|i'      => \$Opts{ignore_case},
        'ignore-all-space|w' => \$Opts{ignore_all_space},
        'op=s'               => \$Opts{op},
    );

    # Each operation name doubles as a flag: --union means --op=union, etc.
    for my $name (@known_ops) {
        push @spec, $name => sub { $Opts{op} = $name };
    }

    push @spec, 'version|v' => sub {
        no warnings;
        say "setop version $main::VERSION ($main::DATE)";
        exit 0;
    };

    push @spec, 'help|h' => sub {
        print <<USAGE;
Usage:
  setop [OPTIONS]... FILE FILE ...
  setop --help
Options:
  --ignore-case, -i
  --ignore-all-space, -w
  --op=s
  --union (shortcut for --op=union)
  --intersect (shortcut for --op=intersect)
  --diff (shortcut for --op=diff)
  --symdiff (shortcut for --op=symdiff)
For more details, see the manpage/documentation.
USAGE
        exit 0;
    };

    # Getopt::Long has already reported what it disliked.
    GetOptions(@spec) or exit 99;

    $bail->("Please specify an operation (--op)\n")
        unless $Opts{op};

    $bail->("Unknown op, please see --help for known operations\n")
        unless grep { $_ eq $Opts{op} } @known_ops;

    $bail->("Please specify at least 1 input files\n")
        if @ARGV < 1;
}

# Perform the set operation named by $Opts{op} on the files listed in @ARGV
# and print the resulting lines to the currently selected output handle.
#
# Inputs:
#   @ARGV               - input file names, processed in order; "-" means STDIN
#                         and may be given only once.
#   $Opts{op}           - one of union, intersect, diff, symdiff.
#   $Opts{ignore_case}  - compare lines case-insensitively (via lc).
#   $Opts{ignore_all_space} - compare lines with all whitespace removed.
#
# Lines are compared by a "key" (the line itself, or its normalized form when
# one of the ignore options is on). The first-seen original text of each key
# is what gets printed, in first-seen order (kept by Tie::IxHash).
#
# Results:
#   union     - every distinct key from all files.
#   intersect - keys of the first file that also occur in every other file.
#   diff      - keys of the first file that occur in no later file.
#   symdiff   - keys that occur in exactly one file. Repeats of a line inside
#               a single file do not count as a second file.
#
# Exits 99 if STDIN is requested twice, and 255 (the documented I/O error
# status) if an input file cannot be opened. A plain die() is avoided for the
# latter because its exit status would be taken from $! rather than being 255.
sub run {
    require Tie::IxHash;

    my $op  = $Opts{op};
    my $ic  = $Opts{ignore_case};
    my $ias = $Opts{ignore_all_space};
    my $ign = $ic || $ias;

    # Ordered map: key => bookkeeping value, whose meaning depends on $op:
    #   union, diff - always 1 (mere presence matters)
    #   intersect   - number of leading files (1..N) that all contain the key
    #   symdiff     - index of the only file containing the key so far,
    #                 or 0 once a second file is found to contain it
    tie my(%state), 'Tie::IxHash';

    # key => first-seen original line. Only needed when keys are normalized;
    # otherwise the key is the line itself.
    my %orig_line;

    my $remember = sub {
        my ($k, $line, $value) = @_;
        $state{$k}     = $value;
        $orig_line{$k} = $line if $ign;
    };

    my $nfiles = @ARGV;
    my $stdin_specified;

    for my $i (1..$nfiles) {
        my $fname    = $ARGV[$i-1];
        my $is_stdin = $fname eq '-';

        my $fh;
        if ($is_stdin) {
            if ($stdin_specified++) {
                warn "Can't use STDIN twice\n";
                exit 99;
            }
            $fh = \*STDIN;
        } else {
            unless (open $fh, "<", $fname) {
                warn "Can't open input file $fname: $!\n";
                exit 255;
            }
        }

        while (my $line = <$fh>) {
            my $k = $line;
            if ($ign) {
                $k = lc $k      if $ic;
                $k =~ s/\s+//g  if $ias;
            }

            # Stored values are always defined, so this doubles as an
            # existence test (and does not create the key).
            my $seen = $state{$k};

            if ($op eq 'union') {
                $remember->($k, $line, 1) unless defined $seen;
            } elsif ($op eq 'intersect') {
                if ($i == 1) {
                    $remember->($k, $line, 1) unless defined $seen;
                } elsif (defined $seen && $seen == $i-1) {
                    # Present in files 1..$i-1 and now in file $i as well.
                    # Comparing against $i-1 makes repeats within this file
                    # harmless.
                    $state{$k} = $i;
                }
            } elsif ($op eq 'diff') {
                if ($i == 1) {
                    $remember->($k, $line, 1) unless defined $seen;
                } elsif (defined $seen) {
                    delete $state{$k};
                    delete $orig_line{$k};
                }
            } elsif ($op eq 'symdiff') {
                if (!defined $seen) {
                    $remember->($k, $line, $i);
                } elsif ($seen && $seen != $i) {
                    # Seen earlier in a *different* file: no longer unique.
                    $state{$k} = 0;
                }
            }
        }

        close $fh unless $is_stdin;
    }

    # Print the result in first-seen order.
    for my $k (keys %state) {
        my $value = $state{$k};
        next if $op eq 'intersect' && $value != $nfiles;
        next if $op eq 'symdiff'   && !$value;

        my $out = $ign ? $orig_line{$k} : $k;
        print $out;
    }
}

# MAIN: validate the command line (a usage error exits with status 99), then
# perform the requested set operation and print its result.

parse_cmdline();
run();

1;
# ABSTRACT: Set operations (union, intersection, difference, symmetric diff) on lines of files
# PODNAME: setop

__END__

=pod

=encoding UTF-8

=head1 NAME

setop - Set operations (union, intersection, difference, symmetric diff) on lines of files

=head1 VERSION

This document describes version 0.09 of setop (from Perl distribution App-setop), released on 2015-01-17.

=head1 SYNOPSIS

 setop [OPTION]... FILE FILE ...

Examples:

 % setop --union file1 file2 file3 ;# combine files, duplicates removed, order preserved
 % setop --intersect file1 file2 file3; # show lines common in all three files
 % setop --diff <(ls /path1) <(ls /path2) ;# show files in /path1 not in /path2

=head1 DESCRIPTION

C<setop> treats files as sets of lines, and performs operations between the
sets.

=head1 OPTIONS

=over

=item * --help, -h

Show help message and exit.

=item * --version, -v

Show version and exit.

=item * --op=S

Pick operation. Known operations are: B<union> (return lines from the first file
and others, duplicates removed, order preserved), B<intersect> (return common
lines found in every file, order preserved), B<diff> (return lines found in the
first file but not in any of the other files, duplicates removed, order
preserved), B<symdiff> (short for "symmetric difference", return lines found in
either file but not both, duplicates removed, order preserved).

=item * --union

Shortcut for C<--op union>.

=item * --intersect

Shortcut for C<--op intersect>.

=item * --diff

Shortcut for C<--op diff>.

=item * --symdiff

Shortcut for C<--op symdiff>.

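=item * --ignore-case, -i

Ignore differences in letter case when comparing lines. A matching line is
printed as it was first seen.

=item * --ignore-all-space, -w

Ignore all whitespace when comparing lines. A matching line is printed as it
was first seen.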

=back

=head1 EXIT CODES

0 on success.

255 on I/O error.

99 on command-line options error.

=head1 HISTORY

I first wrote C<fileop> in Ruby in 2003, since Ruby has nice C<+> and C<->
operators for arrays.

Rewrote in Perl in 2014. Script renamed to C<setop>, changed command-line
options a bit, now preserves order of lines.

=head1 SEE ALSO

=head1 HOMEPAGE

Please visit the project's homepage at L<https://metacpan.org/release/App-setop>.

=head1 SOURCE

Source repository is at L<https://github.com/sharyanto/perl-App-setop>.

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-setop>.

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 AUTHOR

perlancar <perlancar@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by perlancar@cpan.org.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
