#!/usr/bin/perl
#===============================================================================
#      PODNAME:  check_plugin
#     ABSTRACT:  check Net::IP::Identifier::Plugin::* plugins
#
#       AUTHOR:  Reid Augustin
#        EMAIL:  reid@LucidPort.com
#      CREATED:  10/12/2014 05:53:55 PM
#===============================================================================

use 5.008;
use strict;
use warnings;
use utf8;

package Local::Net;
use parent 'Net::IP::Identifier::Net';

sub new {
    my ($class, $net) = @_;

    my $self = $class->SUPER::new($net);

    $self->src_str($net);   # will be HASH(...) if $net is already an object

    return $self;
}

sub src_str {
    my ($self, $new) = @_;

    if (@_ > 1) {
        $self->{src_str} = $new;
    }
    return $self->{src_str};
}


package Local::Payload; # a Binode payload
use Moo;

has whois => (
    is => 'rw',
);
has cidr_idx => (
    is => 'rw',
);


package Local::Whois;
use Moo;
use Net::IP;   # for IP_IDENTICAL, etc

has src_str => (
    is => 'rw',
);
has src_net => (
    is => 'rw',
);
has name => (
    is => 'rw',
);
has no_entity => (  # TODO change this to disposition?
    is => 'rw',
);
has believe => (    # for debug: trust input string, don't run jwhois
    is => 'rw',
);
has inetnum => (
    is => 'rw',
);
has disposition => (
    is => 'rw',
);

my @whois_ignore_regexs = (   # lines to ignore in jwhois results
    qr[American Registry for Internet Numbers],
    qr[Various Registries \(Maintained by ARIN\)],
    qr[^parent:],
    qr[^routes],
    qr[^mnt-routes],
);

my @whois_skip_regexs = (   # lines to skip next line in jwhois results
    qr[The following results may also be obtained],
    qr[The query is assumed to be],
);

my $re = Net::IP::Identifier::Regex->new;
my $re_ip    = $re->IP;     # single IPv4/6 (no range/cidr/add notations)
my $re_range = $re->range;  # IPv4/6 range
my $re_cidr  = $re->cidr;   # IPv4/6 cidr
my $re_plus  = $re->plus;   # IPv4/6 plus notation range
my $re_netblock = qr{(?:$re_range|$re_plus|$re_cidr)};

sub netblock_lines_in_whois {
    my ($self) = @_;

    if (not defined $self->{netblock_lines_in_whois}) {
        $self->set_netblocks_lines_from_whois;
    }
    return wantarray
    ? @{$self->{netblock_lines_in_whois}}
    :   $self->{netblock_lines_in_whois};
}

# run a 'whois', collect all lines that contain netblock strings
sub set_netblocks_lines_from_whois {
    my ($self) = @_;

    $self->{netblock_lines_in_whois} = [];

    my $arg = $self->src_str;
    my $name = $self->name;
    my @whois;
    if ($self->believe) {
        print ("not running jwhois $arg (-believe is set),");
        @whois = ("...  $arg\n");
        goto Found; # don't bother to scan for entity name, trust me!
    }
    else {
        if ($arg =~ m/-/) { # a range?
            ($arg) = $self->src_net->range_to_cidrs;    # just take first CIDR
        }
        print ("running jwhois $arg,");
        @whois = `jwhois $arg`;
    }

    for my $line (@whois) { # make sure we see $name somewhere in jwhois result
        goto Found if ($line =~ m/$name/i);  # Found one! skip error stuff
    }
    $self->no_entity(1);    # TODO change this to disposition?
    print " $name not found in whois output\n";
    return; # no lines containing $name in jwhois.  does this IP belong to $name?

Found:
    print (" done\n");
    my $skip;   # set means skip next whois line

Line:
    for my $line (@whois) {
        if ($skip) {
            $skip = 0;
            next;
        }
        if ($line =~ m/^inetnum:\s*(\S+)/) {
            $self->{netblock_lines_in_whois} = [ $line ];
            $self->inetnum($1);
            last;   # inetnum trumps all others
        }
        elsif ($line =~ m/\b($re_netblock)\b/) {
            # found a network, plus, or cidr range
            for my $re (@whois_ignore_regexs) {
                next Line if ($line =~ m/$re/); # but it's one we should ignore
            }
            push @{$self->netblock_lines_in_whois}, $line;
        }
        for my $re (@whois_skip_regexs) {   # lines that indicate we should
            if ($line =~ m/$re/) {          # skip the following line
                $skip = 1;
                last;
            }
        }
    }
}

# extract netblocks from a line
sub netblocks_from_line {
    my ($self, $line) = @_;

    my @blocks = $line =~ m/\b($re_netblock)\b/;
    return @blocks;
}

sub filtered_netblocks {
    my ($self) = @_;

    if (not $self->{filtered_netblocks}) {
        $self->set_filtered_netblocks;
    }
    return wantarray
    ? @{$self->{filtered_netblocks}}
    :   $self->{filtered_netblocks};
}

# filter netblock_lines_in_whois, returning only netblocks that pertain
#   to our entity name (whois results often have random netblocks
#   and IPs floating around).
sub set_filtered_netblocks {
    my ($self, @new) = @_;

    if (@new) {
        my @nets = map { Local::Net->new($_) } @new;
        map { die "Not a Local::Net object\n" if (not $_->isa('Local::Net')) } @nets;
        $self->{filtered_netblocks} = \@nets;
        return;
    }

    $self->{filtered_netblocks} = [];
    if ($self->inetnum) {   # inetnum is solid, use it
        return $self->set_filtered_netblocks(
            Local::Net->new($self->inetnum),
        );
    }
    if (@{$self->netblock_lines_in_whois} == 1) {    # only one line with a netblock, must be the one
        my @netblocks = $self->netblocks_from_line($self->netblock_lines_in_whois->[0]);
        return $self->set_filtered_netblocks(
            map { Local::Net->new($_) }
            @netblocks,
        );
    }

    my @netblock_lines_with_name;
    my $name = $self->name;
    for my $line (@{$self->netblock_lines_in_whois}) {
        if ($line =~ m/$name/i) {
            push @netblock_lines_with_name, $line;
        }
    }

    my @netblocks;
    if (@netblock_lines_with_name) {  # prefer lines that contain entity name
        @netblocks = map { $self->netblocks_from_line($_) } @netblock_lines_with_name;
    }
    else {  # no netblock lines contain entity name
        # use all available netblock_lines_in_whois
        @netblocks = map { $self->netblocks_from_line($_) } @{$self->netblock_lines_in_whois};
    }

    my @nets;   # create array of Local::Net objects here
    for my $netblock (@netblocks) {
        my $net = Local::Net->new($netblock);
        # reject class A or larger
        next if ($net->prefixlen and $net->prefixlen <= 8);
        push @nets, $net;
    }

    # remove nets that are equal or contained within larger nets
    for my $ii (0 .. $#nets) {
        for my $jj ($ii+1 .. $#nets) {
            next if (not $nets[$ii] or not $nets[$jj]);
            my $overlap = $nets[$ii]->overlaps($nets[$jj]);
            if ($overlap eq $IP_IDENTICAL) {    # duplicate netblocks, choose on notation
                if ($nets[$ii]->src_str =~ m/\+/) {    # avoid + notation
                    $nets[$ii] = undef;
                }
                elsif ($nets[$ii]->src_str =~ m/-/) {     # prefer - notation
                    $nets[$jj] = undef;
                }
                else {
                    $nets[$ii] = undef;
                }
            }
            elsif ($overlap eq $IP_A_IN_B_OVERLAP) {    # ii is inside jj
                $nets[$ii] = undef
            }
            elsif ($overlap eq $IP_B_IN_A_OVERLAP) {    # jj is inside ii
                $nets[$jj] = undef
            }
        }
    }

    @nets = grep { defined } @nets;
    map {
     die "Not a Local::Net object\n" if (not $_->isa('Local::Net'))
    } @nets;
    if (@nets > 1) {
        my $nets = join '', $self->netblock_lines_in_whois;
        die "Too many netblock candidate lines in jwhois result:$nets\n";
    }
    $self->{filtered_netblocks} = \@nets;
}

# returns filtered_netblocks, but split into CIDRs if necessary
sub filtered_cidrs {
    my ($self) = @_;

    if (not $self->{filtered_cidrs}) {
        my @cidrs;
        for my $fn (@{$self->filtered_netblocks}) {
            push @cidrs, $fn->range_to_cidrs;
        }
        $self->{filtered_cidrs} = \@cidrs;
    }
    return wantarray
    ? @{$self->{filtered_cidrs}}
    :   $self->{filtered_cidrs};
}

# return integer form of first IP of first filtered_cidr or a dummy
sub intip { # this method is used for sorting
    my ($self) = @_;

    my $net = $self->filtered_cidrs->[0];
    return $net->intip if ($net);
    if (not $self->{dummy_net}) {
        $self->{dummy_net} = Local::Net->new('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff');
    }
    return $self->{dummy_net}->intip + $self->src_net->intip;
}

1;

package main;

use open qw( :utf8 :std );  # UTF8 for all files and STDIO
use IO::File;
use File::Spec;
use Getopt::Long qw(:config pass_through);  # for GetOptions(...)
use Net::IP;
use Net::IP::Identifier::Regex;
use Net::IP::Identifier::Binode;
use Sort::Key::IPv4 qw(ipv4sort);
use Scalar::Util qw( refaddr );

# VERSION

my (undef, undef, $myName) = File::Spec->splitpath($0);

my $file;
my $entity;
my $verbose = 0;
my $believe = 0;    # believe the input, don't verify by running jwhois (for debug)

exit 0 if (not
    GetOptions(
        'file=s'   => \$file,
        'entity=s' => \$entity,
        'verbose'  => \$verbose,
        'believe'  => \$believe,
    )
);

if (@ARGV and
    not $file) {
    $file = shift @ARGV;
}

my $fh;
if ($file) {
    open $fh, '<', $file or die "failed to open $file for reading: $!\n";
}
elsif (@ARGV) {
    require IO::String;
    my @args;
    for my $ii (0 .. $#ARGV) {
        my $arg = $ARGV[$ii];
        if (($arg eq '-' or $arg eq '+')    # allow two types of net range with spaces
            and @args
            and $ii < @ARGV) {
            $args[-1] .= "$arg$ARGV[++$ii]";
        }
        else {
            push @args, $arg;
        }
    }
    $fh = IO::String->new(join "\n", @args)
}
else {
    $fh = *STDIN;
}

my $Re = Net::IP::Identifier::Regex->new;

my @src_strs;
my $package_entity;
my $re_entity;
my $ip_any = $Re->IP_any;   # IPv4/6 range, plus, cidr or individual IP
while (my $line = $fh->getline) {
    last if not defined $line;

    next if ($line =~ m/\b_NO_CHECK_\b/);
    if ($line =~ m/\b_ENTITY_REGEX_\s+(.+)\s*#?/) {
        $re_entity = $1;
        next;
    }
    if ($line =~ m/^package .*::(\w+)/) {
        $package_entity = $1;
        next;
    }
    while (1) {
        if ($line =~ s/\b($ip_any)\b//) {
            next if (length $1 < 4);
            push @src_strs, $1;
        }
        else {
            last;
        }
    }
}

my ($file_entity) = $file =~ m/.*?([^\/]*)$/;
$file_entity =~ s/\.pm$//;

$entity ||= $re_entity || $package_entity || $file_entity;
die "Please define the entity name\n" if not $entity;

# create hash of Whois objects from netblocks found in the input file
# we won't actually call whois on them just yet...
my %whois_hash;
for my $src_str (@src_strs) {
    my $net = Local::Net->new($src_str);
    my $differ = $net->differ;
    $whois_hash{$net->masklen}{$src_str} = Local::Whois->new(
        src_str => $src_str,
        src_net => $net,
        name    => $entity,
        believe => $believe,
    );
}

my $root_v6 = Net::IP::Identifier::Binode->new;
# Place the IPv4 block in the IPv6 tree (IPv4 mapped IPv6)
my $root_v4 = $root_v6->construct(Local::Net->new('::ffff:0:0/96')->masked_ip);

for my $masklen (sort { $b <=> $a } keys %whois_hash) {
    my $whois_group = $whois_hash{$masklen};

    # get src_strs in order of block size, then IP of base
    for my $src_str (
        map { $_->[0] }           # get the sorted keys
        sort { $a->[1] <=> $b->[1] } # use numeric comparison
        map  { [ $_, ($masklen << 128) # size of netblock
                 + $whois_group->{$_}->src_net->intip ] } # base address
        keys %{$whois_group}) {
        my $whois = $whois_group->{$src_str};
        # note that we haven't actually run whois yet, so we can't really
        # trust it.  But we can see if it's already in our binary tree
        my $src_net = $whois->src_net;
        my $root = $src_net->version == 6 ? $root_v6 : $root_v4;
        my $found;
        $root->follow($src_net->masked_ip, sub {
                $found = $_[0]->payload if ($_[0]->payload); 
                return $found;  # stop when we find anything
            }
        );
        if ($found) {
            # already something there, so skip this one
            my $prev = $found->whois->src_str;
            $whois->disposition("skip: overlaps $prev");
print "  skipping $src_str (overlaps $prev)\n";
        }
        else {  # ok, nothing at this IP address yet
            # fetch whois and glean netblock info from it
            my @filtered_cidrs = $whois->filtered_cidrs;

            for my $ii (0 .. $#filtered_cidrs) {
                my $filtered_cidr = $filtered_cidrs[$ii];
                my $node = $root->construct($filtered_cidr->masked_ip);
                $node->payload(
                    Local::Payload->new(
                        cidr_idx => $ii,
                        whois => $whois,         # set the whois entry as the node payload
                    ),
                );

printf "attach %s to %s\n", $whois->src_str, $filtered_cidr->as_string;
                # clear any (smaller, lower) nodes that have been inserted
                # earlier
                clear_nodes($node, "absorbed by $src_str");
            }
            $whois->disposition("added");
        }
    }
}

print "\nResult:\n";
print_nodes($root_v6);

exit 0;

sub clear_nodes {
    my ($node, $new_disposition) = @_;

    if ($node->zero) {
        clear_nodes($node->zero, $new_disposition);
        $node->zero->payload->whois->disposition($new_disposition) if $node->zero->payload;
        # $node->zero(undef);
    }
    if ($node->one) {
        clear_nodes($node->one, $new_disposition);
        $node->one->payload->whois->disposition($new_disposition) if $node->one->payload;
        # $node->one(undef);
    }
}

sub print_nodes {
    my ($node) = @_;

    my @payloads;
    $node->traverse_width_first(
        sub {
            push @payloads, $_[0]->payload if $_[0]->payload;
            return 0;   # always continue
        }
    );
    die "No whois data\n" if not @payloads;

    # Collect all consective blocks into a single range.
    #    the same whois object may appear in consecutive blocks because we
    #    split ranges (if necessary) when we enter them into the binary
    #    tree.  If so, cidr_idx indicates which netblock is represented.
    my ($first_whois, $first_version, $last_int);
    for my $ii (0 .. $#payloads) {
        my $whois    = $payloads[$ii]->whois;
        my $cidr_idx = $payloads[$ii]->cidr_idx;
        my $net      = $whois->filtered_cidrs->[$cidr_idx];
        if ($ii > 0) {
            if ($last_int + 1 == $net->intip and    # consecutive and
                $first_version == $net->version) {  # same version
                $last_int = $net->last_int;     # combine
              # $net->disposition("combined into $src_str");
                next;
            }
            # gap.  print collected nets so far
            print_payload($first_whois, $first_version, $last_int);
        }
        # start working on the next consecutive block of nets
        $first_whois   = $whois;
        $first_version = $whois->filtered_cidrs->[$cidr_idx]->version;
        $last_int      = $whois->filtered_cidrs->[$cidr_idx]->last_int;
    }
    # there's block one left, print it out
    print_payload($first_whois, $first_version, $last_int);
}

# print results from whois.  If it's the same as the original source string
#     that launched the whois, just print the result.  If different, print
#     original followed by authorized version from whois result.  This
#     makes it easy to spot changes.
sub print_payload {
    my ($whois, $version, $last_int) = @_; # the entry in %whois, IPv4/6, the last IP for this netblock

    my $src_str = $whois->src_str;
    my $result_str = sprintf "%s - %s",
        $whois->filtered_cidrs->[0]->ip,
        Local::Net->int_to_ip($last_int, $version);

    $src_str = '' if ($src_str eq $result_str); # if same, don't print source

    printf "%-31s  %s\n", $src_str, $result_str;
}


