#!/usr/bin/perl

use strict;
use warnings FATAL => 'all';

our $VERSION = '1.0300';

use YATG::SharedStorage;
YATG::SharedStorage->factory(qw(cache results));

use YATG::Config;
use YATG::Callback;

use Symbol;
use Readonly;
use Time::HiRes qw(time sleep);

use DBI;
use SNMP;
use SNMP::Effective;
die "must not use SNMP::Effective 1.0\n"
    if $SNMP::Effective::VERSION == 1.0;
use Net::Netmask;
use Regexp::Common 'net';
use Log::Dispatch::Syslog;

use POSIX;
use FindBin;
use File::Basename;
use File::Spec::Functions;

my $config = YATG::Config->parse(@ARGV) || die "failed to load config\n";
Readonly my $INTERVAL => $config->yatg->{'interval'};
Readonly my $POLLERS  => $config->yatg->{'max_pollers'};
Readonly my $TIMEOUT  => $config->yatg->{'timeout'};
Readonly my $DEBUG    => $ENV{YATG_DEBUG} || $config->yatg->{'debug'};

my $logger = Log::Dispatch::Syslog->new($config->log_dispatch_syslog);
sub to_log {
    my $msg = shift;
    return unless $msg;

    $msg =~ s/^/[$$] /gm;
    $logger->log(
        level => $config->log_dispatch_syslog->{'min_level'},
        message => $msg
    );
}

Readonly my @modules => qw( Disk RPC Memcached STDOUT );
Readonly my @basic_oids => (
        'ifDescr'       => '.1.3.6.1.2.1.2.2.1.2',
        'ifAdminStatus' => '.1.3.6.1.2.1.2.2.1.7',
);

my $results = YATG::SharedStorage->results({});
my $cache   = YATG::SharedStorage->cache({
    oid_for  => { @basic_oids },
    leaf_for => { reverse @basic_oids },
});

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# make the daemon cross-platform, so exec always calls the script
# itself with the right path, no matter how the script was invoked.
Readonly my $script => File::Basename::basename($0);
Readonly my $SELF => catfile $FindBin::Bin, $script;

# POSIX unmasks the sigprocmask properly
my $sigset = POSIX::SigSet->new;
my $action = POSIX::SigAction->new(
    'hup_handler', $sigset, &POSIX::SA_NODEFER
);
POSIX::sigaction(&POSIX::SIGHUP, $action);

sub hup_handler {
    to_log("SIGHUP received, restarting...\n") if ! $DEBUG;
    exec ($SELF, @ARGV) or die "Couldn't restart $0: $!\n";
} # thanks go to perlipc man page for this

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# rebuild device cache (also on sigHUP)

# list of all device ips
my $dbh = DBI->connect(
    @{$config->yatg->{'dbi_connect'}},
    {PrintError => 1, RaiseError => 1, AutoCommit => 1}
);

$cache->{devices} = $dbh->selectcol_arrayref(
    $config->yatg->{'dbi_ip_query'});
$dbh->disconnect;

# work out what OIDs to grab, and how
SNMP::addMibDirs(@{$config->yatg->{'mibdirs'}});
SNMP::loadModules('ALL');
my %modhash = map{lc($_) => $_} @modules;
my @to_load = ();

foreach my $leaf (keys %{$config->yatg->{'oids'}}) {
    my %tokens = map {lc($_) => 1} @{$config->yatg->{'oids'}->{$leaf}};

    # difference or counter
    $cache->{oids}->{$leaf}->{diff} = $tokens{diff} ? 1 : 0;

    # store module to load
    $cache->{oids}->{$leaf}->{store} =
        $modhash{ (grep {$modhash{$_}} keys %tokens)[0] };
    push @to_load, $cache->{oids}->{$leaf}->{store};

    # transate leaf to OID
    $cache->{oid_for}->{$leaf} = SNMP::translateObj($leaf)
        or die "Failed to translate leaf $leaf to OID\n";

    my $oid = $cache->{oid_for}->{$leaf};
    $cache->{leaf_for}->{ $oid } = $leaf;

    # prune IPs for each OID
    my @nets_incl = grep {$_ = Net::Netmask->new2($_)}
                    grep {m#^$RE{net}{IPv4}(?:/\d+)?$#} keys %tokens;
    my @nets_excl = grep {$_ = Net::Netmask->new2($_)}
                    grep {m#^!$RE{net}{IPv4}(?:/\d+)?$#} keys %tokens;

    if (scalar @nets_incl == 0 and scalar @nets_excl == 0) {
        # short-circuit
        map { $cache->{get_for}->{$_}->{ $oid } = 1 }
            @{$cache->{devices}};
    }
    else {
        foreach my $ip (@{$cache->{devices}}) {
            $cache->{get_for}->{$ip}->{ $oid } = 1
                if  ((scalar grep { $_->match($ip) } @nets_incl) > 0
                     and (scalar grep { $_->match($ip) } @nets_excl) == 0)
                or  (scalar @nets_incl == 0
                     and (scalar grep { $_->match($ip) } @nets_excl) == 0);
        }
    }

    # indexed oid?
    if ($tokens{ifindex}) {
        $cache->{oids}->{$leaf}->{indexer} = 'iid';
        foreach my $ip (
            grep { exists $cache->{get_for}->{$_}->{ $oid } }
                 @{$cache->{devices}} ) {

            $cache->{get_for}->{$ip}->{ $cache->{oid_for}->{ifDescr} } = 1;
            $cache->{get_for}->{$ip}->{ $cache->{oid_for}->{ifAdminStatus} } = 1;
            $cache->{$ip}->{build_ifindex} = 1;
        }
    }
    else {
        $cache->{oids}->{$leaf}->{indexer} = 0;
    }
}

# load storage module(s)
foreach my $module (@to_load) {
    die "Request to load undefined module\n" if !defined $module;
    eval "require YATG::Store::$module"
        or die "Failed to load $module store module: $@\n";
}

# connect to each device and find out its community,
# shortcut if there is only one community supplied
if (scalar @{$config->yatg->{'communities'}} == 1) {
    map {$cache->{community_for}->{$_}
                = $config->yatg->{'communities'}->[0]}
        @{$cache->{devices}};
}
else {
    foreach my $ip (@{$cache->{devices}}) {
        foreach my $c (@{$config->yatg->{'communities'}}) {
            my $sess = SNMP::Session->new(
                DestHost  => $ip,
                Version   => 1,
                Timeout   => 10000,
                Community => $c,
            );
            my $val = $sess->get('sysUpTime.0');
    
            # failure, go to next community
            next if !$val or $sess->{ErrorNum} or $sess->{ErrorStr};
    
            # success, use this community and go to next device
            $cache->{community_for}->{$ip} = $c;
            last;
        }
    }
}

to_log("Cache rebuild took ". (time - $^T) ." seconds.\n") if ! $DEBUG;
print "Cache rebuild took ". (time - $^T) ." seconds.\n" if $DEBUG;

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

while (1) {
    my $naptime = $INTERVAL - (time % $INTERVAL);
    print "Sleeping for $naptime seconds...\n" if $DEBUG;
    sleep $naptime;

    my $se = SNMP::Effective->new(
        MaxSessions   => $POLLERS,
        MasterTimeout => $TIMEOUT,
    );  

    foreach my $ip (@{$cache->{devices}}) {
        next unless exists $cache->{community_for}->{$ip};
        next unless scalar (keys %{$cache->{get_for}->{$ip}}) > 0;

        $se->add(
            DestHost => $ip,
            Arg      => {
                Community => $cache->{community_for}->{$ip},
                Timeout   => 5000000,
            },
            Callback => \&YATG::Callback::snmp_callback,
            walk     => [ keys %{$cache->{get_for}->{$ip}} ],
        );
    }   

    my $exec = time;
    $se->execute;
    to_log("Execute run took ". (time - $exec) ." seconds.\n") if ! $DEBUG;
    print "Execute run took ". (time - $exec) ." seconds.\n" if $DEBUG;

    my $update = time;
    my $stamp = floor($update - ($update % $INTERVAL));
    (($stamp % $INTERVAL) == 0) or die "yatg: FATAL: time is shifting!\n";

    # send gathered data from this run to storage
    foreach my $mod (keys %$results) {
        &{*{Symbol::qualify_to_ref('store',"YATG::Store::$mod")}}
            ({%$config}, $stamp, $results->{$mod});
    }

    to_log("Remote update took ". (time - $update) ." seconds.\n") if ! $DEBUG;
    print "Remote update took ". (time - $update) ." seconds.\n" if $DEBUG;

    $results = YATG::SharedStorage->results({});
    last if $ENV{YATG_SINGLE_RUN};
}


__END__

=head1 NAME

YATG - Fast SNMP data poller daemon, with storage and graphing

=head1 VERSION

This document refers to version 1.0300 of YATG

=head1 DESCRIPTION

YATG is an application which is intended to be run persistently. At intervals
it will wake up and poll network devices for SNMP data, and then store or
print that data. In this distribution are also included examples for
presenting simple CGI web pages with graphs.

Please understand that YATG is probably not going to be a plug-and-play system
for you. We took the decision to generalize the code as much as possible, and
release it, in the hope that other network systems administrators might find
our solutions to these problems interesting, and maybe provide feedback.

However, YATG is immenseley flexible and powerful. It can poll a large number
of devices with thousands of ports in just a few seconds. The configuration is
very simple, and the defaults sane (it's designed for sysadmins, after all).

You can use YATG both for historical logging, such as traffic counters on
ports, as well as short-term monitoring which might feed into, say, Nagios.
Wherever possible, data is translated to human-friendly formats for storage,
such as using Leaf Names instead of OIDs, translated values (C<up>, C<down>,
etc) and device port names rather than SNMP Interface Indexes.

One final thing; YATG works well for us, but might not for you. We know that
the code is rough in places. Take this distribution as a proof of concept, and
please send patches, comments and suggestions to the email address below.

=head1 How Does It Work?

At startup, C<yatg_updater> loads its configuration from local files and a
database, performs some basic SNMP connections to build a cache about device
capabilities and so on, and then goes to sleep.

Periodically, as determined by the configuration, C<yatg_updater> wakes up and
polls all devices, then stores results, again according to instructions in the
configuration.

If you have only the essential dependencies installed (see below) then you can
only output results to C<STDOUT>. With other modules, you have more options
such as local or remote disk, or memcached based storage.

C<yatg_updater> will re-load all its configuration if given a HUP signal. If
you run the daemon persistently (for example with C<daemontools>) then a cron
job once a day is a good way to refresh the configuration. There is reference
to this in one of the bundled example files.

=head1 What's in this distribution

=over 4

=item C<yatg_updater>

This is the main application, designed to be run persistently. It does not
accept any input and only produces output when in debugging mode. It is a
smart wrapper for the L<SNMP::Effective> module.

=item C<YATG::Store> family of modules

These are modules which take the SNMP poll results and store them to either
local Disk, a Memcached server, or the disk on a remote networked server.

=item C<YATG::Retrieve> family of modules

These are modules which read stored results back to you, for a given time
window. The data can be retrieved from local Disk, a Memcached server, or the
disk on a remote networked server.

=item L<RPC::Serialized> handlers

If storing and/or retrieving on a remote networked server, it should run an
instance of L<RPC::Serialized>, and these are the RPC Handlers for that server
(see that module's documentation for further details).

=item CGI

For the special case of viewing graphs of disk-based poll results for switch
port traffic counters, there is are two CGI scripts. One is a wrapper which
presents an HTML page embedded with PNG images created from the other script.

=item Examples

The C<examples/> folder includes a copy of each of the files you should need
for a complete deployment of YATG. Obviously some of them contain dummy data.

=back

=head1 Where to go from here

To begin with, you probably want to see how to configure C<yatg_updater> in
L<YATG::Config>.

Alongside that, there are examples of all the files you should need to
install, in the C<examples/> folder of this distribution.

=head1 LOGGING and TESTING

This module uses "Log::Dispatch::Syslog" for logging, and by default will log
timing data to your system's syslog service. More information is provided in
the L<YATG::Config> documentation.

To run in debug mode, where timing data is output to standard out rather than
syslog, set the environment variable C<YATG_DEBUG> to a true value.

To run the poller just once, set the C<YATG_SINGLE_RUN> environment variable
to a true value. This is great for development. It makes C<yatg_updater> load
its configuration, generate the device hints cache, sleep and then run just
one poll cycle before exiting.

=head1 DEPENDENCIES

The following modules are dependencies of the YATG system, in addition to the
standard contents of the Perl distribution itself:

=over 4

=item L<Class::Data::Inheritable>

=item L<DBI>

=item L<Log::Dispatch::Syslog>

=item L<Module::MultiConf>

=item L<Net::Netmask>

=item L<Readonly>

=item L<Regexp::Common>

=item L<SNMP>

=item L<SNMP::Effective> (but not version 1.0)

=item L<Time::HiRes>

=back

The following modules are optional, and allow you to use additional bundled
features such as data storage, graphing, and so on:

=over 4

=item L<CGI>

=item L<Cache::Memcached>

=item L<RPC::Serialized>

=item L<Tie::File::FixedRecLen>

=item L<URI::Escape>

=item L<perlchartdir>

=back

=head1 SEE ALSO

=over 4

=item L<SNMP::Effective>

This system uses C<SNMP::Effective> at its core do the polling.

=item L<RPC::Serialized>

Store polled data on another server using C<RPC::Serialized>.

=back

=head1 AUTHOR

Oliver Gorwits C<< <oliver.gorwits@oucs.ox.ac.uk> >>

=head1 COPYRIGHT & LICENSE

Copyright (c) The University of Oxford 2007. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of version 2 of the GNU General Public License as published by the
Free Software Foundation.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

=cut
