#!/usr/bin/perl -w
# slreportd - load reporting daemon for perl Schedule::Load
# Note: adding -T taint checks will break the storable feature
# See copyright, etc in below POD section.
######################################################################

require 5.004;
use lib 'blib/lib';	# testing
use FindBin qw($Script);
use Getopt::Long;
use Pod::Usage;
use Sys::Syslog;
use Schedule::Load::Reporter;
use Schedule::Load::Reporter::Disk;
use Schedule::Load::Reporter::Filesys;
use Schedule::Load::Reporter::Network;
use POSIX;
use strict;

BEGIN { $ENV{PATH} = '/usr/ucb:/bin' }	# Secure path

$SIG{PIPE} = 'IGNORE';

######################################################################
# configuration

######################################################################
# globals

use vars qw(@Orig_ARGV);

######################################################################
# main

@Orig_ARGV = @ARGV;

my $Debug;
my %server_params = ( plugins => [] );
my $opt_quiet = 1;
my $opt_fork = 1;

Getopt::Long::config ("no_auto_abbrev");
if (!GetOptions (
		 "help"		=> \&usage,
		 "debug"	=> \&debug,
		 "version"	=> \&version,
		 "quiet!"	=> \$opt_quiet,	# Debugging
		 "fork!"	=> \$opt_fork,	# Debugging/Test script
		 "fake!"	=> sub {shift; $server_params{fake} = 1;},
		 "port=i"	=> sub {shift; $server_params{port} = shift;},
		 "dhost=s"	=> sub {shift; push @{$server_params{dhost}}, split(':',shift);},
		 "stored_filename=s"	=> sub {shift; $server_params{stored_filename} = shift;},
		 "<>"		=> \&parameter,
		 )) {
    die "%Error: Bad usage, try 'slreportd --help'\n";
}

autoflush STDOUT 1;
autoflush STDERR 1;

# Init plugins
if (!$server_params{fake}) {
    push @{$server_params{plugins}}, (
	Schedule::Load::Reporter::Disk->new(),
	Schedule::Load::Reporter::Filesys->new(),
	Schedule::Load::Reporter::Network->new(),
    );
}

# We're chdiring to /, so make sure program name has path
if ($0 !~ m!^/!) { $0 = getcwd()."/".$0; }
chdir "/" if $opt_fork;	# Change immediately to aid debugging of $0 change

if ($opt_quiet && !$Debug) {
    if ($opt_fork) {
	exit if fork();  # Fork once to let parent die
        POSIX::setsid(); # Disassociate from controlling terminal
	exit if fork();  # Prevent possibility of acquiring a controling terminal
	chdir "/";       # Change working directory so can umount .
    }
    # Close open file descriptors
    my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
    $openmax = (!defined($openmax) || $openmax < 0) ? 64 : $openmax;
    foreach my $i (0 .. $openmax) { POSIX::close($i); }
    # Silence please (in case user didn't pipe when starting us)
    open(STDIN,  "+>/dev/null");
    open(STDOUT, "+>&STDIN");
    open(STDERR, "+>&STDIN");
}

# Loop in case something kills us
while (1) {
    print "Starting server: $0\n" if $Debug;
    if (!$Debug) {
	openlog($Script, 'cons,pid', 'daemon');
	syslog('info', 'Started');
	closelog();
    }
    my $pid;
    if (!$opt_fork || (0==($pid = fork()))) {
	Schedule::Load::Reporter->start (%server_params);
	exit(0);
    }
    die "%Error: Server aborted\n" if !$opt_fork;

    waitpid($pid,0);
    warn "%Warning: Server aborted\n";
    sleep(1);
    kill 9, $pid;
    sleep(1);
    # Restart this program by re-execing; allows users to restart the daemon
    delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
    print "exec $0, @Orig_ARGV\n" if $Debug;
    # Note if testing this, the exec will use the installed library, not the local library
    exec $0, @Orig_ARGV if -x $0;
    # If can't exec we simply start the server again
}

exit (0);

#----------------------------------------------------------------------

sub parameter {
    my $param = shift;
    if ($param =~ /^([a-zA-Z_0-9]+)=(.*)$/) {
	$server_params{$1} = $2 if $1 eq "timeout" || $1 eq "alive_time";
	$server_params{const}{$1} = $2;
	$server_params{const}{physical_cpus} = $2 if $1 eq 'cpus';  # If user sets fake reporter cpus, they mean physical
    } else {
	die "%Error: Unknown parameter: $param\n";
    }
}

######################################################################

sub usage {
    print "Version: $Schedule::Load::VERSION\n";
    pod2usage(-verbose=>2, -exitval => 2);
    exit(1);
}

sub version {
    print "Version: $Schedule::Load::VERSION\n";
    exit (1);
}

sub debug {
    $Debug = 1;
    $Schedule::Load::Reporter::Debug = 1;
    $IPC::PidStat::Debug = 1;
}

######################################################################
__END__

=pod

=head1 NAME

slreportd - Distributed load reporter for Perl Schedule::Load

=head1 SYNOPSIS

B<slreportd>
[ B<--help> ]
[ B<--port=>I<port> ]
[ B<--dhost=>I<host> ]
[ B<--version> ]

=head1 DESCRIPTION

slreportd will start a daemon to report machine loading for the
Schedule::Load package.  It will create two similar processes, so that if
second process exits, the first may restart it automatically.

L<slreportd> must be running on every host in the network, usually started
with a init.d script.  It reports itself to the L<slchoosed> daemon
periodically, and is responsible for checking loading and top processes
specific to the host that it runs on.

L<slreportd> may also be invoked with some variables set.  This allows
static host information, such as class settings to be passed to
applications.

=head1 ARGUMENTS

=over 4

=item --help

Displays this message and program version and exits.

=item --dhost

Specifies the daemon host name that slchoosed uses.  May be specified
multiple times to specify backup hosts.  Defaults to SLCHOOSED_HOST
environment variable, which contains colon separated host names.

=item --fake

Specifies load management should not be used, for reporting of a "fake"
hosts' status.  Often the hostname and other parameters will want to be
overridden, for example:

slreportd hostname=lab_1 cpus=1 max_clock=100 osname=myos osvers=1
          archname=myarch reservable=1 load_limit=1

=item --nofork

For debugging, prevents the daemon from creating additional processes and
from going into the background.  This allows messages to appear on stdout,
and ctrl-C to stop the daemon.

=item --port

Specifies the port number that slchoosed uses.  Defaults to SLCHOOSED_PORT
environment variable or slchoosed service, or 1752.

=item --version

Displays program version and exits.

=item {variable}={value}

Sets a arbitrary constant variable to the specified value.  This variable
may be used so that a process requesting a machine can choose a machine
with specific properties.

=over 4

=item dynamic_cache_timeout={secs}

When set, after this number of seconds the dynamic load information for
this host will no longer be cached by slchoosed, and when next needed must
be reread from the slreportd.  If not set, slchooserd picks a default,
currently 10 seconds.  Turning this number up may improve performance at
the cost of decreased accuracy.

=item load_limit={value}

Set a maximum number of jobs that the scheduler can run on this machine.

=item load_pctcpu={1|0}

When set, determine load as a floating point number based on CPU usage
percentage of all tasks.  Default, or when clear, load is a absolute number
where each high CPU job counts as one job, regardless of what percentage of
the CPU is used.  Using pctcpu tends to keep CPUs busy more often, at the
possible expense of slowing down interactive jobs that are not using an
entire CPU.

=item rating_adder={value}

Add the specified value to the rating obtained for the machine.  A positive
rating will make the machine less desirable for scheduling.

=item rating_mult={value}

Multiply the specified value to the rating obtained for the machine.  The
value 2 would act the same as a halved clock frequency, making the machine
less desirable for scheduling.

=back

=back

=head1 DISTRIBUTION

The latest version is available from CPAN and from L<http://www.veripool.org/>.

Copyright 1998-2011 by Wilson Snyder.  This package is free software; you
can redistribute it and/or modify it under the terms of either the GNU
Lesser General Public License Version 3 or the Perl Artistic License Version 2.0.

=head1 AUTHORS

Wilson Snyder <wsnyder@wsnyder.org>

=head1 SEE ALSO

L<slchoosed>, L<Schedule::Load>, L<Schedule::Load::Reporter>,

=cut
######################################################################
