#!/usr/bin/perl
#
#   PS-Farm daemon, near copy of golem daemon
#   (simple cron implementation with task dependencies)
#
#   kohts@yandex-team.ru
#
#

# Compile-time environment discovery.
#
# Runs before the "use lib" lines below are evaluated, so that MY_LIB and
# PS_SNAKED_LIB are already known when the snaked library is loaded.
# Exports the following environment variables:
#   MY_BIN         -- directory of this script (FindBin)
#   MY_LIB, MY_ETC -- ../lib and ../etc relative to MY_BIN
#   MY_ROOT        -- four directory levels above MY_BIN
#   PS_SNAKED_LIB  -- library path (defaults to MY_LIB unless already set)
#   PS_SNAKED_CFG  -- configuration directory (see lookup order below)
# Dies if no configuration directory can be found.
BEGIN {
  use Cwd;
  use FindBin;

  # without this chroot abs_path (below)
  # returns empty string if daemon is called
  # from some shell script and current directory
  # is a home directory of a user (permissions?)
  #
  # NOTE(review): the return value is not checked, so a failing
  # chroot (e.g. when not running as superuser) is silently ignored
  chroot('/');

  $ENV{'MY_BIN'} = "$FindBin::Bin";
  $ENV{'MY_LIB'} = Cwd::abs_path("$ENV{'MY_BIN'}/../lib");
  $ENV{'MY_ETC'} = Cwd::abs_path("$ENV{'MY_BIN'}/../etc");
  $ENV{'MY_ROOT'} = Cwd::abs_path("$ENV{'MY_BIN'}/../../../..");

  # library path may be overridden from the environment
  if (!$ENV{'PS_SNAKED_LIB'}) {
    $ENV{'PS_SNAKED_LIB'} = $ENV{'MY_LIB'};
  }

  # "--cfg <dir>" on the command line overrides
  # any PS_SNAKED_CFG value inherited from the environment
  # (only the first --cfg occurrence is used)
  my $cfg_next = 0;
  foreach my $arg (@ARGV) {
    if ($arg eq '--cfg') {
      $cfg_next = 1;
      next;
    }
    if ($cfg_next) {
      $ENV{'PS_SNAKED_CFG'} = $arg;
      last;
    }
  }

  # configuration directory is neither given on the command line
  # nor in the environment; try in this order:
  #   1. <MY_BIN>/../etc
  #   2. <MY_ROOT>/etc/ps-farm/options/ps-snaked
  #   3. /etc/ps-farm/options/ps-snaked
  unless ($ENV{'PS_SNAKED_CFG'}) {
    $ENV{'PS_SNAKED_CFG'} = $ENV{'MY_ETC'};

    $ENV{'PS_SNAKED_CFG'} = ($ENV{'MY_ROOT'} eq "/" ? "" : $ENV{'MY_ROOT'}) .
      "/etc/ps-farm/options/ps-snaked" if ! -d $ENV{'PS_SNAKED_CFG'};

    $ENV{'PS_SNAKED_CFG'} = "/etc/ps-farm/options/ps-snaked"
      if ! -d $ENV{'PS_SNAKED_CFG'} && -d "/etc/ps-farm/options/ps-snaked";
  }

  # whatever was chosen must be an existing directory
  die "$ENV{'PS_SNAKED_CFG'} is not available. Unable to continue\n\n"
    unless -d "$ENV{'PS_SNAKED_CFG'}";
};

use strict;
use warnings;

use lib "$ENV{'MY_LIB'}";
use lib "$ENV{'PS_SNAKED_LIB'}";
use snaked;

package psProcess;

# Lightweight process record: a blessed hash with the keys
# 'pid', 'ppid', 'pgid' and 'cmd', exposed through read-only accessors.
# pgrp() and pgid() are two names for the same 'pgid' value.
sub pid      { return $_[0]->{'pid'}; }
sub ppid     { return $_[0]->{'ppid'}; }
sub pgrp     { return $_[0]->{'pgid'}; }
sub pgid     { return $_[0]->{'pgid'}; }
sub cmndline { return $_[0]->{'cmd'}; }

package psSnake::Daemon;
use Storable;
use Time::HiRes qw/usleep/;
use Schedule::Cron::Events;
use Time::Local;
use POSIX;
use IO::Handle; # autoflush
use Socket; # socketpair
use Fcntl;

# Proc::ProcessTable is loaded only when not running on Linux
if ($^O ne 'linux') {
  require Proc::ProcessTable;
}

my $version = '($Id: snaked 3935 2010-06-22 19:22:02Z kohts $)';

# Command line patterns used to recognize snaked processes in the process table.
#
# The configuration path is interpolated with \Q...\E so that it is matched
# literally: a path containing regex metacharacters (".", "+", "(" ...) would
# otherwise match wrong command lines or make the pattern fail to compile.
#
#   $daemon_match_cfg   -- daemon/debug instance started with our --cfg path
#   $daemon_match_nocfg -- any daemon/debug instance (backward compatibility)
#   $watchdog_match     -- watchdog instance started with our --cfg path
my $daemon_match_cfg = qr/^([^\s]+perl[^\s]*[\s]+|)[^\s]+(ps-)?snaked.+(daemon|debug).+cfg.+\Q$ENV{'PS_SNAKED_CFG'}\E/;
my $daemon_match_nocfg = qr/^([^\s]+perl[^\s]*[\s]+|)[^\s]+(ps-)?snaked.+(daemon|debug)/;
my $watchdog_match = qr/^([^\s]+perl[^\s]*[\s]+|)[^\s]+(ps-)?snaked.+(watchdog).+cfg.+\Q$ENV{'PS_SNAKED_CFG'}\E/;

# directory and command line of the running daemon
# (declared here, assigned elsewhere in the file)
my $my_path;
my $my_command_line;

# number of watchdog processes to keep alive
my $watchdogs2maintain = 1;

# Process-wide mutable state of the daemon, shared by all subs in this package.
$psSnake::Daemon::runtime = {
  # role of the current process; the signal handlers below
  # distinguish "master" and "watchdog", forked task runners
  # set it to "child"
  "type" => "master",

  # requests raised by the signal handlers
  # (further flags such as 'restart' are added on demand)
  "flags" => {
    "stop" => 0,
    "refresh_configuration" => 0,
    },

  # running children, filled by add_child():
  #   {'by_pid'}->{$pid} and {'by_type'}->{$type}->{$pid}
  "children" => {},
  # per task name bookkeeping ('laststart', 'lastfinish' timestamps)
  "children_cache" => {},

  # NOTE(review): not referenced in the visible part of the file;
  # presumably the time of the next watchdog check -- confirm
  "usec_2check_watchdog" => 0,
  "start_time" => time(),

  # remove_child() deletes {$group}->{$task_name} entries from here
  "current_tasks" => {},

  # task definitions by task name (rebuilt by refreshOptions())
  "tasks" => {},
  # group name => { task name => 1 } (rebuilt by prepare_task_groups())
  "task_groups" => {},

  # daemon options by file name: { 'mtime' => ..., 'value' => ... }
  "config" => {},

  };

# SIGTERM: the master only raises its 'stop' flag,
# a watchdog exits immediately, any other process type ignores it.
sub sigTERM_handler {
  my $role = $psSnake::Daemon::runtime->{'type'};

  if ($role eq 'watchdog') {
    exit;
  }

  $psSnake::Daemon::runtime->{'flags'}->{'stop'} = 1
    if $role eq 'master';
}
# SIGHUP: ask the master to reread its configuration
# (and mark the request as not yet logged); ignored by other process types.
sub sigHUP_handler {
  return unless $psSnake::Daemon::runtime->{'type'} eq 'master';

  @{$psSnake::Daemon::runtime->{'flags'}}{'refresh_configuration', 'refresh_configuration_logged'} = (1, 0);
}
# SIGUSR1: intentionally a no-op; installing a handler keeps
# the signal from terminating the process (its default action).
sub sigUSR1_handler {
}
# SIGUSR2: schedule a restart of the master.
# Ignored by other process types and when the master is already being stopped.
sub sigUSR2_handler {
  return if $psSnake::Daemon::runtime->{'type'} ne 'master';

  # do not restart if already being stopped
  return if $psSnake::Daemon::runtime->{'flags'}->{'stop'};

  $psSnake::Daemon::runtime->{'flags'}->{'restart'} = 1;
}

# install the signal handlers defined above
@SIG{'TERM', 'HUP', 'USR1', 'USR2'} = (
  \&sigTERM_handler,
  \&sigHUP_handler,
  \&sigUSR1_handler,
  \&sigUSR2_handler,
  );

# Print the usage text (including the daemon version) to STDOUT
# and terminate the process with exit code 0.
sub help() {
  my $usage = '
    snaked -- cron as it should be
    ' . $version . '

    command-line options:
      
      start-up type:
        --daemon    -- run in background
        --debug     -- run in foreground with debug output

      runtime control:
        --restart   -- schedule restart for currently running daemon
                       (valid only for backgrounded daemon)
        --configure -- schedule reread of configuration
        --status    -- is there daemon running?
        --stop      -- schedule stop for currently running daemon

      configuration:
        --show-jobs   -- show configured daemon jobs
        --version     -- show daemon version

';

  print $usage;
  exit 0;
}

# Mail a warning message to the daemon administrator
# ($psSnake::ADMIN_EMAIL), without copying anybody else.
sub important {
  my ($msg) = @_;

  my %mail = (
    'to' => $psSnake::ADMIN_EMAIL,
    'subject' => $0 . ": warning",
    'body' => psSnake::safe_string($msg),
    'no_cc_all' => 1,
    );

  psSnake::send_mail(\%mail);
}

# Execute one configured task and report the outcome.
#
# Parameters:
#   $task_name -- key in $psSnake::Daemon::runtime->{'tasks'}
#
# The task command is run through psSnake::run_forked() with
# the JOB_NAME environment variable set to the task name.
#
# A run is considered failed when it exits with non-zero code,
# prints anything to stdout/stderr or hits its execution timeout.
#
# Returns:
#   ""              -- task succeeded, or it failed and a notification
#                      mail has been sent
#   $o->{'err_msg'} -- task failed and no notification was sent
#                      (notifications disabled or notification_interval
#                      has not passed yet)
#
# Exits the process (code 1) if run_forked reports that the parent died.
#
sub run_task {
  my ($task_name) = @_;

  my $task = $psSnake::Daemon::runtime->{'tasks'}->{$task_name};

  my $o;
  {
    # JOB_NAME is visible to the task only; "local" puts back
    # the previous state on scope exit, including the cases when
    # the variable was not set at all or held a false value
    local $ENV{'JOB_NAME'} = $task_name;

    psSnake::debug("running task [$task_name] timeout [$task->{'execution_timeout'}]");
    $o = psSnake::run_forked($task->{'cmd'}, {
      'timeout' => $task->{'execution_timeout'},
      'terminate_on_parent_sudden_death' => 1,
      'terminate_on_signal' => 'TERM',
      'terminate_wait_time' => 60, # should be configurable for each job
      'clean_up_children' => 1,
      });
    psSnake::debug("finished [$task_name]: " . psSnake::safe_string($o->{'exit_code'}));
  }

  if ($o->{'parent_died'}) {
    psSnake::do_log("[$$] my parent died, exiting");
    exit 1;
  }

  my $failed =
    $o->{'exit_code'} ne 0 ||
    $o->{'stdout'} ||
    $o->{'stderr'} ||
    $o->{'timeout'};

  return "" unless $failed;

  if (! defined($task->{'disable_notifications'})) {
    # save first failure time (this is only valid during
    # child life, parent will set this again in its
    # memory space after child returns $o->{'err_msg'})
    $task->{'runtime'}->{'first_failure_time'} = time()
      unless $task->{'runtime'}->{'first_failure_time'};

    # do not notify more often than once
    # each $task->{'notification_interval'} seconds
    # (notify after each failure if not defined)
    my $interval = $task->{'notification_interval'};

    if (!$interval ||
      $task->{'runtime'}->{'first_failure_time'} + $interval < time()) {

      psSnake::send_mail({
        'to' => ($task->{'admin_email'} ? $task->{'admin_email'} : $psSnake::ADMIN_EMAIL),
        'subject' => $0 . ": $task_name warning",
        'body' => $o->{'err_msg'},
        'no_cc_all' => 1,
        });

      # pretend that everything went fine
      # (this will make parent reset first_failure_time)
      return "";
    }
  }

  # if notification was not sent --
  # let parent know that we had problem
  # and it should set first_failure_time
  # (if not set)
  return $o->{'err_msg'};
}

# Fork a child process which runs the task named $type.
#
# Parameters:
#   $type -- task name (mandatory), passed to run_task() in the child
#   $id   -- optional identifier stored with the child record
#   $opts -- optional hashref (currently not used)
#
# Parent and child are connected with a socketpair: the child writes
# the (line-oriented) result of run_task() into it and exits; the parent
# keeps its end in non-blocking mode in the child record so that it can
# be polled with manage_child().
#
# In the parent the child is registered in
# $psSnake::Daemon::runtime->{'children'} (by_pid and by_type).
# The child never returns from this sub.
#
sub add_child {
  my ($type, $id, $opts) = @_;

  $opts = {} unless $opts;

  psSnake::die ("Programmer error: add_child expects at least child type")
    unless $type;

  my $child_socket;
  my $parent_socket;

  socketpair($child_socket, $parent_socket, AF_UNIX, SOCK_STREAM, PF_UNSPEC) ||
    psSnake::die ("socketpair: $!");

  $child_socket->autoflush(1);
  $parent_socket->autoflush(1);

  my $pid;

  if ($pid = fork) {
    # we are a parent
    close $parent_socket;

    # F_GETFL hands the current flags back as the return value of fcntl
    # ("0 but true" for zero), they must be kept when adding O_NONBLOCK
    my $flags = fcntl($child_socket, F_GETFL, 0) || die "can't fnctl F_GETFL: $!";
    fcntl($child_socket, F_SETFL, $flags | O_NONBLOCK) || die "can't fnctl F_SETFL: $!";

    my $child = {
      'pid' => $pid,
      'type' => $type,
      'borntime' => time(),
      'killtime' => 0,
      'id' => $id,
      'child_socket' => $child_socket,
      'output' => '',
      };

    $psSnake::Daemon::runtime->{'children'}->{'by_pid'}->{$pid} = $child;
    $psSnake::Daemon::runtime->{'children'}->{'by_type'}->{$type}->{$pid} = $child;

    child_started($type);
  }
  else {
    psSnake::die("cannot fork: $!") unless defined $pid;

    $psSnake::Daemon::runtime->{'type'} = "child";
    close $child_socket;

    my $r = run_task($type);

    # send non-empty lines of the result to the parent,
    # each terminated with exactly one "\n"
    while ($r =~ /([^\r\n]+?)([\r\n]|$)/sg) {
      my $s = $1;

      print $parent_socket "$s\n";
    }

    close($parent_socket);
    exit 0;
  }
}

# Return the bookkeeping record of the task $name,
# creating it (with 'laststart' set to 0) on first access.
sub get_child_cache {
  my ($name) = @_;

  my $slot = \$psSnake::Daemon::runtime->{'children_cache'}->{$name};

  unless (defined(${$slot})) {
    ${$slot} = {
      'laststart' => 0
      };
  }

  return ${$slot};
}

# Look up a running child.
#
#   find_child($id)          -- child with the given id
#   find_child($id, $type)   -- child with the given id and type
#   find_child(undef, $type) -- some child of the given type
#
# Returns the child record or undef if nothing matches
# (also when neither $id nor $type is given).
#
sub find_child {
  my ($id, $type) = @_;

  CHILD: foreach my $child (values %{$psSnake::Daemon::runtime->{'children'}->{'by_pid'}}) {
    if ($id) {
      # lookup by id, optionally narrowed by type
      next CHILD unless $child->{'id'} && $child->{'id'} eq $id;
      next CHILD if $type && $child->{'type'} ne $type;

      return $child;
    }

    # lookup by type only
    return $child if $type && $child->{'type'} eq $type;
  }

  return undef;
}

# Remember the current time as the last start of the task $name.
sub child_started {
  my ($name) = @_;

  my $now = time();
  $psSnake::Daemon::runtime->{'children_cache'}{$name}{'laststart'} = $now;
}

# Remember the current time as the last finish of the task $name
# and write the output of the child (if there is any) to the log.
sub child_finished {
  my ($name, $output) = @_;

  my $now = time();
  $psSnake::Daemon::runtime->{'children_cache'}{$name}{'lastfinish'} = $now;

  psSnake::do_log("[$name]: $output") if $output;
}

# Drain whatever the child $pid has written to its socket so far
# and append it to the 'output' field of the child record
# (reading regularly keeps the IPC buffer from filling up).
#
sub manage_child {
  my ($pid) = @_;

  my $child = $psSnake::Daemon::runtime->{'children'}->{'by_pid'}->{$pid};
  my $socket = $child->{'child_socket'};

  my @lines;
  while (my $line = <$socket>) {
    push(@lines, $line);
  }

  $child->{'output'} .= join("", @lines);
}

# Forget about the child $pid which has already finished.
#
# The child must have been reaped before this sub is called:
# waitpid() is expected to return -1 for it, anything else
# is reported as a programmer error.
#
# Closes the socket of the child, maintains the first_failure_time
# of its task, records the finish with child_finished() and removes
# the child from 'current_tasks' and from both 'children' indexes.
#
sub remove_child {
  my ($pid) = @_;

  psSnake::die("Programmer error: remove_child called on child which hasn't finished yet")
    if waitpid($pid,WNOHANG) != -1;

  my $child = $psSnake::Daemon::runtime->{'children'}->{'by_pid'}->{$pid};
  my $task = $psSnake::Daemon::runtime->{'tasks'}->{$child->{'type'}};

  close($child->{'child_socket'});

  # if child has output -- then it had some situation
  # which requires user intervention; save failure time
  #
  # (unset when child returns nothing --
  # meaning intervention is no longer needed)
  #
  if ($child->{'output'}) {
    $task->{'runtime'}->{'first_failure_time'} = time()
      unless $task->{'runtime'}->{'first_failure_time'};

    psSnake::debug("child output: " . $child->{'output'});

    if (! defined($task->{'disable_notifications'})) {
      # notification_interval is optional, treat a missing one as 0
      my $interval = $task->{'notification_interval'} || 0;

      # reset failure interval counter,
      # so we do not send notifications
      # more often than notification_interval
      if ($task->{'runtime'}->{'first_failure_time'} + $interval < time()) {
        $task->{'runtime'}->{'first_failure_time'} = time();
      }
    }
  }
  else {
    # delete first error time so next failure time will be saved
    delete($task->{'runtime'}->{'first_failure_time'});
  }

  child_finished($child->{'type'}, $child->{'output'});

  # the task may not belong to any group any more (for example when
  # it was removed from configuration while the child was running);
  # do not create a bogus group entry in that case
  my $child_group = find_group_by_task(undef, $child->{'type'});
  if (defined($child_group)) {
    delete $psSnake::Daemon::runtime->{'current_tasks'}->{$child_group}->{$child->{'type'}};
  }

  delete $psSnake::Daemon::runtime->{'children'}->{'by_type'}->{$child->{'type'}}->{$pid};
  delete $psSnake::Daemon::runtime->{'children'}->{'by_pid'}->{$pid};
}

# Returns 1 if at least one child is registered, 0 otherwise.
sub have_children {
  foreach my $pid (keys %{$psSnake::Daemon::runtime->{'children'}->{'by_pid'}}) {
    return 1;
  }

  return 0;
}

# Walk over all registered children.
#
# Options (hashref):
#   stop_now -- send TERM (signal 15) to every child
#
sub for_each_child {
  my ($opts) = @_;

  $opts = {} unless $opts;

  my @child_pids = keys %{$psSnake::Daemon::runtime->{'children'}->{'by_pid'}};
  foreach my $child_pid (@child_pids) {
    next unless $opts->{'stop_now'};

    kill(15, $child_pid); # TERM (default for run_forked)
  }
}

# Return the name of the task group which contains $task_name
# or undef if the task is not in any group.
#
# $task_groups is a hashref { group name => { task name => 1 } };
# the groups of the running daemon are searched when it is not given.
#
sub find_group_by_task {
  my ($task_groups, $task_name) = @_;

  if (!$task_groups) {
    $task_groups = $psSnake::Daemon::runtime->{'task_groups'};
  }

  foreach my $group_name (keys %{$task_groups}) {
    return $group_name if $task_groups->{$group_name}->{$task_name};
  }

  return undef;
}

# break $psSnake::Daemon::runtime->{'tasks'}
# (configured in /etc/ps-farm/options/ps-snaked/jobs)
# into task groups by dependency
#
# Fills $psSnake::Daemon::runtime->{'task_groups'}, which has the form
#   { group name => { task name => 1, ... }, ... }
#
# A task and all the tasks listed in its 'conflicts' always end up
# in the same group; groups which get connected by such a task are
# merged into one. Every task belongs to exactly one group, a task
# without conflicts gets a group of its own (named after the task).
#
# The resulting partition does not depend on the order in which the
# tasks are visited (only the names of merged groups may).
#
sub prepare_task_groups {
  my $task_groups = $psSnake::Daemon::runtime->{'task_groups'};
  my $tasks = $psSnake::Daemon::runtime->{'tasks'};

  foreach my $task_name (keys %{$tasks}) {
    my $task = $tasks->{$task_name};

    # the task itself and everything it conflicts with
    # must share one group
    my @members = ($task_name);
    if ($task->{'conflicts'} && ref($task->{'conflicts'}) eq 'ARRAY') {
      push(@members, @{$task->{'conflicts'}});
    }

    # groups which already hold any of the members
    my $involved_groups = {};
    foreach my $member (@members) {
      my $tg = find_group_by_task($task_groups, $member);
      $involved_groups->{$tg} = 1 if defined($tg);
    }

    # choose the destination group (any of the involved ones,
    # sorted only to make the choice reproducible)
    my ($dest_group, @other_groups) = sort keys %{$involved_groups};

    # nobody is in any group yet --
    # creating new group with the name of the task
    if (!defined($dest_group)) {
      $dest_group = $task_name;
      $task_groups->{$dest_group} = {} unless $task_groups->{$dest_group};
    }

    # merge all the other involved groups into the destination group
    foreach my $tg (@other_groups) {
      foreach my $tt (keys %{$task_groups->{$tg}}) {
        $task_groups->{$dest_group}->{$tt} = 1;
      }
      delete($task_groups->{$tg});
    }

    # every group which held a member is a part of the destination
    # group by now, so attaching can't leave a member in two groups
    foreach my $member (@members) {
      $task_groups->{$dest_group}->{$member} = 1;
    }
  }
}

# (Re)read the configuration of the daemon from the directory $dir.
#
# Parameters:
#   $dir  -- configuration directory; plain files in it are daemon
#            options, subdirectories of $dir/jobs are job definitions
#   $opts -- optional hashref:
#              'no-jobs'       -- read daemon options only
#              'keep_disabled' -- keep jobs which have 'disabled' option
#
# Only options and jobs whose mtime differs from the one remembered
# during the previous read are read again. Jobs failing validation are
# logged and skipped. The validated set of jobs replaces
# $psSnake::Daemon::runtime->{'tasks'} and the task groups are rebuilt.
#
# Side effects: sets $psSnake::ADMIN_EMAIL and $psSnake::LOG.
#
sub refreshOptions {
  my ($dir, $opts) = @_;
  
  $opts = {} unless $opts;

  my $config = $psSnake::Daemon::runtime->{'config'};
  # work on a copy, the new set of tasks is applied at the very end
  my $tasks = Storable::dclone($psSnake::Daemon::runtime->{'tasks'});
  my $tmp;

  # read daemon options
  $tmp = psSnake::read_dir($dir, {'output_type' => 'arrayref', 'only-files' => 1});
  foreach my $o (@{$tmp}) {
    next if $o =~ /^\./o;

    my $fileinfo = psSnake::fileinfo_struct({'absolute_name' => $dir . "/" . $o});

    # option was not modified since we've read it
    if ($config->{$o} && $config->{$o}->{'mtime'} eq $fileinfo->{'mtime'}) {
      next;
    }

    $config->{$o}->{'mtime'} = $fileinfo->{'mtime'};
    $config->{$o}->{'value'} = psSnake::read_file_option($dir . "/" . $o);

    if ($o eq "admin_email") {
      $psSnake::ADMIN_EMAIL = $config->{$o}->{'value'};
    }
  }

  if (!$psSnake::ADMIN_EMAIL) {
    $psSnake::ADMIN_EMAIL = 'root';
  }

  # configure logging (defaults: <MY_ROOT>/tmp/snaked.log,
  # rotate_size of 10MB, rotate_keep_copies of 2)
  #
  if ($config->{'log'}) {
    $psSnake::LOG = {
      'filename' => $config->{'log'}->{'value'},
      };
  }
  else {
    $psSnake::LOG = {
      'filename' => ($ENV{'MY_ROOT'} eq "/" ? "" : $ENV{'MY_ROOT'}) . "/tmp/snaked.log",
      };
  }
  if ($config->{'log_rotate_size'}) {
    $psSnake::LOG->{'rotate_size'} = $config->{'log_rotate_size'}->{'value'};
  }
  else {
    $psSnake::LOG->{'rotate_size'} = 1024 * 1024 * 10;
  }
  if ($config->{'log_rotate_keep_copies'}) {
    $psSnake::LOG->{'rotate_keep_copies'} = $config->{'log_rotate_keep_copies'}->{'value'};
  }
  else {
    $psSnake::LOG->{'rotate_keep_copies'} = 2;
  }
  
  # in watchdog mode we don't need
  # to read jobs definitions
  return if $opts->{'no-jobs'};


  my $defined_jobs = {};

  # read daemon jobs
  $tmp = psSnake::read_dir($dir . "/jobs", {'output_type' => 'arrayref', 'only-directories' => 1});
  foreach my $o (@{$tmp}) {
    next if $o =~ /^\./o;

    $defined_jobs->{$o} = 1;

    my $dirinfo = psSnake::fileinfo_struct({'absolute_name' => $dir . "/jobs/" . $o});

    # job was not modified since we've read it
    if ($tasks->{$o} && $tasks->{$o}->{'mtime'} eq $dirinfo->{'mtime'}) {
      next;
    }

    $tasks->{$o}->{'mtime'} = $dirinfo->{'mtime'};

    # every file in the job directory is an option of the job:
    #   conflicts -- read with psSnake::read_file_array
    #   cmd       -- the file itself is the command, its path is stored
    #   others    -- read with psSnake::read_file_option
    my $joptions = psSnake::read_dir($dir . "/jobs/" . $o, {'output_type' => 'arrayref', 'only-files' => 1});
    foreach my $jo (@{$joptions}) {
      if ($jo eq 'conflicts') {
        $tasks->{$o}->{$jo} = psSnake::read_file_array($dir . "/jobs/" . $o . "/" . $jo);
      }
      elsif ($jo eq 'cmd') {
        $tasks->{$o}->{$jo} = $dir . "/jobs/" . $o . "/" . $jo;
      }
      else {
        $tasks->{$o}->{$jo} = psSnake::read_file_option($dir . "/jobs/" . $o . "/" . $jo);
      }
    }
    if (defined($tasks->{$o}->{'disabled'}) && !$opts->{'keep_disabled'}) {
      delete($tasks->{$o});
    }
  }
  
  # cleanup removed jobs, validate tasks
  TASKS: foreach my $task_name (keys %{$tasks}) {
    if (!$defined_jobs->{$task_name}) {
      delete($tasks->{$task_name});
      next TASKS;
    }

    my $task = $tasks->{$task_name};

    $task->{'execution_timeout'} = 0 unless defined($task->{'execution_timeout'});

    foreach my $mp ("cmd") {
      if (!$task->{$mp}) {
        psSnake::do_log("skipping job [$task_name]: mandatory parameter [$mp] not specified");
        delete($tasks->{$task_name});
        next TASKS;
      }
    }
    if (! -x $task->{'cmd'}) {
      psSnake::do_log("skipping job [$task_name]: [$task->{'cmd'}] is not executable");
      delete($tasks->{$task_name});
      next TASKS;
    }

    if ((!$task->{'execution_interval'} && !$task->{'execution_schedule'}) ||
      ($task->{'execution_interval'} && $task->{'execution_schedule'})) {
      
      psSnake::do_log("skipping job [$task_name]: one and only one of (execution_interval, execution_schedule) must be defined");
      delete($tasks->{$task_name});
      next TASKS;
    }

    if ($task->{'execution_schedule'}) {
      # the constructor dies on invalid schedule
      my $cron;
      eval {
        $cron = Schedule::Cron::Events->new($task->{'execution_schedule'}, Seconds => time());
      };

      if (!$cron) {
        my $msg = $@;
        # leave only first line
        $msg =~ s/[\r\n].+$//sgo;
        # remove filename in which the error was raised
        $msg =~ s/at\ \/.+$//sgo;
        $msg = ": $msg" if $msg;

        psSnake::do_log("skipping job [$task_name]: invalid execution_schedule $msg");
        delete($tasks->{$task_name});
        next TASKS;
      }
      $task->{'cron'} = $cron;
      $task->{'next_run'} = timelocal($task->{'cron'}->nextEvent);
    }

    foreach my $dp ("execution_interval", "execution_timeout", "notification_interval", "start_random_sleep") {
      if ($task->{$dp} && !psSnake::is_digital($task->{$dp})) {
        psSnake::do_log("skipping job [$task_name]: [$dp] must be numeric");
        delete($tasks->{$task_name});
        next TASKS;
      }
    }

    if ($task->{'conflicts'} && ref($task->{'conflicts'}) ne 'ARRAY') {
      psSnake::do_log("skipping job [$task_name]: [conflicts] must be an array reference");
      delete($tasks->{$task_name});
      next TASKS;
    }
    if ($task->{'conflicts'}) {
      foreach my $c_task (@{$task->{'conflicts'}}) {
        if ($c_task eq $task_name) {
          psSnake::do_log("skipping job [$task_name]: task conflicts with itself.");
          delete($tasks->{$task_name});
          next TASKS;
        }
      }
    }
  }

  # apply new tasks, recalculate task groups
  $psSnake::Daemon::runtime->{'tasks'} = $tasks;
  $psSnake::Daemon::runtime->{'task_groups'} = {};
  prepare_task_groups();
}

# Run a piece of code which is known to fail from time to time,
# retrying it when it warns or dies.
#
# Parameters:
#   $code -- coderef to execute (called in scalar context, no arguments)
#   $opts -- optional hashref (not modified):
#              'tries'               -- maximum number of attempts (default 1)
#              'sleep_between_tries' -- seconds to wait before next attempt
#                                       (default 1)
#
# The attempts stop as soon as the code finishes without
# a warning and without an exception.
#
# Returns hashref:
#   'result' -- value returned by the last attempt (undef if it died)
#   'warn'   -- last warning or exception text of the last attempt,
#               empty string if the last attempt was clean
#   'try'    -- number of attempts made
#
# Dies if $code is not a code reference.
#
sub code_may_fail {
  my ($code, $opts) = @_;

  die("Need coderef (something to execute)")
    unless $code && ref($code) eq 'CODE';
  
  $opts = {} unless $opts;
  my $tries = $opts->{'tries'} || 1;
  my $sleep_between_tries = $opts->{'sleep_between_tries'} || 1;

  my $i = 0;
  my $lastwarn = "";
  my $code_result;

  while ($i < $tries) {
    $i = $i + 1;

    $lastwarn = "";
    $code_result = undef;

    {
      # "local" puts the handler of the caller (if any) back afterwards
      local $SIG{'__WARN__'} = sub { $lastwarn = join("\n", @_); };
      local $@;

      my $finished = eval { $code_result = $code->(); 1; };
      if (!$finished) {
        # exception is a failure as well, report it the same way
        $lastwarn = (defined($@) && length("$@")) ? "$@" : "unknown error";
      }
    }

    # clean run -- nothing to retry
    last unless $lastwarn;

    sleep $sleep_between_tries if $i < $tries;
  }

  return {
    'result' => $code_result,
    'warn' => $lastwarn,
    'try' => $i,
    };
}

# Returns reference to the list of all the processes of the system.
#
# On Linux the list is built directly from /proc (every numeric
# entry which has a readable "stat" file) and consists of psProcess
# objects with 'pid', 'ppid', 'pgid' and 'cmd' fields; 'cmd' is the
# command line with arguments joined by spaces or, when the command
# line is empty, the process name in square brackets.
#
# On other systems the list comes from Proc::ProcessTable.
#
# Dies (psSnake::die) if the process table can not be read.
#
sub get_process_table {
  my $ptable;

  # Proc::ProcessTable has some leaks on linux
  # which leads to process dying
  if ($^O eq 'linux') {
    $ptable = [];
    
    my $i = 0;

    my $proc_dh;
    my $open_res;
    while (!($open_res = opendir($proc_dh, "/proc")) && $i < 3) {
      sleep 1;
      $i++;
    }
    if (!$open_res) {
      psSnake::die("unable to read /proc");
    }

    my @all_entries;
    $i = 0;
    while (scalar(@all_entries) < 3 && $i < 3) {
      if ($i > 0) {
        # previous attempt has consumed the directory stream,
        # start over after a short pause
        sleep 1;
        rewinddir($proc_dh);
      }
      @all_entries = readdir($proc_dh);
      $i++;
    }
    closedir($proc_dh);

    # . + .. eq 2
    if (scalar(@all_entries) < 3) {
      psSnake::die("/proc is not mounted");
    }

    # processes come and go while we are reading,
    # so a file which can't be opened is not an error
    my $read_may_fail = sub {
      my ($filename) = @_;
      my $filecontent;
      if (open(my $fh, '<', $filename)) {
        { local $/ = undef; $filecontent = <$fh>; }
        close($fh);
      }
      return $filecontent;
    };

    foreach my $e (sort @all_entries) {
      my $pid_dir = "/proc/$e";

      next if $e eq '.' || $e eq '..';
      next if $e !~ /^\d+$/o;
      next if ! -d $pid_dir;
      
      my $cmd = $read_may_fail->("$pid_dir/cmdline");
      $cmd =~ s/\0/ /goi if $cmd;

      my $stat = $read_may_fail->("$pid_dir/stat");
      next unless $stat;

      # stat starts with: pid (comm) state ppid pgrp ...
      #
      # comm may contain spaces and parentheses, so it's taken as
      # everything between the first "(" and the last ")" which is
      # followed by the state and two numbers; entries which do not
      # look like this are skipped
      next unless $stat =~ /^\d+\s+\((.*)\)\s+\S+\s+(\d+)\s+(\d+)\s/s;
      my ($comm, $ppid, $pgid) = ($1, $2, $3);

      # no command line (kernel thread, zombie) -- use process name
      if (!$cmd) {
        $comm =~ s/[\(\)]//goi;
        $cmd = "[" . $comm . "]";
      }

      my $p = {
        'pid' => $e,
        'ppid' => $ppid,
        'pgid' => $pgid,
        'cmd' => $cmd,
        };

      bless ($p, 'psProcess');
      push (@{$ptable}, $p);
    }

    return $ptable;
  }
  else {
    my $r = code_may_fail(sub {return Proc::ProcessTable->new()->table}, {'tries' => 3});

    if (!$r->{'result'}) {
      psSnake::die("unable to get process table: " . $r->{'warn'});
    }
    
    $ptable = $r->{'result'};
  }

  # process table with less than two processes can't be real
  my $i = 0;
  while (scalar(@{$ptable}) < 2 && $i < 3) {
    $i++;
    sleep 1;
    $ptable = get_process_table();
  }

  if (scalar(@{$ptable}) < 2) {
    psSnake::die("unable to read process table");
  }

  return $ptable;
}

# Find the process with the given pid.
#
# Parameters:
#   $pid  -- process id to look for
#   $opts -- optional hashref:
#              'processes' -- process list to search in; by default
#                             the snapshot of the process table cached in
#                             $psSnake::Daemon::runtime->{'startup_processes'}
#                             is used (and taken first if not there yet)
#
# Returns the process object or undef if there is no such process.
#
sub get_process_by_id {
  my ($pid, $opts) = @_;

  $opts = {} unless $opts;

  my $processes = $opts->{'processes'};
  if (!$processes) {
    $psSnake::Daemon::runtime->{'startup_processes'} = get_process_table()
      unless $psSnake::Daemon::runtime->{'startup_processes'};

    $processes = $psSnake::Daemon::runtime->{'startup_processes'};
  }

  foreach my $p (@$processes) {
    my $p_pid = code_may_fail(sub {return $p->pid})->{'result'};

    # entries with empty pid were seen on real hosts (dvina, dunai):
    # the process was already gone (its /proc/<pid>/cmdline file
    # did not exist) at the time its entry was examined
    next unless $p_pid;

    return $p if $p_pid eq $pid;
  }

  return undef;
}

# Find the snaked daemon among the process $pid and its ancestors
# (recognized by command line).
#
# The chain of parents is walked from $pid up to (not including)
# the process with pid 1, twice:
#   1. looking for a daemon started with our --cfg path
#   2. (backward compatibility) looking for a daemon
#      started without --cfg option at all
#
# Returns the process object of the daemon or undef.
# It's a fatal error (psSnake::die) if some process of the chain
# is not in the process list.
#
sub get_my_process {
  my ($pid) = @_;

  # returns the first process of the chain
  # whose command line is accepted by $is_daemon
  my $search_ancestors = sub {
    my ($is_daemon) = @_;

    my $current_pid = $pid;
    while ($current_pid ne 1) {
      my $proc = get_process_by_id($current_pid);

      psSnake::die("unable to find [$current_pid] in process list")
        unless $proc;

      return $proc if $proc->cmndline && $is_daemon->($proc->cmndline);

      $current_pid = $proc->ppid;
    }

    return undef;
  };

  my $daemon;

  # trying to find daemon with the same --cfg option
  $daemon = $search_ancestors->(sub {
    return $_[0] =~ /$daemon_match_cfg/;
    });
  return $daemon if $daemon;

  # backward compatibility: trying to find
  # any daemon without --cfg option
  $daemon = $search_ancestors->(sub {
    return $_[0] =~ /$daemon_match_nocfg/ && $_[0] !~ /--cfg/;
    });
  return $daemon if $daemon;

  return undef;
}

# Find another running daemon: first one started with the same --cfg
# option, then (backward compatibility) one started without --cfg.
#
# Parameters:
#   $opts -- optional hashref:
#              'refresh_startup_processes' -- take a new snapshot of
#                                             the process table first
#
# Returns the process object of the process group leader
# of the daemon found, or undef.
#
sub get_other_daemon_process {
  my ($opts) = @_;
  $opts = {} unless $opts;

  if ($opts->{'refresh_startup_processes'} || !$psSnake::Daemon::runtime->{'startup_processes'}) {
    $psSnake::Daemon::runtime->{'startup_processes'} = get_process_table();
  }
  my $processes = $psSnake::Daemon::runtime->{'startup_processes'};

  # despite the name of the sub this is not "always find my process":
  # it looks for a daemon among our ancestors and usually
  # returns undef (during --stop for example)
  #
  my $my_process = get_my_process($$);

  # reading process attributes may fail, so go through code_may_fail
  my $read_attributes = sub {
    my ($proc) = @_;

    my ($pid, $cmndline, $pgrp);
    code_may_fail(sub {$pid = $proc->pid});
    code_may_fail(sub {$cmndline = $proc->cmndline});
    code_may_fail(sub {$pgrp = $proc->pgrp});

    return ($cmndline, $pgrp);
  };

  # processes of our own process group are not "other" daemon
  my $is_in_my_group = sub {
    my ($proc) = @_;
    return $my_process && $proc->pgrp eq $my_process->pgrp;
  };

  # trying to find other daemon with the same --cfg option
  #
  foreach my $candidate (@$processes) {
    my ($cmndline, $pgrp) = $read_attributes->($candidate);

    next unless $cmndline;
    next unless $cmndline =~ /$daemon_match_cfg/;
    next if $is_in_my_group->($candidate);

    # if we are looking for daemon then its parent should be init
    next if $cmndline !~ /--debug/o && $candidate->ppid ne 1;

    my $group_leader = get_process_by_id($pgrp);

    # found a process for which group leader doesn't exist
    # (shouldn't happen but just in case of some error)
    #
    # real world situation (pechora, Apr 10 2010):
    #
    #   PID  PPID  PGRP CMD
    #  5674     1  5674 /usr/bin/perl /usr/local/ps-snake/bin/snaked --watchdog --cfg /etc/ps-farm/options/ps-snaked
    # 26550     1 25742 /usr/bin/perl /usr/local/ps-snake/bin/snaked --daemon --cfg /etc/ps-farm/options/ps-snaked
    # 29634     1 25742 /usr/bin/perl /usr/local/ps-snake/bin/snaked --daemon --cfg /etc/ps-farm/options/ps-snaked
    #
    # the daemon itself (25742) was gone; both 26550 and 29634 were
    # its stuck forks (locked during log operation or something),
    # watchdog doesn't tell the daemon from its forks and manual
    # `snaked --daemon` didn't detect them as well;
    # now their process group is killed before spawning new daemon
    #
    if (!$group_leader) {
      print STDERR "cleaning up stuck process group [$pgrp]\n";
      kill(-9, $pgrp);
    }

    return $group_leader;
  }

  # backward compatibility: trying to find
  # any other daemon without --cfg option
  #
  foreach my $candidate (@$processes) {
    my ($cmndline, $pgrp) = $read_attributes->($candidate);

    next unless $cmndline;
    next unless $cmndline =~ /$daemon_match_nocfg/;
    next if $cmndline =~ /--cfg/;
    next if $is_in_my_group->($candidate);

    my $group_leader = get_process_by_id($pgrp);
    return $group_leader;
  }

  return undef;
}

# Rewrite a command line of the daemon so that it starts
# with the full path of the executable: "$path/snaked <arguments>".
#
# Returns empty string if either $cmdline or $path is empty.
#
sub canonical_command_line {
  my ($cmdline, $path) = @_;

  if (!$cmdline || !$path) {
    return "";
  }

  # freebsd leaves a space in the end of command
  $cmdline =~ s{\ +$}{}go;

  # everything up to and including the name of the executable
  # (the first "snaked" or "ps-snaked" followed by whitespace
  # or end of line: the match is not greedy and the substitution
  # is not global) is replaced with the full path
  $cmdline =~ s{.+?(ps-)?snaked(\s+|$)}{${path}/snaked };

  # the substitution above might have left a space in the end
  $cmdline =~ s{\s+$}{}goi;

  return $cmdline;
}

# Determine the directory of the running script and its command line.
#
# $opts is passed to get_process_by_id() as is.
#
# Returns list: (directory of the script, command line of this process).
# It's a fatal error (psSnake::die) if any of them can't be determined.
#
sub get_my_path_commandline {
  my ($opts) = @_;

  my $self_process = get_process_by_id($$, $opts);
  psSnake::die("[$$]: unable to find myself in process list") unless $self_process;

  my $script_dir = $FindBin::Bin;
  psSnake::die("[$$]: unable to find my path") unless $script_dir;

  my $command_line = $self_process->cmndline;
  psSnake::die("[$$] unable to determine my command line") unless $command_line;

  return ($script_dir, $command_line);
}

# Restart the daemon: fork, let the parent exit and exec
# the daemon again (in a new session) from the child.
#
# Parameters:
#   $my_command_line -- command line of the running daemon
#   $my_path         -- directory of the daemon executable
# (both are determined from the process table when empty)
#
# Never returns: the parent exits with code 0, the child is either
# replaced by the new daemon or dies.
#
sub exec_ps_snaked {
  my ($my_command_line, $my_path) = @_;

  # on ws1-569 in snaked.log got:
  #
  # Mon Oct 19 17:59:17 2009 [/place/home/monitor/ps-snake/usr/local/ps-snake/bin/snaked] unable to exec  --cfg /place/home/monitor/ps-snake/etc/ps-farm/options/ps-snaked
  #
  # which effectively means that $my_command_line was empty
  # after calling canonical_command_line() below
  # (" --cfg ..." was appended to it in the next step)
  #
  # so trying to determine my command line if it's empty
  # (also added check on startup that we've got it)
  #

  # as a workaround for empty command line or path (why?)
  # trying to determine them during exit process
  if (!$my_command_line || !$my_path) {
    ($my_path, $my_command_line) = get_my_path_commandline({'processes' => get_process_table()});
  }

  my $pid = fork;

  # without this check a failed fork would be taken for
  # the child branch and run in the original process
  psSnake::die("cannot fork: $!") unless defined($pid);

  if ($pid) {
    # parent
    exit 0;
  }

  # child; setsid is called only once: the process is a session
  # (and process group) leader after it, so any further call
  # is bound to fail
  POSIX::setsid() || psSnake::die("Error running setsid: " . $!);

  # let parent exit and clean up from /proc (or whatever)
  sleep 1;

  $my_command_line = canonical_command_line($my_command_line, $my_path);

  # append --cfg parameter if it's not specified
  # (codepath is used only during first run
  # when path to configuration was specified
  # by environment variable);
  # the path is matched literally, it's not a pattern
  if ($my_command_line !~ /--cfg \Q$ENV{'PS_SNAKED_CFG'}\E/) {
    $my_command_line .= " --cfg $ENV{'PS_SNAKED_CFG'}";
  }

  # set environment variable to specify that we want to cleanup
  # already running snaked processes (this might be workaround
  # for some FreeBSD or Proc::ProcessTable on FreeBSD bug,
  # which caused the following:
  #
  # Thu Jun 24 10:29:31 2010 [/opt/home/monitor/ps-snake/usr/local/ps-snake/bin/snaked] clock moved back from Thu Jun 24 10:29:25 2010 to Thu Jun 24 10:29:24 2010, restarting
  # Thu Jun 24 10:29:38 2010 [/opt/home/monitor/ps-snake/usr/local/ps-snake/bin/snaked] [24836] requested to restart
  # Thu Jun 24 10:29:38 2010 [/opt/home/monitor/ps-snake/usr/local/ps-snake/bin/snaked] [24836] stopped
  # Thu Jun 24 10:29:54 2010 [/opt/home/monitor/ps-snake/usr/local/ps-snake/bin/snaked] [WARN] [29246] snaked is already running: /usr/bin/perl /opt/home/monitor/ps-snake/usr/local/ps-snake/bin/snaked --daemon --cfg /opt/home/monitor/ps-snake/etc/ps-farm/options/ps-snaked  [24836]
  #
  # [monitor@orange64 ~]$ uname -a
  # FreeBSD orange64.yandex.ru 7.2-STABLE FreeBSD 7.2-STABLE #0 r199991M: Mon Feb  8 12:50:25 MSK 2010     root@distillatory.yandex.ru:/place/tmp/mk_pkg.wG1LSf1f/obj/place/GIT-repos/FreeBSD-7-r199991/sys/PRODUCTION  amd64
  #
  # Proc::ProcessTable 0.54
  # )
  #
  $ENV{'snaked_cleanup_already_running'} = 1;

  # next life
  exec($my_command_line) || psSnake::do_log("[$$] unable to exec $my_command_line", {'stderr' => 1}) && die();

  exit(255);
}

# spawn additional watchdogs slowly
#
# Counts the processes in the process table whose command line matches
# $watchdog_match and, if there are fewer of them than $watchdogs2maintain,
# starts one more watchdog. At most one watchdog is started per call,
# so the full set is built up over several calls.
sub manage_watchdogs {
  my $ptable = get_process_table();

  my $number_of_watchdogs = 0;
  foreach my $p (@$ptable) {
    my $cmndline = $p->cmndline;
    next unless $cmndline;
    next unless $cmndline =~ /$watchdog_match/;
    $number_of_watchdogs = $number_of_watchdogs + 1;
  }

  if ($number_of_watchdogs < $watchdogs2maintain) {
    # watchdog is started with the canonical form of the daemon's
    # own command line, with --daemon replaced by --watchdog
    # (working on a copy, global $my_command_line stays untouched)
    my $t_cmdline = $my_command_line;
    $t_cmdline = canonical_command_line($t_cmdline, $my_path);
    $t_cmdline =~ s/\-\-daemon/\-\-watchdog/;
    psSnake::run_forked($t_cmdline);
  }
}

# send signal 15 (SIGTERM on Linux and FreeBSD) to every process
# in the process table whose command line matches $watchdog_match
sub stop_watchdogs {
  my $ptable = get_process_table();

  foreach my $p (@$ptable) {
    my $cmndline = $p->cmndline;
    next unless $cmndline;
    next unless $cmndline =~ /$watchdog_match/;

    kill (15, $p->pid);
  }
}

# watchdog mode, starts ps-snaked daemon
# if finds that it's not running
#
# Does not return: loops forever and leaves only through exit(0)
# (when its randomized life time has passed) or by becoming
# the daemon itself via exec_ps_snaked().
#
# The process table is inspected each time the
# $psSnake::Daemon::runtime->{'usec_2check_watchdog'} countdown
# (decremented by 50000 after every 50000 usec nap) drops below 1;
# the countdown is then reset to ($watchdogs2maintain + 1) * 4 seconds.
sub run_watchdog {

  # set daemon type to change signal handling slightly
  $psSnake::Daemon::runtime->{'type'} = 'watchdog';

  # number of consecutive checks which did not find the daemon
  my $unsuccessful_tries = 0;

  # between one and ($watchdogs2maintain + 1) hours
  my $life_time = 3600 * (rand($watchdogs2maintain) + 1);

  while(1) {
    # stop watchdogs from time to time to toss
    # their pid numbers (which might affect oom killers),
    # but not in case they detect that main process
    # is not running (and waiting a bit to start it)
    # 
    # watchdogs are restarted by main daemon.
    # 
    if ((time() - $psSnake::Daemon::runtime->{'start_time'}) > $life_time && !$unsuccessful_tries) {
      exit(0);
    }

    if ($psSnake::Daemon::runtime->{'usec_2check_watchdog'} < 1) {
      my $ptable = get_process_table();

      my $currently_running_watchdogs = 0;

      # get the ps-snaked daemon process for which the watchdog is running
      my $my_process = undef;

      foreach my $p (@$ptable) {
        # accessors are called through code_may_fail
        # (the value simply stays undefined if the accessor
        # did not set it, and the process is skipped)
        my $p_cmndline;
        my $r = code_may_fail(sub {$p_cmndline = $p->cmndline});

        next unless $p_cmndline;

        if ($p_cmndline =~ /$watchdog_match/) {
          $currently_running_watchdogs = $currently_running_watchdogs + 1;
        }
        elsif ($p_cmndline =~ /$daemon_match_cfg/) {
          # at this point any snaked is selected
          # (even that which is starting
          # or running external command)

          my $p_pid;
          my $p_ppid;
          my $p_pgrp;
          $r = code_may_fail(sub {$p_pid = $p->pid});
          $r = code_may_fail(sub {$p_ppid = $p->ppid});
          $r = code_may_fail(sub {$p_pgrp = $p->pgrp});

          next unless $p_pid && $p_ppid && $p_pgrp;

          # real daemon is parented by init and is the process group leader,
          # if its not found -- start it, and it will clean up any
          # stuck child from previous daemon (shouldn't happen because
          # children are strongly attached to the main daemon
          # with use of terminate_on_sudden_parent_death flag of run_forked)
          if ($p_ppid eq 1 && $p_pid eq $p_pgrp) {
            $my_process = $p;
          }
        }
      }

      if ($my_process) {
        $unsuccessful_tries = 0;
      }
      else {
        $unsuccessful_tries = $unsuccessful_tries + 1;
      }

      if ($unsuccessful_tries > 0) {
        if ($unsuccessful_tries < 2) {
          # 4 seconds should be enough to start daemon
          # (if it's not found and began to start -- is restarting),
          # randomize each watchdog so they do not try to start
          # all at the same time
          # 
          sleep(4 + 4 * int(rand($currently_running_watchdogs)));
        }
        else {
          psSnake::do_log("watchdog [$$]: snaked not found (killed?), respawning");
          # replace --watchdog with --daemon
          my $t_cmdline = $my_command_line;
          $t_cmdline =~ s/\-\-watchdog/\-\-daemon/;

          # try to execute daemon instead of watchdog
          # if fork fails (wouldn't succeed probably,
          # but could we try at least?)
          #
          if (defined(my $pid = fork)) {
            if ($pid) {
              # initialized with "still running" result of waitpid
              # so the first comparison below does not operate
              # on undefined value
              my $waitpid = 0;
              
              # exec_ps_snaked forks before actually execing snaked
              # and parent exits immediately (which makes it
              # totally detached from watchdog)
              #
              # polling (instead of blocking) until waitpid reports
              # that there's no such child anymore (-1)
              while ($waitpid != -1) {
                $waitpid = waitpid($pid, WNOHANG);
                sleep 1;
              }

              # watchdog to continue
              $unsuccessful_tries = 0;
            }
            else {
              # watchdog to become snaked
              # (detached from parent totally)
              exec_ps_snaked($t_cmdline, $my_path);
            }
          }
          else {
            # fork failed, this very process becomes snaked
            exec_ps_snaked($t_cmdline, $my_path);
          }
        }
      }

      $psSnake::Daemon::runtime->{'usec_2check_watchdog'} = ($watchdogs2maintain + 1) * 2 * 2000000;
    }

    usleep(50000);
    $psSnake::Daemon::runtime->{'usec_2check_watchdog'} = $psSnake::Daemon::runtime->{'usec_2check_watchdog'} - 50000;
  }

  # not reached, the loop above never ends
  exit (255);
}

psSnake::read_cmdline();

# One-shot control commands which only send a signal to the daemon
# already running for this configuration and exit. They are checked
# in the order listed, the first one given on the command line wins.
foreach my $control (
  # option       signal   requested action          silenced by --only-errors
  ['stop',       15,      "stop",                   0],
  ['configure',  "HUP",   "refresh configuration",  0],
  ['restart',    "USR2",  "restart",                1],
) {
  my ($option, $signal, $action, $may_be_silent) = @{$control};
  next unless defined($psSnake::cmd_opts->{$option});

  my $daemon = get_other_daemon_process();
  if (!$daemon) {
    print "no snaked daemon found for $ENV{'PS_SNAKED_CFG'}\n";
    exit 0;
  }

  my $silent = $may_be_silent && defined($psSnake::cmd_opts->{'only-errors'});
  if (!$silent) {
    print "requesting " . $daemon->pid() . " [" . $daemon->cmndline . "] to " . $action . "\n";
  }
  kill ($signal, $daemon->pid);
  exit 0;
}

# Informational commands: print something and exit.
if (defined($psSnake::cmd_opts->{'status'})) {
  my $daemon = get_other_daemon_process();
  print $daemon
    ? "snaked is running as pid " . $daemon->pid . ". command line [" . $daemon->cmndline . "]\n"
    : "no daemon running\n";
  exit 0;
}
elsif (defined($psSnake::cmd_opts->{'show-jobs'})) {
  # disabled jobs are listed as well
  refreshOptions($ENV{'PS_SNAKED_CFG'}, {'keep_disabled' => 1});
  print "    configured jobs:\n";
  foreach my $job_name (keys %{$psSnake::Daemon::runtime->{'tasks'}}) {
    print "      " . $job_name . "\n";
    my $job = $psSnake::Daemon::runtime->{'tasks'}->{$job_name};

    # one line per job option, list values are comma separated
    foreach my $o (sort keys %{$job}) {
      print "        $o: ";
      print ref($job->{$o}) eq 'ARRAY' ? join(",", @{$job->{$o}}) : $job->{$o};
      print "\n";
    }
  }
  exit 0;
}
elsif (defined($psSnake::cmd_opts->{'version'})) {
  print "$version\n";
  exit 0;
}

my $i_am_watchdog = defined($psSnake::cmd_opts->{'watchdog'});

# nothing to do unless asked to run as daemon, watchdog or in debug mode
if (!defined($psSnake::cmd_opts->{'daemon'}) && !$psSnake::debug && !$i_am_watchdog) {
  help();
  exit 0;
}

refreshOptions($ENV{'PS_SNAKED_CFG'}, {'no-jobs' => $i_am_watchdog});
my $t = $psSnake::LOG->{'filename'};
if (!psSnake::can_log()) {
  psSnake::warn("Can not write to log file [$t], check permissions; logging to STDERR");
}

# only one daemon per configuration is allowed
# (watchdogs are not subject to this check)
if (!$i_am_watchdog) {
  my $d = get_other_daemon_process();
  if ($d) {
    if (!$ENV{'snaked_cleanup_already_running'}) {
      psSnake::warn("[$$] snaked is already running: " . $d->cmndline . " ["  . $d->pid . "]");
      exit 1;
    }
    else {
      # we were asked (through the environment variable) to get rid
      # of the daemon which is still running
      my $previous_snaked = $d;

      # remove the request from the environment completely
      # (assigning undef would leave the variable set to an empty
      # string which is then inherited by every child process)
      delete $ENV{'snaked_cleanup_already_running'};

      # negative signal number: KILL is sent to the whole
      # process group of the previous daemon
      kill(-9, $d->pid);
      sleep 3;
      $d = get_other_daemon_process({'refresh_startup_processes' => 1});
      if ($d) {
        psSnake::warn("[$$] snaked is already running: " . $d->cmndline . " ["  . $d->pid . "] and doesn't stop on KILL signal");
        exit 1;
      }
      else {
        psSnake::warn("[$$] killed previously running snaked: " . $previous_snaked->cmndline . " ["  . $previous_snaked->pid . "], continuing to start");
      }
    }
  }
}

($my_path, $my_command_line) = get_my_path_commandline();

psSnake::debug("my_path: $my_path");
psSnake::debug("my_command_line: $my_command_line");

print "starting snaked daemon for $ENV{'PS_SNAKED_CFG'}\n"
  unless $i_am_watchdog;

if (defined($psSnake::cmd_opts->{'daemon'}) || $i_am_watchdog) {
  # restart daemon using its full pathname and config path
  # if it was not started like this (so we could distinguish
  # between daemons by their locations)
  #
  # both checks are literal substring checks: paths are not
  # regular expressions and may contain characters which are
  # special inside of a pattern
  if (index($my_command_line, $my_path) == -1 ||
    index($my_command_line, "--cfg $ENV{'PS_SNAKED_CFG'}") == -1) {
    sigUSR2_handler();
  }

  # daemonize
  #
  # standard handles are reopened with explicit modes: STDIN for
  # reading, STDOUT and STDERR for writing (a handle opened without
  # a mode is read-only, so every write to it fails)
  chdir ('/') || die "unable chdir to /: $!";
  open(STDIN, '<', "/dev/null") || die("unable to read from /dev/null: $!");
  open(STDOUT, '>', "/dev/null") || die("unable to write to /dev/null: $!");
  defined(my $pid = fork) || die "Can't fork: $!";
  exit if $pid;
  POSIX::setsid() || die("Error running setsid: " . $!);
  open(STDERR, '>', "/dev/null") || die("unable to write to /dev/null: $!");

  # run watchdog (except for when snaked
  # would be restarted right after start)
  if ($i_am_watchdog && !$psSnake::Daemon::runtime->{'flags'}->{'restart'}) {
    run_watchdog();
    exit;
  }
}
elsif ($psSnake::debug) {
  # stay in foreground
}

psSnake::do_log("[$$] started");

# store our pid in the configured pidfile (if it is writable);
# skipped for watchdogs and when restart is already pending
if (my $pidfile_option = $psSnake::Daemon::runtime->{'config'}->{'pidfile'}) {
  unless ($psSnake::Daemon::runtime->{'flags'}->{'restart'} || $i_am_watchdog) {
    my $pidfile = $pidfile_option->{'value'};
    psSnake::write_file_option($pidfile, $$)
      if psSnake::can_write($pidfile);
  }
}

# Main loop of the daemon. Each pass:
#   - requests restart if system clock moved back
#   - spawns missing watchdogs (once per usec_2check_watchdog countdown)
#   - reaps finished children, kills those running longer than 2 hours
#   - handles pending "refresh configuration" and "restart" requests
#   - starts scheduled tasks or, when stop was requested, waits
#     for children to finish and exits (or re-executes itself on restart)
#   - naps (1 second in debug mode, 50000 usec otherwise)
my $previous_now;
my $current_now;

while (1) {
  $previous_now = $current_now;
  $current_now = time();

  # clock moved back -- restarting
  if ($previous_now && $current_now && ($previous_now > $current_now)) {
    psSnake::do_log("clock moved back from " . localtime($previous_now) . " to " . localtime($current_now) . ", restarting");
    sigUSR2_handler();
  }

  if (!$psSnake::Daemon::runtime->{'flags'}->{'restart'}) {
    if ($psSnake::Daemon::runtime->{'usec_2check_watchdog'} < 1) {
      manage_watchdogs() if $watchdogs2maintain;
      $psSnake::Daemon::runtime->{'usec_2check_watchdog'} = ($watchdogs2maintain + 1) * 2 * 2000000;
    }
  }

  # number of registered children (values in scalar context)
  my $have_active_children = values %{$psSnake::Daemon::runtime->{'children'}->{'by_pid'}};
  psSnake::debug("active children:") if $have_active_children;

  # check status of all children removing those which finished
  foreach my $v (values %{$psSnake::Daemon::runtime->{'children'}->{'by_pid'}}) {
    
    # minimize time() call a bit
    my $now = time();

    # check for really long running processes
    # and kill them brutally (not very fast
    # if killing doesn't work; blocking io?)
    #
    if (($now - $v->{'borntime'}) > (3600 * 2) && ($now - $v->{'killtime'}) > 5) {
      # kill first then log, because logging might fail
      # which leads to "die"

      # killing exactly child pid, which is only a "manager"
      # for the task; open3_run which is executed inside the child
      # checks whether manager is alive and terminates if not,
      # so killing manager notifies child that it should stop.
      kill(9, $v->{'pid'});
      $v->{'killtime'} = time();

      psSnake::do_log("killed long running (". ($now - $v->{'borntime'}) .
        " seconds) process [$v->{'pid'}] [$v->{'type'}]", {"stderr" => 1});
    }

    my $waitpid = waitpid($v->{'pid'}, WNOHANG);
    
    psSnake::debug("\tchild [$v->{'pid'}] [$v->{'type'}] [" . ($v->{'id'} ? $v->{'id'} : "") . "]: $waitpid;".
      " running " . (time() - $v->{'borntime'}) . " seconds");

    manage_child($v->{'pid'});

    # -1 means there's no such child process anymore
    if ($waitpid eq -1) {
      remove_child($v->{'pid'});
    }
  }

  if ($psSnake::Daemon::runtime->{'flags'}->{'refresh_configuration'}) {
    # configuration is reread only when no children are running
    if (!have_children()) {
      psSnake::do_log("requested to reread configuration, rereading");
      refreshOptions($ENV{'PS_SNAKED_CFG'});
      $psSnake::Daemon::runtime->{'flags'}->{'refresh_configuration'} = 0;
    }
    else {
      # log the request only once while waiting
      if (!$psSnake::Daemon::runtime->{'flags'}->{'refresh_configuration_logged'}) {
        psSnake::do_log("requested to reread configuration, waiting for children to stop");
        $psSnake::Daemon::runtime->{'flags'}->{'refresh_configuration_logged'} = 1;
      }
    }
  }
  if ($psSnake::Daemon::runtime->{'flags'}->{'restart'}) {
    if ($psSnake::debug) {
      psSnake::warn("unable to restart attached daemon");
      $psSnake::Daemon::runtime->{'flags'}->{'restart'} = 0;
    }
    else {
      # restart is stop followed by exec (see below)
      if (!$psSnake::Daemon::runtime->{'flags'}->{'stop'}) {
        psSnake::do_log("[$$] requested to restart");
        $psSnake::Daemon::runtime->{'flags'}->{'stop'} = 1;
      }
    }
  }

  # do processing if we were not requested to stop
  unless ($psSnake::Daemon::runtime->{'flags'}->{'stop'}) {

    my $task_groups = $psSnake::Daemon::runtime->{'task_groups'};

    # run all the tasks one by one (task groups are concurrent)
    foreach my $tg (keys %{$task_groups}) {
      $psSnake::Daemon::runtime->{'current_tasks'}->{$tg} = {}
        unless $psSnake::Daemon::runtime->{'current_tasks'}->{$tg};

      my $current_tasks = $psSnake::Daemon::runtime->{'current_tasks'}->{$tg};
      my $configured_tasks = $psSnake::Daemon::runtime->{'tasks'};

      if (scalar(keys %{$current_tasks})) {
        # process tasks in this task group
        foreach my $task_name (keys %{$current_tasks}) {
          my $task = $configured_tasks->{$task_name};
          
          # do not start task if it's already running
          next if find_child(undef, $task_name);

          # check if there're tasks runing
          # which block this task
          my $do_not_run = 0;
          foreach my $ctask (keys %{$task_groups->{$tg}}) {
            next if $ctask eq $task_name;

            if (find_child(undef, $ctask)) {
              $do_not_run = 1;
              last;
            }
          }

          next if $do_not_run;

          psSnake::debug("starting [$task_name]");
          add_child($task_name);
        }
      }
      else {
        # we've completed all tasks, reschedule tasks which need to be executed
        foreach my $task_name (keys %{$task_groups->{$tg}}) {
          my $task = $configured_tasks->{$task_name};
          my $child_e = get_child_cache($task_name);
          my $now = time();

          if ($task->{'start_random_sleep'}) {
            if (!$child_e->{'startup_sleep'}) {
              $child_e->{'startup_sleep'} = int(rand($task->{'start_random_sleep'}));
              $child_e->{'startup_sleep_started'} = $now;
              psSnake::debug("task [$task_name] random sleep [$child_e->{'startup_sleep'}]");
            }

            # ">=" (not ">"): randomly chosen sleep of 0 seconds has to be
            # over right away, otherwise it would be chosen anew on every
            # pass and a task with start_random_sleep of 1 (for which
            # 0 is the only possible choice) would never be started
            if ($now - $child_e->{'startup_sleep_started'} >= $child_e->{'startup_sleep'}) {
              $child_e->{'startup_sleep_finished'} = $now;
              psSnake::debug("task [$task_name] random sleep finished");
            }
          }
          else {
            # random startup sleep not configured for the task
            $child_e->{'startup_sleep_finished'} = $now;
          }

          # schedule only those tasks which:
          #   - finished random startup sleep time (if configured) AND
          #     - were not run for $task->{'execution_interval'} time or
          #     - have there next_run time passed
          #
          if ($child_e->{'startup_sleep_finished'} && (
                $task->{'next_run'} && $task->{'next_run'} <= $now
                  ||
                $task->{'execution_interval'} &&
                  $now - $child_e->{'laststart'} > $task->{'execution_interval'}
              )
            ) {

            # cron-like task: calculate its next run time
            if ($task->{'next_run'}) {
              $task->{'next_run'} = timelocal($task->{'cron'}->nextEvent);
            }

            $current_tasks->{$task_name} = $task;
          }
        }
      }
    }
  }
  else {
    # wait for children to exit and exit then
    if (have_children()) {
      for_each_child ({'stop_now' => 1});
      psSnake::debug("waiting for children to exit");
      sleep 1;
    }
    else {
      unlink($psSnake::Daemon::runtime->{'config'}->{'pidfile'}->{'value'})
        if $psSnake::Daemon::runtime->{'config'}->{'pidfile'};
      
      psSnake::do_log("[$$] stopped");

      # do not restart watchdogs on restart as they will try
      # to start snaked if restart fails (which should not happen
      # but happens in 0,02-0,03 % of cases)
      #
      # we may want to send some signal to watchdogs here
      # to notify them about restart so they could extend
      # their waiting cycle a bit
      #
      if ($psSnake::Daemon::runtime->{'flags'}->{'restart'}) {
        exec_ps_snaked($my_command_line, $my_path);
      }
      else {
        stop_watchdogs() if !$i_am_watchdog;
      }
      
      exit 0;
    }
  }

  if ($psSnake::debug) {
    psSnake::debug("-");
    sleep (1);
  }
  else {
    # nap is the same whether there are active children or not;
    # watchdog check countdown is decreased by the length of the nap
    usleep(50000);
    $psSnake::Daemon::runtime->{'usec_2check_watchdog'} = $psSnake::Daemon::runtime->{'usec_2check_watchdog'} - 50000;
  }
}

# yes i know this is the way
# to the world of endless may
exit(255);
