package Bio::Grid::Run::SGE::Worker;

use warnings;
use strict;
use Mouse;
use 5.010;
use Storable;
use Data::Dumper;
use Carp;
use File::Spec::Functions;
use File::Spec;
use Bio::Grid::Run::SGE::Index;
use Net::Domain qw(hostfqdn);
use IO::Handle;
use Try::Tiny;

use Cwd qw/fastcwd/;

our $VERSION = '0.060'; # VERSION

# config: settings hash; this class reads log_dir, input, mode, num_parts,
#         working_dir, tmp_dir and result_dir from it.
# env:    per-job hash; this class reads task_id, job_id, job_name_save,
#         job_cmd, num_comb and range from it and writes nslots, part_size,
#         range, comb_idx and is_first_comb into it.
has [qw/config env/] => ( is => 'rw', required => 1 );
# task: code reference (checked in BUILD); run() calls it once per combination
#       with the result prefix followed by the input files.
has [qw/task/]       => ( is => 'rw', required => 1 );
# log: object on which BUILD calls ->info.
# NOTE(review): this file also defines "sub log"; which of the two ends up
# installed under that name depends on how Mouse installs accessors -- confirm.
has log => ( is => 'rw', required => 1 );

# status_log_fh: write handle of the status log; opened and set in BUILD and
#                used by log_status().
has status_log_fh => ( is => 'rw' );

# iterator: built on first access by _build_iterator().
has [qw/iterator/] => ( is => 'rw', lazy_build => 1 );

# Constructor hook: validates the constructor arguments, determines the range
# of combinations handled by this worker and opens the status log.
#
# Side effects: sets $env->{nslots} (from NSLOTS, default 1), lets
# _determine_range() fill in the range, creates/truncates the status log file
# "<job_name_save>.l<job_id>.<task_id>" inside the configured log_dir, stores
# its handle in status_log_fh and logs the current settings.
#
# Dies (confess) if task is not a code reference, if a supplied range has
# fewer than two elements, or if the status log cannot be opened.
sub BUILD {
  my ($self) = @_;

  my $settings = $self->config;
  my $job_env  = $self->env;

  # number of slots granted to this job; a single slot if NSLOTS is not set
  $job_env->{nslots} = defined $ENV{NSLOTS} ? $ENV{NSLOTS} : 1;

  my $task = $self->task;
  if ( !$task || ref($task) ne 'CODE' ) {
    confess "task is no code reference";
  }

  my $given_range = $job_env->{range};
  if ( $given_range && @{$given_range} < 2 ) {
    confess "given range is not in the correct format";
  }

  $self->_determine_range;

  my $status_log_name = sprintf( "%s.l%d.%d", @{$job_env}{qw/job_name_save job_id task_id/} );
  my $status_log_path = catfile( $settings->{log_dir}, $status_log_name );
  $self->log->info( "log: " . $status_log_path );

  open my $fh, '>', $status_log_path or confess "Can't open filehandle: $!";
  $self->status_log_fh($fh);

  $self->_log_current_settings;
}

# Builder for the lazily built "iterator" attribute.
#
# Creates one Bio::Grid::Run::SGE::Index object per entry of the "input" list
# in the configuration (the entry's key/value pairs are passed to the
# constructor), writes the idx_file of every entry to the status log and
# returns a Bio::Grid::Run::SGE::Iterator over all indices using the
# configured mode.
#
# NOTE(review): Bio::Grid::Run::SGE::Iterator is not loaded by this file;
# presumably it is loaded elsewhere -- confirm.
sub _build_iterator {
  my ($self) = @_;
  my $settings = $self->config;

  my @index_objects;
  foreach my $input_spec ( @{ $settings->{input} } ) {
    my $index = Bio::Grid::Run::SGE::Index->new( %{$input_spec} );
    push @index_objects, $index;
    $self->log_status( "index_file: " . $input_spec->{idx_file} );
  }

  my $iterator = Bio::Grid::Run::SGE::Iterator->new(
    mode    => $settings->{mode},
    indices => \@index_objects,
  );
  return $iterator;
}

# Determine the 0-based range of combinations this worker has to process and
# store it in $env->{range}.
#
# - $env->{part_size} is always set to 1.
# - A range that is already present in $env (restart of a failed run with an
#   explicit range) is left untouched.
# - Without num_parts in the configuration every task handles exactly one
#   combination: the range is (task_id - 1, task_id - 1).
# - Otherwise num_comb is cut into num_parts consecutive chunks of
#   int(num_comb / num_parts) elements. The leftover elements of the division
#   are handed out one by one to the first tasks as a third range element.
sub _determine_range {
  my ($self) = @_;
  my $settings = $self->config;
  my $job_env  = $self->env;

  my $total_combs = $job_env->{num_comb};
  my $parts       = $settings->{num_parts};

  $job_env->{part_size} = 1;

  # we ran before (and failed) and now somebody restarts us with a given range
  return if ( $job_env->{range} );

  # make everything 0 based
  my $slot = $job_env->{task_id};
  $slot--;

  if ( !$parts ) {
    $job_env->{range} = [ $slot, $slot ];
    return;
  }

  my $chunk_size = int( $total_combs / $parts );
  my $leftover   = $total_combs % $parts;

  my $first = $chunk_size * $slot;
  my @range = ( $first, $first + $chunk_size - 1 );

  # the first $leftover tasks take one of the elements behind the last chunk
  push @range, ( $chunk_size * $parts ) + $slot if ( $slot < $leftover );

  $job_env->{range} = \@range;

  return;
}

# Format a duration given in seconds as "<days>d <hours>h <minutes>m <seconds>s".
# The day count is the day-of-year field of gmtime, so it is only meaningful
# for durations shorter than one year.
sub _format_duration {
  my ($seconds) = @_;
  return sprintf( "%dd %dh %dm %ds", ( gmtime($seconds) )[ 7, 2, 1, 0 ] );
}

# Run the task for every combination in the range of this worker.
#
# For each combination the task code reference is called with the result
# prefix followed by the input files. The outcome is written to the status
# log, keyed on the task's return value:
#   false / exception -> comp.task.exit.error
#   negative          -> comp.task.exit.skip
#   anything else     -> comp.task.exit.success
# Input files that were created by the worker itself are deleted afterwards,
# unless the DEBUG environment variable is set. Timing information is logged
# per combination and for the complete run.
#
# A failing chdir or unlink does not abort the run (best effort, as before),
# but it is now reported on STDERR instead of being silently ignored.
sub run {
  my ($self) = @_;

  # Accessing the iterator here triggers its lazy build (and the index_file
  # log lines) before the working directory is changed.
  $self->iterator;
  my $conf = $self->config;
  my $env  = $self->env;

  chdir $conf->{working_dir}
    or warn "Can't chdir to '" . ( $conf->{working_dir} // '<undef>' ) . "': $!";
  #log something
  $self->log_status( "cwd: " . fastcwd() );
  # FIXME what is cmd, script_bin???
  #$self->log_status( "cmd: " . join( " ", @{ $c->{cmd} } ) );
  $self->log_status("run.begin");

  #time the whole stuff
  my $time_start = time;
  $self->log_status( "comp.begin: " . localtime($time_start) );

  my $next_comb = $self->_create_comb_iterator();

  # adjust config for main task
  #$env->{job_id}    .= "." . $env->{task_id}

  while ( my $comb = $next_comb->() ) {
    my $infiles       = $comb->{infiles};
    my $result_prefix = $comb->{result_prefix};
    my $comb_idx      = $comb->{comb_idx};
    $env->{comb_idx}      = $comb_idx;
    $env->{is_first_comb} = $comb_idx == 0;

    # some input files are generated by us, some are original files
    my $infile_is_temp = $comb->{is_temp};

    #stop time per task
    my $task_time = time;

    #RUN TASK
    # an exception leaves $return_status undefined, which is logged as error
    my $return_status;
    try {
      $return_status = $self->task->( $result_prefix, @{$infiles} );
    }
    catch {
      warn "caught error: $_";
    };

    if ( !$return_status ) {
      $self->log_status("comp.task.exit.error:: $comb_idx");
    }
    elsif ( $return_status < 0 ) {
      # TODO document this
      $self->log_status("comp.task.exit.skip:: $comb_idx");
    }
    else {
      $self->log_status("comp.task.exit.success:: $comb_idx");
    }

    for my $i ( 0 .. $#{$infiles} ) {
      # delete the file only, if it was created by us.
      next unless ( $infile_is_temp->[$i] );

      # keep the temporary files for inspection when debugging
      next if ( $ENV{DEBUG} );

      my $infile = $infiles->[$i];
      $self->log_status("comp.task.file.delete:: $comb_idx $infile");
      unlink $infile or warn "Can't delete temporary file '$infile': $!";
    }

    $task_time = time - $task_time;
    $self->log_status( "comp.task.time:: $comb_idx " . _format_duration($task_time) );
  }

  my $time_end = time;
  my $elapsed  = $time_end - $time_start;
  $self->log_status( "comp.end: " . localtime($time_end) );
  $self->log_status( "comp.time: " . sprintf( "%s (%d)", _format_duration($elapsed), $elapsed ) );

  $self->log_status("run.end");
}

#0-based ranges

# Restrict the iterator to the range of this worker and return a closure that
# yields one combination per call.
#
# Each call of the closure returns a hash reference with the keys
#   infiles       - input files of the combination (one per index)
#   is_temp       - parallel list of flags, true if the file was created here
#   result_prefix - "<result_dir>/<job_name_save>.j<job_id>.<task_id>.c<comb_idx>"
#   comb_idx      - index of the combination as reported by the iterator
# or nothing once the iterator is exhausted.
#
# Elements of indices with type "direct" are passed through unchanged; all
# other elements are written to a temporary file in tmp_dir, and the name of
# that file is passed on instead.
#
# The closure dies if the combination has a different number of elements than
# there are indices, or if a temporary input file cannot be opened, written
# or closed. Write and close are checked so that the task never runs on a
# silently truncated input file.
sub _create_comb_iterator {
  my ($self) = @_;
  my $conf   = $self->config;
  my $env    = $self->env;

  my $iter = $self->iterator;

  $iter->range( $env->{range} );
  my $num_infiles = @{ $iter->indices };

  return sub {
    # the index has to be fetched before the iterator advances
    my $comb_idx = $iter->peek_comb_idx;
    return unless ( defined($comb_idx) );

    my $comb = $iter->next_comb;
    return unless ($comb);

    die "different number of combinations than indices...????!!!" if ( $num_infiles != @$comb );

    my @infiles;
    my @is_temp_file;
    for my $i ( 0 .. $#{$comb} ) {
      my $idx_type = $iter->indices->[$i]->type;

      if ( $idx_type && $idx_type eq 'direct' ) {
        push @infiles,      $comb->[$i];
        push @is_temp_file, 0;
        next;
      }

      my $tmp_file = catfile( $conf->{tmp_dir},
        sprintf( "worker.j%d.%d.c%d.i%d.tmp", $env->{job_id}, $env->{task_id}, $comb_idx, $i ) );

      open my $in_fh, '>', $tmp_file or confess "Can't open filehandle: $!";
      print {$in_fh} $comb->[$i]
        or confess "Can't write to temporary input file '$tmp_file': $!";
      # buffered write errors (e.g. disk full) only show up on close
      close $in_fh
        or confess "Can't close temporary input file '$tmp_file': $!";

      push @infiles,      $tmp_file;
      push @is_temp_file, 1;
    }

    my $result_prefix = catfile( $conf->{result_dir},
      sprintf( "%s.j%d.%d.c%d", $env->{job_name_save}, $env->{job_id}, $env->{task_id}, $comb_idx ) );

    return {
      infiles       => \@infiles,
      is_temp       => \@is_temp_file,
      result_prefix => $result_prefix,
      comb_idx      => $comb_idx
    };
  };
}

# Write the settings of this worker to the status log, one "key: value" line
# each: init time, task id, job id, job command, fully qualified host name,
# the SGE stderr/stdout paths taken from the environment and finally the
# range of combinations.
sub _log_current_settings {
  my ($self) = @_;

  my $job_env = $self->env;

  my @settings = (
    [ 'init',     scalar localtime(time) ],
    [ 'task_id',  $job_env->{task_id} ],
    [ 'job_id',   $job_env->{job_id} ],
    [ 'job_cmd',  $job_env->{job_cmd} ],
    [ 'hostname', scalar hostfqdn() ],
    [ 'err',      $ENV{SGE_STDERR_PATH} ],
    [ 'out',      $ENV{SGE_STDOUT_PATH} ],
  );

  for my $setting (@settings) {
    my ( $key, $value ) = @{$setting};
    $self->log_status( $key . ": " . $value );
  }

  # range = ( from, to, extra_element )
  # the extra element is caused by the modulo leftover
  my $range_list = join( ",", @{ $job_env->{range} } );
  $self->log_status( "range: (" . $range_list . ")" );
}

# Append one line to the status log: all arguments joined by a single space,
# followed by a newline. The handle is flushed right away. Returns nothing.
sub log_status {
  my ( $self, @fields ) = @_;

  my $fh = $self->status_log_fh;
  say {$fh} join( " ", @fields );
  $fh->flush;

  return;
}

# Print all arguments, joined by a single space and followed by a newline, to
# STDERR.
#
# NOTE(review): the class also declares a "log" attribute via has; which of
# the two definitions ends up installed under this name depends on how Mouse
# installs accessors -- confirm.
sub log {
  my ( $self, @words ) = @_;

  my $message = join( " ", @words );
  print STDERR $message, "\n";
}

1;

__END__

=head1 NAME

Bio::Grid::Run::SGE::Worker - Run the cluster script

=head1 SYNOPSIS


=head1 DESCRIPTION

This class runs the cluster task for the range of input combinations assigned to one job task and writes the progress and timing of every combination to a status log.

=head1 METHODS

=head1 SEE ALSO

L<Bio::Grid::Run::SGE>

=head1 AUTHOR

jw bargsten, C<< <joachim.bargsten at wur.nl> >>

=cut

