package Net::Amazon::MechanicalTurk::Command::RetrieveResults;
use strict;
use warnings;
use Carp;
use Net::Amazon::MechanicalTurk::RowData;
use Net::Amazon::MechanicalTurk::DelimitedWriter;

our $VERSION = '1.01_01';

=head1 NAME

Net::Amazon::MechanicalTurk::Command::RetrieveResults - Retrieves answer data.

=head1 SYNOPSIS

    $mturk->retrieveResults(
        input    => "loadhits-success.csv",
        output   => "loadhits-results.csv",
        progress => \*STDOUT
    );

=head1 C<retrieveResults>

retrieveResults

Retrieves the available answers for each HITId in input and stores the answers
in output.

   input - The input parameter is something which should produce rows of data
           (hashes) with at least one column named HITId.  The case does not 
           matter, however all rows should use the same column name.  Row 1
           should not have hitid, if row 2 has HITId.  This parameter may be
           of the following types:
          
              - Net::Amazon::MechanicalTurk::RowData
              - An array of hashes.
                (This is internally converted into an object of type:
                 Net::Amazon::MechanicalTurk::RowData::ArrayHashRowData)
              - A reference to a subroutine.  When the retrieveResults
                method asks for row data, the subroutine will be called and
                passed a subroutine reference, which should be called
                for every row generated by the input.  The generated row
                should be a hash reference.
                (This is internally converted into an object of type
                 Net::Amazon::MechanicalTurk::RowData::SubroutineRowData)
              - The name of a file.  The file should be either a CSV or
                tab delimited file.  If the file name ends with '.csv',
                it will read as a CSV, otherwise it is assumed to be
                tab delimited. The first row in the file should contain
                the column names.  Each subsequence row becomes a hash
                reference based on the column names.
                (This is internally converted into an object of type
                 Net::Amazon::MechanicalTurk::RowData::DelimitedRowData)
          
    output - Used to write answer information back to some form of data
             storage. This parameter may be of the following types:
              - A filename. The answers for an assignment will be 
                written to the file name as a delimited file.  It
                will be a CSV file if the file name ends with .csv,
                otherwise it will be a tab delimited file.
                The file will have the following named columns:
              
                  AssignmentId
                  AssignmentStatus
                  HITId
                  WorkerId
              
              There will be unnamed columns after those.  These columns will
              come in pairs.  A pair will have a question id and then 
              a textual form of the answer, for that question.
              
              - A subroutine.  The subroutine will be called back with
                a hash containing the following values:
                    - mturk      - A handle to the mturk client.
                    - HITId      - The HITId for the assignment.
                    - row        - The row from the input.
                    - assignment - An assignment object as returned by
                                   GetAssignmentsForHITAll.

=cut 

sub retrieveResults {
    my $mturk = shift;
    my %params = @_;
 
    foreach my $required (qw{ input output }) {
        if (!exists $params{$required}) {
            Carp::croak("Missing required parameter $required.");            
        }
    }

    my $progress = progressBlock($params{progress});
    my $input    = Net::Amazon::MechanicalTurk::RowData->toRowData($params{input});
    my $output   = outputBlock($params{output});
    my $start    = time();
    
    if ($progress) {
        $progress->("--[Initializing] " . scalar localtime() . " ---");
        $progress->("  URL:        " . $mturk->serviceUrl);
        $progress->("  Input:      $params{input}");
        $progress->("  Output:     $params{output}");
    }
    
    my $hitIdKey;
    my $hits = 0;
    my $results = 0;
    my $rowno = 0;
    
    $progress->("--[Retrieving results] " . scalar localtime() . " ---") if ($progress);
            
    $input->each(sub {
        my ($_self, $row) = @_;
        
        # Need to find the column name of the HITId key
        # The loop is for case insensitive matching and ignoring white space.
        if (!defined $hitIdKey) {
            while (my ($key,$value) = each %$row) {
                if ($key =~ /^\s*HITId\s*$/i) {
                    $hitIdKey = $key;
                    last;
                }
            }
            if (!defined $hitIdKey) {
                Carp::croak("Couldn't find HITId column.  Row has the following columns: " .
                    join(", ", sort keys (%$row)));
            }
        }
        
        $rowno++;
        if (!exists $row->{$hitIdKey}) {
            Carp::croak("Couldn't find HITId column on row $rowno.");
        }
        my $hitId = $row->{$hitIdKey};
        $hitId =~ s/^\s+//;
        $hitId =~ s/\s+$//;

        $progress->("  Getting assignments for HIT $hitId.") if $progress;
        my $assignments = $mturk->GetAssignmentsForHITAll( HITId => $hitId );
        while (my $assignment = $assignments->next) {
            $output->(
                HITId      => $hitId,
                mturk      => $mturk,
                row        => $row,
                assignment => $assignment
            );
            $results++;
        }
        
        $hits++;
    });
    
    if ($progress) {
        $progress->("  Examined $hits hits.");
        $progress->("  Downloaded $results answers.");
        $progress->("--[Done retrieving results] " . scalar localtime() . " ---");
        $progress->("  Total load time: " . (time() - $start) . " seconds.");
    }
}

sub progressBlock {
    my ($progress) = @_;
    if (!defined($progress)) {
        return $progress;
    }
    elsif (UNIVERSAL::isa($progress, "CODE")) {
        return $progress;
    }
    elsif (UNIVERSAL::isa($progress, "GLOB")) {
        return sub {
            print $progress @_, "\n";
        };
    }
    else {
        Carp::croak("The progress parameters should be an IO handle or a subroutine.");
    }
}

sub outputBlock {
    my ($output) = @_;
    if (UNIVERSAL::isa($output, "CODE")) {
        return $output;
    }
    else {
        return createOutputBlock($output);
    }
}

sub createOutputBlock {
    my ($file) = @_;
    my $out;
    my $rowNumber = 0;
    if (UNIVERSAL::isa($file, "GLOB")) {
        $out = Net::Amazon::MechanicalTurk::DelimitedWriter->new(
            output         => $file,
            fieldSeparator => "\t"
        );
    }
    elsif (UNIVERSAL::isa($file, "Net::Amazon::MechanicalTurk::DelimitedWriter")) {
        $out = $file;
    }
    else {
        my $fs = ($file =~ /\.csv$/i) ? "," : ".";
        $out = Net::Amazon::MechanicalTurk::DelimitedWriter->new(
            lazy           => 1,
            file           => $file,
            fieldSeparator => $fs,
            autoflush      => 1
        );
    }
    my $row = 0;
    return sub { return writeOutput(out => $out, rowNumber => ++$row, @_); };
}

sub writeOutput {
    my %params = @_;
    
    my $out        = $params{out};
    my $rowNumber  = $params{rowNumber};
    my $mturk      = $params{mturk};
    my $assignment = $params{assignment};
    
    if ($rowNumber == 1) {
        $out->write(qw{ AssignmentId AssignmentStatus HITId WorkerId Answers });
    }
    
    my $answers = $mturk->parseAssignmentAnswer($assignment);
    my @row = (
        $assignment->{AssignmentId}[0],
        $assignment->{AssignmentStatus}[0],
        $assignment->{HITId}[0],
        $assignment->{WorkerId}[0]
    );
    
    $answers->eachAnswerValue(sub {
        my ($questionId, $answerText) = @_;
        push(@row, $questionId, $answerText);
    });
    
    $out->write(@row);
}

return 1;
