package Lingua::PT::Actants;
# ABSTRACT: compute verb actants for Portuguese
$Lingua::PT::Actants::VERSION = '0.02';
use strict;
use warnings;

sub new {
  my ($class, %args) = @_;
  my $data;

  # input is in conll* format
  if (exists($args{conll})) {
    $data = _conll2data($args{conll});
  }

  my $self = bless({ data=>$data }, $class);
  return $self;
}

sub _conll2data {
  my ($input) = @_;

  my @data;
  foreach my $line (split /\n/, $input) {
    my @l = split /\s+/, $line;
    push @data, {id=>$l[0],form=>$l[1],pos=>$l[3],dep=>$l[6],rule=>$l[7]};
  }

  return [@data];
}

sub sentence {
  my ($self) = @_;

  return join(' ', map {$_->{form}} @{$self->{data}});
}

sub actants {
  my ($self, %args) = @_;

  my @cores = $self->acts_cores($self->{data});
  my @acts = $self->acts_syntagmas([@cores], $self->{data});

  return @acts;
}

sub acts_cores {
  my ($self, $data) = @_;

  $data = $self->{data} unless $data;

  my @verbs;
  foreach (@{$data}) {
    push @verbs, $_ if (lc($_->{pos}) eq 'verb');
  }

  my @cores;
  foreach my $v (@verbs) {
    my $paths = _paths($v, $data);

    my @result;
    foreach (@$paths) {
      my $score = _score($_);
      if ($score >= 0) {
        push @result, { token=>$_->[0], score=>$score};
      }
    }

    # normalize results
    my $total = 0;
    $total += $_->{score} foreach @result;
    $_->{score} = $_->{score}/$total foreach @result;

    # sort results
    @result = sort {$b->{score} <=> $a->{score}} @result;

    push @cores, { verb=>$v, rank=>[@result] };
  }

  return @cores;
}

sub acts_syntagmas {
  my ($self, $cores, $data) = @_;
  my $visited = {};

  my @acts;
  foreach my $v (@$cores) {
    my @list;
    foreach my $r (@{ $v->{rank} }) {
      next unless $r->{score} >= 0.02;  # FIXME: threshold cut option

      my @child = _child($r->{token}, $data, $visited);
      $visited->{$_->{id}}++ foreach (@child);

      next unless @child;
      push @list, { tokens=>[@child] };
    }
    push @acts, { verb=>$v->{verb}, acts=>[@list] };
  }

  return @acts;
}

sub _paths {
  my ($pivot, $data) = @_;
  my @paths;

  foreach (@{$data}) {
    my $p = _fullpath($_, $pivot, $data, []);
    push @paths, $p if ($p and @$p>1);
  }

  return [@paths];
}

sub _fullpath {
  my ($from, $to, $data, $path) = @_;
  push @$path, $from;

  if ($from->{id} == $to->{id}) {
    return $path;
  }
  else {
    foreach (@{$data}) {
      if ($from->{dep} == $_->{id}) {
        _fullpath($_, $to, $data, $path);
      }
    }
    return ($path->[-1]->{id} == $to->{id} ? $path : []);
  }

  return [];
}

sub _score {
  my ($path) = @_;
  my $score = 0;

  #foreach (@$path) { $score += _el_score($_); }
  $score = _el_score($path->[0]);

  return $score / (scalar(@$path)*scalar(@$path));
}

sub _el_score {
  my ($el) = @_;

  my $score = _score_pos($el->{pos}) * _score_rule($el->{rule});

  return $score;
}

sub _score_pos {
  my ($pos) = @_;

  return 0.8 if ($pos =~ m/^(noun|propn)$/i);
  return -1 if ($pos =~ m/^(punct)$/i);

  return 0.1;
}

sub _score_rule {
  my ($rule) = @_;

  return 0.8 if ($rule =~ m/^(nsubj)$/i);
  return 0.7 if ($rule =~ m/^(dobj)$/i);

  return 0.1;
}

sub _child {
  my ($node, $data, $visited) = @_;
  my @child = ();

  foreach (@$data) {
    next if exists($visited->{$_->{id}});
    push @child, $_ if ($node->{id} == $_->{dep} or $node->{id} == $_->{id});
  }

  return @child;
}

sub pp_acts_cores {
  my ($self, @cores) = @_;
  my $r = "# Actants syntagma cores\n";

  foreach my $v (@cores) {
    $r .= " Verb: $v->{verb}->{form}\n";
    foreach (@{$v->{rank}}) {
      $r .= sprintf "  %.6f | %s\n", $_->{score}, $_->{token}->{form};
    }
  }

  return $r;
}

sub pp_acts_syntagmas {
  my ($self, @acts) = @_;
  my $r = "# Actants syntagmas\n";

  foreach my $v (@acts) {
    $r .= " Verb: $v->{verb}->{form}\n";
    my $i = 1;
    foreach (@{$v->{acts}}) {
      $r .= sprintf "  %s: %s\n", "A$i", join(' ', map {$_->{form}} @{$_->{tokens}});
      $i++;
    }
  }

  return $r;
}

sub drop_auxs {
  my ($self) = @_;
  my @list = @{ $self->{data} };

  my @data;
  my $a = shift @list;
  foreach my $b (@list) {
    if ( lc($a->{pos}) eq 'verb' and lc($b->{pos}) eq 'verb' ) {
      $a = $b;
    }
    else {
      push @data, $a;
      $a = $b;
    }
  }
  push @data, $a;

  my @tokens = map { $_->{form} if exists($_->{form}) } @data;
  @tokens = grep {defined $_} @tokens;
  my $s = join(' ', @tokens);

  return $s;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Lingua::PT::Actants - compute verb actants for Portuguese

=head1 VERSION

version 0.02

=head1 SYNOPSIS

    # using as a library
    use Lingua::PT::Actants;
    my $a = Lingua::PT::Actants->new( conll => $input );
    my @cores = $a->acts_cores;
    my @actants = $a->actatans;

    # example from the command line
    $ cat examples/input.txt 
    1   A       _   DET     DET     _   2   det     _   _
    2   Maria   _   PROPN   PROPN   _   3   nsubj   _   _
    3   tem     _   VERB    VERB    _   0   ROOT    _   _
    4   razão   _   NOUN    NOUN    _   3   dobj    _   _
    5   .       _   PUNCT   PUNCT   _   3   punct   _   _
    $ cat examples/input.txt | actants
    Sentence: A Maria tem razão .
    # Actants syntagma cores
     Verb: tem
      0.526990 | Maria
      0.461116 | razão
      0.008234 | .
      0.003660 | A
    # Actants syntagmas
     Verb: tem
      A1: A Maria
      A2: razão

=head1 DESCRIPTION

This module implements an algorithm that computes a sorted rank of tokens
where the score measures the propensity of the token being an actant
for the verb to which is related.

=head1 FUNCTIONS

=head2 new

Create a new object, pass as argument the input text in CONLL format.

=head2 acts_cores

Compute the core (a token) of the actants syntagmas as rank sorted by score.

=head2 pp_acts_cores

Pretty print actants cores, mainly to be used by the command line interface.

=head2 actants

Compute actants for a sentence, returns a list of actants found.

=head2 pp_acts_syntagmas

Pretty print actants syntagmas, mainly to be used by the command line interface.

=head1 AUTHOR

Nuno Carvalho <smash@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2016 by Nuno Carvalho.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
