package Lingua::EN::Opinion;
our $AUTHORITY = 'cpan:GENE';

# ABSTRACT: Measure the positive/negative sentiment of text

our $VERSION = '0.03';

use Moo;
use strictures 2;
use namespace::clean;

use Lingua::EN::Opinion::Positive;
use Lingua::EN::Opinion::Negative;

use File::Slurper qw( read_text );
use Lingua::EN::Sentence qw( get_sentences );
use Statistics::Lite qw( mean );


has file => (
    is  => 'ro',
    isa => sub { die "File $_[0] does not exist" unless -e $_[0] },
);


has text => (
    is => 'ro',
);


has sentences => (
    is       => 'rw',
    init_arg => undef,
);


has scores => (
    is       => 'rw',
    init_arg => undef,
);


sub analyze {
    my ($self) = @_;

    my $contents = $self->file ? read_text( $self->file ) : $self->text;

    $self->sentences( get_sentences($contents) );

    my @sentences = map { $_ } @{ $self->sentences };

    my @scores;

    my $positive = Lingua::EN::Opinion::Positive->new();
    my $negative = Lingua::EN::Opinion::Negative->new();

    for my $sentence ( @sentences ) {
        $sentence =~ s/[[:punct:]]//g;  # Drop punctuation

        my @words = split /\s+/, $sentence;

        my $score = 0;

        for my $word ( @words ) {
            $score += exists $positive->wordlist->{$word} ? 1
                    : exists $negative->wordlist->{$word} ? -1 : 0;
        }

        push @scores, $score;
    }

    $self->scores( \@scores );
}


sub averaged_score {
    my ( $self, $bins ) = @_;

    $bins ||= 10;

    my @scores = map { $_ } @{ $self->scores };

    my @averaged;

    while ( my @n = splice @scores, 0, $bins ) {
        push @averaged, mean(@n);
    }

    return \@averaged;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Lingua::EN::Opinion - Measure the positive/negative sentiment of text

=head1 VERSION

version 0.03

=head1 SYNOPSIS

  use Lingua::EN::Opinion;
  my $opinion = Lingua::EN::Opinion->new( file => '/some/file.txt' );
  $opinion->analyze();
  my $sentences = $opinion->sentences;
  my $scores = $opinion->scores;
  my $averaged = $opinion->averaged_score(5);

=head1 DESCRIPTION

A C<Lingua::EN::Opinion> measures the positive/negative sentiment of text.

Please see the F<eg/> and F<t/> scripts for example usage.

=head1 ATTRIBUTES

=head2 file

The text file to analyze.

=head2 text

A text string to analyze instead of a text file.

=head2 sentences

Computed result.

=head2 scores

Computed result.

=head1 METHODS

=head2 new()

  $opinion = Lingua::EN::Opinion->new(%arguments);

Create a new C<Lingua::EN::Opinion> object.

=head2 analyze()

  $score = $opinion->analyze();

Measure the positive/negative sentiment of text.

=head2 averaged_score()

  $averaged = $opinion->averaged_score($bins);

Compute the averaged score given a number of (integer) B<bins> (default: 10).

This reduces the amount of "noise" in the original signal.  As such, it loses
information detail.

For example, if there are 400 sentences, B<bins> of 10 will result in 40 data
points.  Each point will be the mean of each successive bin-sized set of points
in the analyzed score.

=head1 SEE ALSO

L<Moo>

L<File::Slurper>

L<Lingua::EN::Sentence>

L<Statistics::Lite>

L<https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon>

=head1 AUTHOR

Gene Boggs <gene@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2018 by Gene Boggs.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
