#!/usr/bin/env perl
# PODNAME: ppred-comp-test.pl
# ABSTRACT: Compute compositional test based on ppred data

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use Smart::Comments;

use Bio::MUST::Core;
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::PostPred';
use aliased 'Bio::MUST::Core::SeqMask';


# TODO: generalize this to other executables through Utils?
# Select the Ali loader method used for the real alignments: 'load' by
# default, 'load_phylip' when --phylip is given.
my $load = 'load';
if ($ARGV_phylip) {
    ### Infiles are in PHYLIP format
    $load  .= '_phylip';
}

# Simulated alignments are always read with the PHYLIP loader, regardless of
# --phylip; they are loaded once and reused for every infile.
### Processing simulated files: scalar @ARGV_sim_files
my @sim_alis = map { Ali->load_phylip($_) } @ARGV_sim_files;

for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $ali = Ali->$load($infile);

    # optionally turn ambiguous and missing character states to gaps, as
    # documented for --from-scafos (applied before any site masking)
    $ali->gapify_seqs if $ARGV_from_scafos;

    # optionally delete constant sites
    $ali->apply_mask( SeqMask->variable_mask($ali) ) if $ARGV_del_const;

    # the real alignment is passed first, followed by the simulated ones
    ### Computing compositional test
    my $test = PostPred->comp_test( [ $ali, @sim_alis ] );

    # one output line per id: significance flag, id, Z-score (2 decimals);
    # the flag is set when the absolute Z-score exceeds 2
    ### Test results ('*' means significant Z-score)
    for my $id ($test->all_ids) {
        my $zscore = $test->zscore_for($id);
        my $sign = abs($zscore) > 2 ? q{ *} : q{  };
        say join q{  }, $sign, $id, sprintf "%.2f", $zscore;
    }
}

__END__

=pod

=head1 NAME

ppred-comp-test.pl - Compute compositional test based on ppred data

=head1 VERSION

version 0.180230

=head1 SYNOPSIS

    $ ppred-comp-test.pl test/for-ppred-comp.phy --phylip \
        --sim-files=`ls test/ppred-*.phy`

This program implements the compositional test proposed by Blanquart and
Lartillot (2008) based on simulated primary sequences. The compositional bias
is computed for each sequence in the real alignment as well as in each of the
simulated alignments. Two types of global biases (max and mean) are also
computed. Z-scores are then computed to describe the ability of the
evolutionary model to account for these biases. Any Z-score whose absolute
value is larger than 2.0 (flagged with a '*' in the output) means that the
model could not account for the compositional bias of the corresponding entity
(single sequence or global).

=head1 USAGE

    ppred-comp-test.pl <infiles> --sim-files=<files>... [optional arguments]

=head1 REQUIRED ARGUMENTS

=over

=item <infiles>

Path to input ALI files [repeatable argument]. If infiles are not in ALI but
in PHYLIP format, use the C<--phylip> option below.

=for Euclid: infiles.type: readable
    repeatable

=item --sim-files=<files>...

List of paths to simulated input files. These files are assumed to be in
PHYLIP format as they result from PhyloBayes' C<ppred>.

=for Euclid: files.type: readable

=back

=head1 OPTIONAL ARGUMENTS

=over

=item --from-scafos

Consider the input ALI file as generated by SCaFoS [default: no]. Currently,
specifying this option results in turning all ambiguous and missing character
states to gaps.

=item --del-const

Delete constant sites just as the C<-dc> option of PhyloBayes [default: no].

=item --phylip

Assume infiles are in PHYLIP format (instead of ALI format) [default: no].
Simulated files are always read as PHYLIP, whether or not this option is given.

=item --version

=item --usage

=item --help

=item --man

Print the usual program information

=back

=head1 AUTHOR

Denis BAURAIN <denis.baurain@uliege.be>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
