package Bio::Palantir::Parser::Root;
# ABSTRACT: BiosynML DTD-derived internal class
$Bio::Palantir::Parser::Root::VERSION = '0.201670';
use Moose;
use namespace::autoclean;

# AUTOGENERATED CODE! DO NOT MODIFY THIS FILE!

use XML::Bare qw(forcearray);
use POSIX;

use aliased 'Bio::Palantir::Parser::Cluster';
use aliased 'Bio::Palantir::Parser::Gene';
use aliased 'Bio::Palantir::Parser::Domain';
use aliased 'Bio::Palantir::Parser::Motif';

use aliased 'Bio::Palantir::Roles::Modulable::Module';

# private attributes
has '_root' => (
    is       => 'ro',
    isa      => 'HashRef',
    required => 1,
);

has 'module_delineation' => (
    is      => 'ro',
    isa     => 'Str',
);



# public array(s) of composed objects


has 'clusters' => (
    traits   => ['Array'],
    is       => 'ro',
    isa      => 'ArrayRef[Bio::Palantir::Parser::Cluster]',
    writer   => '_set_clusters',
    init_arg => undef,
    handles  => {
         count_clusters => 'count',
           all_clusters => 'elements',
           get_cluster  => 'get',
          next_cluster  => 'shift',        
    },
);


## no critic (ProhibitUnusedPrivateSubroutines)


## use critic



has 'genes' => (
    traits   => ['Array'],
    is       => 'ro',
    isa      => 'ArrayRef[Bio::Palantir::Parser::Gene]',
    writer   => '_set_genes',
    handles  => {
         count_genes => 'count',
           all_genes => 'elements',
           get_gene  => 'get',
          next_gene  => 'shift',        
    },
);


## no critic (ProhibitUnusedPrivateSubroutines)


## use critic



has 'domains' => (
    traits   => ['Array'],
    is       => 'ro',
    isa      => 'ArrayRef[Bio::Palantir::Parser::Domain]',
    writer   => '_set_domains',
    handles  => {
         count_domains => 'count',
           all_domains => 'elements',
           get_domain  => 'get',
          next_domain  => 'shift',        
    },
);


## no critic (ProhibitUnusedPrivateSubroutines)


## use critic



has 'motifs' => (
    traits   => ['Array'],
    is       => 'ro',
    isa      => 'ArrayRef[Bio::Palantir::Parser::Motif]',
    writer   => '_set_motifs',
    handles  => {
         count_motifs => 'count',
           all_motifs => 'elements',
           get_motif  => 'get',
          next_motif  => 'shift',        
    },
);


## no critic (ProhibitUnusedPrivateSubroutines)


## use critic



# public composed object(s)


# public deep methods


# public methods



# public aliases
sub BUILD {
    my $self = shift;

    my @motifs = map { Motif->new( _root => $_ ) } @{
        forcearray $self->_root->{'motiflist'}{'motif'}
    };

    $self->_set_motifs( \@motifs );   

    my %motifs_in;
    push @{ $motifs_in{ $_->_root->{'domainID'}->{'value'} } }, $_ for @motifs;

    my @domains;
    for my $domain ( @{ forcearray $self->_root->{'domainlist'}{'domain'} }) {
        push @domains, Domain->new(
            _root  => $domain,
            motifs => $motifs_in{ $domain->{id}{value} } // [],
        );
    }

    $self->_set_domains( \@domains );
    
    my %domains_in;
    push @{ 
        $domains_in{ $_->_root->{'location'}->{'gene'}->{'geneid'}->{'value'} }
        }, $_ for @domains
    ;

    my %orphan_motifs_in;
    push @{ 
        $orphan_motifs_in{ $_->_root->{'geneID'}->{'value'} } 
        }, $_ for @{ $motifs_in{'0'} }
    ;
  
    my @genes; 
    for my $gene ( @{ forcearray $self->_root->{'genelist'}->{'gene'} }) {
        my $gene_object = Gene->new(
            _root         => $gene,
            domains       => $domains_in{ $gene->{id}{value} } // [],
            orphan_motifs => $orphan_motifs_in{ $gene->{id}{value} } // [],
        );
        
        # fill monomer attribute
        my @monomers = $gene_object->monomers;
        if (@monomers) {

            for my $domain ($gene_object->all_domains) {

                if ($domain->function =~ m/^A$ | ^A-OX$ | ^AT/xms) {
                    $domain->_set_monomer( shift @monomers );
                }
            }
        }

        push @genes, $gene_object;
    }

    $self->_set_genes( \@genes );
    
    my @modules;
    if ($self->_root->{modulelist}) {
        @modules = _extract_antismash_modules($self->_root, @genes);
    }
    
    my $cluster_rank = 1;
    
    # fix for antismash5: cluster order is not preserved during json2xml conversion
    my $cluster_list;
    unless ( 
        grep { $_->{genecluster}->{sequence}->{value} }
        @{ forcearray $self->_root->{'model'} })
    {
        # filter clusters by coordinates order for antiSMASH 5 (and 3)
        $cluster_list 
            = [ sort { $a->{genecluster}->{region}->{begin}->{value} 
                <=> $b->{genecluster}->{region}->{begin}->{value} }
                @{ forcearray $self->_root->{'model'} } ]
        ;
    }

    else {
        # do not filter clusters as antismash 4 reboots coordinates per contig
        $cluster_list = [ @{ forcearray $self->_root->{'model'} } ];
    }
                
    my @clusters;
    for my $cluster (@$cluster_list) {
        my $begin = $cluster->{'genecluster'}->{'region'}->{'begin'}->{'value'};
        my $end   = $cluster->{'genecluster'}->{'region'}->{'end'  }->{'value'};

        # fix for antiSMASH 4: keep coordinates in strand + order
        my $strand = $begin < $end ? '+' : '-';
        
        if ($strand eq '-') {
            my $temp_begin = $begin;

            $begin = $end;
            $end   = $temp_begin;
        }

        # second fix for antiSMASH 4: handle the coordinates reset for each contig (use of the sequence value which is only exploited in antiSMASH 4)
        my @cluster_genes;
        if ($cluster->{'genecluster'}->{'sequence'}->{'value'}) {
            my $cluster_seqlist 
                = $cluster->{'genecluster'}->{'sequence'}->{'value'};

            # filter on the sequence value and the cluster coordinates
            @cluster_genes = grep { $_->genomic_dna_begin < $end 
                                && $_->genomic_dna_end > $begin }
                             grep { $_->_root->{'sequence'}{'value'} 
                                eq $cluster_seqlist } @genes
            ;
        }
        
        # this information is not extracted in antiSMASH 5 (but coordinates are continuous)
        else {
            @cluster_genes = grep { $_->genomic_dna_begin < $end 
                &&  $_->genomic_dna_end > $begin } @genes;
        }
        
        @cluster_genes 
            = sort { $a->genomic_dna_begin <=> $b->genomic_dna_begin } 
            @cluster_genes
        ;

        my $gene_rank = 1;
        my $domain_rank = 1;

        for my $gene (@cluster_genes) {

            $_->_set_rank($domain_rank++) 
                for sort { $a->begin <=> $b->begin } $gene->all_domains;

            $gene->_set_rank($gene_rank++);
        }

        # fix antiSMASH 5.1 and its module delineation
        if ($self->_root->{modulelist}) {     # Add new module feature from version 5.1

            # TODO see how synchronize domain rank in @genes and @modules
            my @cluster_modules = 
                sort {$a->genomic_prot_begin <=> $b->genomic_prot_begin } 
                grep { $_->genomic_dna_begin < $end
                    && $_->genomic_dna_end > $begin }
                @modules;
            ;
            
            my $mrank = 1;
            $_->_set_rank($mrank++) for @cluster_modules;
            
            push @clusters, Cluster->new( 
                module_delineation => $self->module_delineation,
                              rank => $cluster_rank,
                             _root => $cluster->{'genecluster'},
                             genes => \@cluster_genes,
                 genomic_dna_begin => $begin,
                   genomic_dna_end => $end,
                genomic_prot_begin => ceil($begin / 3),
                genomic_prot_end   => floor($end / 3),
                modules            => \@cluster_modules,
            );
        }

        else { 
            push @clusters, Cluster->new( 
                module_delineation => $self->module_delineation,
                              rank => $cluster_rank,
                             _root => $cluster->{'genecluster'},
                             genes => \@cluster_genes,
                 genomic_dna_begin => $begin,
                   genomic_dna_end => $end,
                genomic_prot_begin => ceil($begin / 3),
                  genomic_prot_end => floor($end / 3),
            );
        }

        $cluster_rank++;
    }

    # enables module cutting mode
    $_->_set_cutting_mode( $self->module_delineation ) for @clusters;

    $self->_set_clusters( \@clusters );

    return;
}

sub _extract_antismash_modules { 

    my ($root, @report_genes) = @_;

    my @report_modules;
    for my $module ( @{ forcearray $root->{'modulelist'}{'module'} }) {

        next
            if $module->{complete}{value} eq 'false';

        my @mgenes
            = grep { $module->{prot_start}{value} < $_->genomic_prot_end
                  && $module->{prot_end}{value}  >  $_->genomic_prot_begin }
              sort { $a->genomic_prot_begin <=> $b->genomic_prot_end }
            @report_genes
        ;

        my $gene_ids = [map { $_->uui } @mgenes];  # ArrayRef Module attribute

        my $mdomains;
        for my $gene (@mgenes) { 

            push @{ $mdomains }, 
                sort { $a->begin <=> $b->begin }
                grep { ($_->begin + $gene->genomic_prot_begin - 1)  # -1 for beginning domain pos 1 in gene pos 1
                        < $module->{prot_end}{value}
                    && ($_->end + $gene->genomic_prot_begin - 1)
                        > $module->{prot_start}{value} }
                $gene->all_domains
            ;
        }

        my $module_sequence = join '', map { $_->protein_sequence }
            @{ $mdomains };
#             my $module_sequence = substr(
#                 $module->{prot_begin} - 1,
#                 $module->{prot_end} - $module->{prot_begin} + 1,
#                 $mgene->protein_sequence
#             );
#
        
        my $size
            = $module->{prot_end}{value} - $module->{prot_start}{value} + 1;

        push @report_modules, Module->new(
            rank => $module->{id}{value},
            genomic_prot_begin => $module->{prot_start}{value},
            genomic_prot_end   => $module->{prot_end}{value},
            genomic_prot_coordinates => [
                $module->{prot_start}{value},
                $module->{prot_end}{value},
            ],
            protein_sequence => $module_sequence,
            gene_uuis => $gene_ids,
            domains => $mdomains,
            size => $size,
        );
    }

    return(@report_modules);
}


__PACKAGE__->meta->make_immutable;
1;

__END__

=pod

=head1 NAME

Bio::Palantir::Parser::Root - BiosynML DTD-derived internal class

=head1 VERSION

version 0.201670

=head1 SYNOPSIS

    # TODO

=head1 DESCRIPTION

    # TODO

=head1 ATTRIBUTES

=head2 clusters

ArrayRef of L<Bio::Palantir::Parser::Cluster>

=head2 genes

ArrayRef of L<Bio::Palantir::Parser::Gene>

=head2 domains

ArrayRef of L<Bio::Palantir::Parser::Domain>

=head2 motifs

ArrayRef of L<Bio::Palantir::Parser::Motif>

=head1 METHODS

=head2 count_clusters

Returns the number of Clusters of the Root.

    # $root is a Bio::Palantir::Parser::Root
    my $count = $root->count_clusters;

This method does not accept any arguments.

=head2 all_clusters

Returns all the Clusters of the Root (not an array reference).

    # $root is a Bio::Palantir::Parser::Root
    my @clusters = $root->all_clusters;

This method does not accept any arguments.

=head2 get_cluster

Returns one Cluster of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Cluster does not exist, this method will return C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    my $cluster = $root->get_cluster($index);
    croak "Cluster $index not found!" unless defined $cluster;

This method accepts just one argument (and not an array slice).

=head2 next_cluster

Shifts the first Cluster of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Clusters in
the array, returns C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    while (my $cluster = $root->next_cluster) {
        # process $cluster
        # ...
    }

This method does not accept any arguments.

=head2 count_genes

Returns the number of Genes of the Root.

    # $root is a Bio::Palantir::Parser::Root
    my $count = $root->count_genes;

This method does not accept any arguments.

=head2 all_genes

Returns all the Genes of the Root (not an array reference).

    # $root is a Bio::Palantir::Parser::Root
    my @genes = $root->all_genes;

This method does not accept any arguments.

=head2 get_gene

Returns one Gene of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Gene does not exist, this method will return C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    my $gene = $root->get_gene($index);
    croak "Gene $index not found!" unless defined $gene;

This method accepts just one argument (and not an array slice).

=head2 next_gene

Shifts the first Gene of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Genes in
the array, returns C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    while (my $gene = $root->next_gene) {
        # process $gene
        # ...
    }

This method does not accept any arguments.

=head2 count_domains

Returns the number of Domains of the Root.

    # $root is a Bio::Palantir::Parser::Root
    my $count = $root->count_domains;

This method does not accept any arguments.

=head2 all_domains

Returns all the Domains of the Root (not an array reference).

    # $root is a Bio::Palantir::Parser::Root
    my @domains = $root->all_domains;

This method does not accept any arguments.

=head2 get_domain

Returns one Domain of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Domain does not exist, this method will return C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    my $domain = $root->get_domain($index);
    croak "Domain $index not found!" unless defined $domain;

This method accepts just one argument (and not an array slice).

=head2 next_domain

Shifts the first Domain of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Domains in
the array, returns C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    while (my $domain = $root->next_domain) {
        # process $domain
        # ...
    }

This method does not accept any arguments.

=head2 count_motifs

Returns the number of Motifs of the Root.

    # $root is a Bio::Palantir::Parser::Root
    my $count = $root->count_motifs;

This method does not accept any arguments.

=head2 all_motifs

Returns all the Motifs of the Root (not an array reference).

    # $root is a Bio::Palantir::Parser::Root
    my @motifs = $root->all_motifs;

This method does not accept any arguments.

=head2 get_motif

Returns one Motif of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Motif does not exist, this method will return C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    my $motif = $root->get_motif($index);
    croak "Motif $index not found!" unless defined $motif;

This method accepts just one argument (and not an array slice).

=head2 next_motif

Shifts the first Motif of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Motifs in
the array, returns C<undef>.

    # $root is a Bio::Palantir::Parser::Root
    while (my $motif = $root->next_motif) {
        # process $motif
        # ...
    }

This method does not accept any arguments.

=head1 AUTHOR

Loic MEUNIER <lmeunier@uliege.be>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2019 by University of Liege / Unit of Eukaryotic Phylogenomics / Loic MEUNIER and Denis BAURAIN.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
