#!/usr/bin/env perl

use strict;
use warnings;

use v5.10;
use Getopt::Long qw( :config bundling permute no_getopt_compat );
use Pod::Usage;
use rlib '../lib';

use Pod::Usage;
use Bio::BPWrapper;
use Bio::BPWrapper::AlnManipulations;

################################################################################
# Option parsing
################################################################################
my %opts;
GetOptions(
    \%opts,
    "help|h",
    "man",
    "avpid|a",
    "bootstrap|b",
    "codon-view|c:20",    # Optional value, default 20
    "delete|d=s",
    "nogaps|g",
    "input|i=s",
    "length|l",
    "match|m",
    "numseq|n",
    "output|o=s",
    "pick|p=s",
    "refseq|r=s",
    "slice|s=s",
    "split-cdhit=s",
    "uniq|u",
    "varsites|v",
    "window|w:30",
    "concat|A",
    "conblocks|B:6",
    "consensus|C:50",
    "dna2pep|D",
    "erasecol|E=s",
    "noflatname|F",
    "aln-index|I=s",
    "listids|L",
    "permute-states|M",
    "pep2dna|P=s",
    "resample|R:i",    # Optional value, default is floor of num_sequences/2
    "shuffle-sites|S",
    "select-third",
    "remove-third",
    "uppercase", # make upper case (for DNAStatistics)
    "gapstates",
    "gapstates2",
    "random-slice=i",
    "trimends",
    "bin-inform", # extract binary & informative sites (for clique): discard constant, 3/4-states, non-informative
    "version|V",
    "binary", # 0/1 for binary sites (for phylip lagacy programs)
    "phy-nonint" # non-interleaved phylip (for e.g.,clique)
  #  "dnadist|D=s", # Needs fixing
  #  "inform|Z",
) or pod2usage(2);

use constant PROGRAM => File::Basename::basename(__FILE__);
Bio::BPWrapper::print_version(PROGRAM) if $opts{"version"};

initialize(\%opts);
write_out(\%opts);

################# POD Documentation ############

__END__
=encoding utf8

=head1 NAME

bioaln - L<Bio::Perl> Alignment utility wrappers for use
L<Bio::AlignIO>, L<Bio::SimpleAlign> and L<Bio::Align::Utilities>

=head1 SYNOPSIS

B<bioaln> I<options> [I<alignment_file>]

B<bioaln> [C<-h> | C<--help> | --C<v> | C<--version> C<--man>]

=head1 DESCRIPTION

B<bioaln> performs common, routine manipulations of sequence
alignments. By default, B<bioaln> assumes that both the input and the
output files are in CLUSTALW format so that multiple C<bioaln> runs
can be chained with UNIX pipes. Upper-case options are less commonly
used.

=head2 OPTIONS

=head3 Alignment Descriptors

=over 4

=item --avpid, -a

Calculate the average percent identity of an alignment. Returns the
value alone.

Wraps L<Bio::SimpleAlign-E<gt>average_percentage_identity()|https://metacpan.org/pod/Bio::SimpleAlign#average_percentage_identity>.

=item --gapstate

   bioaln --gapstate <alignment_file>

=item --length, -l

Print alignment length. Wraps L<Bio::SimpleAlign-E<gt>length()|https://metacpan.org/pod/Bio::SimpleAlign#length>.

   bioaln --length <alignment_file>

=item --listids, -L

List all sequence ids.

   bioaln --listids <alignment_file>

=item --numseq, -n

Get number of sequences in alignment.

   bioaln --numseq <alignment_file>

=item --window, -w

Calculate pairwise average sequence difference by windows (overlapping windows with fixed step of 1). Default value for window_size is 30.

   bioaln --window window_size <alignment_file>

=back

=head3 Alignment Viewers

=over 4

=item --codon-view, -c

Print a CLUSTALW-like alignment, but separated by codons. Intended for use with DNA sequences. Block-final position numbers are printed at the end of every alignment block at the point of wrapping, and block-initial counts appear over first nucleotide in a block.

If invoked as C<--codon-view=>I<n> where I<n> is some number, will print I<n> codons per line. Other normally stackable options, such as C<--match>, can be used alongside it. If piping through bioaln, ensure codon-view is used in the last invocation.

For C<bioaln -c input_DNA.aln> when C<input_DNA.aln> contains:

 Seq1   ATGAATAAAAAGATATACAGCATAGAAGAATTAATAGATAAAATAAGC
 Seq2   ATGAATAATAAAATATACAGCATAGAAGAATTAATAGATAAAATAAGC
 Seq3   ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAAATAAGT
 Seq4   ATGAATAAAAAAACATATAGCATAGAAGAATTAATAGATAAAATAAGT
 Seq5   ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAATAAGC
 Seq6   ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAATAAGT
        ******** ** * *** *************** **** ********

you get:

                                                                       4
         1                                                             8
  Seq1   ATG AAT AAA AAG ATA TAC AGC ATA GAA GAA TTA ATA GAT AAA ATA AGC
  Seq2   ATG AAT AAT AAA ATA TAC AGC ATA GAA GAA TTA ATA GAT AAA ATA AGC
  Seq3   ATG AAT AAA AAG ATA TAT AGC ATA GAA GAA TTA GTA GAT AAA ATA AGT
  Seq4   ATG AAT AAA AAA ACA TAT AGC ATA GAA GAA TTA ATA GAT AAA ATA AGT
  Seq5   ATG AAT AAA AAA ATA TAT AGC ATA GAA GAA TTA ATA GAC AAA ATA AGC
  Seq6   ATG AAT AAA AAA ATA TAT AGC ATA GAA GAA TTA ATA GAC AAA ATA AGT

=item --match, -m

Go through all columns and change residues identical to the reference sequence to be the match character, '.' Wraps L<Bio::SimpleAlign-E<gt>match()|https://metacpan.org/pod/Bio::SimpleAlign#match>.

   bioaln --match <alignment_file>

With

For input:

 Seq1           ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA--ATAAGT
 Seq2           ATGAATAAAAAGATATACAGCATAGAAGAATTAATAGATAAACGATAAGC
 Seq3           ATGAATAATAAAATATACAGCATAGAAGAATTAATAGATAAA--ATAAGC
 Seq4           ATGAATAAAAAAACATATAGCATAGAAGAATTAATAGATAAA--ATAAGT
 Seq5           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAC-ATAAGC
 Seq6           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAA--ATAAGT
                ******** ** * *** *************** **** ***  *****

C<bioaln --match input.aln> gives:

 Seq1           ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA--ATAAGT
 Seq2           .................C...............A........CG.....C
 Seq3           ........T..A.....C...............A...............C
 Seq4           ...........A.C...................A................
 Seq5           ...........A.....................A....C...C......C
 Seq6           ...........A.....................A....C...........

=back

=head3 Alignment filters which produce a new alignment

=over 4

=item --consensus, -C

Add a consensus sequence to the end of the alignment with a certain
threshold percent and id Consensus_<percent>. By default percent is
50. Wraps
L<Bio::SimpleAlign-E<gt>consensus_string()|https://metacpan.org/pod/Bio::SimpleAlign#consensus_string>.

   bioaln --consensus 'percent' <alignment_file>

For C<bioaln --consensus '90' input.aln> where C<input.aln> contains:

 Seq1                  MNNKIYSIEELIDKISMPVVAYAGEAKSFLREALEYAKNK
 Seq2                  MNKKTYSIEELIDKISMPVVAYSGEAKSFLREALEHAKNK
 Seq3                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq4                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq5                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq6                  MNKKIYSIEELVDKISMPVVAYSGEAKSFLREALEYAKNK
                       **:* ******:**********:************:****

you get

 Seq1                  MNNKIYSIEELIDKISMPVVAYAGEAKSFLREALEYAKNK
 Seq2                  MNKKTYSIEELIDKISMPVVAYSGEAKSFLREALEHAKNK
 Seq3                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq4                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq5                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq6                  MNKKIYSIEELVDKISMPVVAYSGEAKSFLREALEYAKNK
 Consensus_90          MN?K?YSIEEL?DKISMPVVAY?GEAKSFLREALE?AKNK
                       ** * ****** ********** ************ ****

=item --delete, -d

Delete sequences based on their id. Option takes a comma-separated list of ids.

   bioaln --delete 'seq_id_1, seq_id_2, ... , seq_id_n' <alignment_file>

=item --dna2pep, -D

Turn a CDS alignment to a corresponding protein alignment. Wraps
L<Bio::Align::Utilities-E<gt>dna_to_aa_aln()|https://metacpan.org/pod/Bio::Align::Utilities#dna_to_aa_al>.

   bioaln --dna2pep <cds_alignment>

=item --erasecol, -E

Remove columns with gap in designated sequence.

   bioaln --erasecol 'seq_id' <alignment_file>

For C<bioaln --erasecol 'Seq5' input.aln> where C<input.aln> contains:

 Seq1           ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA--ATAAGT
 Seq2           ATGAATAAAAAGATATACAGCATAGAAGAATTAATAGATAAACGATAAGC
 Seq3           ATGAATAATAAAATATACAGCATAGAAGAATTAATAGATAAA--ATAAGC
 Seq4           ATGAATAAAAAAACATATAGCATAGAAGAATTAATAGATAAA--ATAAGT
 Seq5           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAC-ATAAGC
 Seq6           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAA--ATAAGT
                ******** ** * *** *************** **** ***  *****

you get output:

 Seq1           ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA-ATAAGT
 Seq2           ATGAATAAAAAGATATACAGCATAGAAGAATTAATAGATAAACATAAGC
 Seq3           ATGAATAATAAAATATACAGCATAGAAGAATTAATAGATAAA-ATAAGC
 Seq4           ATGAATAAAAAAACATATAGCATAGAAGAATTAATAGATAAA-ATAAGT
 Seq5           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAACATAAGC
 Seq6           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAA-ATAAGT
                ******** ** * *** *************** **** *** *****

=item --input, -i

Input file format. See L<Bio::AlignIO> for supported formats. By
default, this is 'clustalw'. Wraps L<Bio::AlignIO-E<gt>new()|https://metacpan.org/pod/Bio::AlignIO#new>.

   bioaln --input 'format'

=item --nogaps, -g

Remove gaps (and returns an de-gapped alignment). Wraps
L<Bio::SimpleAlign-E<gt>remove_gaps()|https://metacpan.org/pod/Bio::SimpleAlign#remove_gaps>.

   bioaln --nogaps <alignment_file>

=item --output, -o

Output file format. see L<Bio::AlignIO> for supported formats. By
default, this is 'clustalw'. Wraps
L<Bio::AlignIO-E<gt>new()|https://metacpan.org/pod/Bio::AlignIO#new>.

   bioaln --output 'format'

=item --pick, -p

Pick sequences based on their id. Option takes a comma-separated list of ids.

   bioaln --pick 'seq_id_1, seq_id_2, ... , seq_id_n' <alignment_file>

=item --refseq, -r I<seqid>

Change the reference sequence to be I<seq_id>. Wraps
L<Bio::SimpleAlign-E<gt>set_new_reference()|https://metacpan.org/pod/Bio::SimpleAlign#set_new_reference>.

   bioaln --refseq 'seq_id' <alignment_file>

=item --slice, -s

Get a slice of the alignment. Wraps
L<Bio::SimpleAlign-E<gt>slice()|Bio::SimpleAlign#https://metacpan.org/pod/Bio::SimpleAlign#slice>
with improvements.

Using a '-' character in the first or second position defaults to the beginning or end, respectively. Therefore specifying -s'-,-' is the same as grabbing the whole alignment.

   bioaln --slice 'min,max' <alignment_file>

--slice'20,80' or --slice'20,80' or -s='20,80' or --slice='20,80'   Slice from position 20 to 80, inclusive.
--slice'-,80'  Slice from beginning up to, and including position 80
--slice'20,-'  Slice from position 20 up to, and including, the end of the alignment

NOTE: --slice'-,x' (where x is '-' or a position) does NOT work. Use --slice='-,x' instead.

=item --varsites, -v

Extracts variable sites. B<Used in conjunction with -g:> do not show sites with gaps in any sequence.

   bioaln --varsites <alignment_file>

=item --uniq, -u.

Extract the alignment of unique sequences. Wraps L<Bio::SimpleAlign-E<gt>uniq_seq()|https://metacpan.org/pod/Bio::SimpleAlign#uniq_seq>.

   bioaln --uniq <alignment_file>

For C<bioaln --uniq input.aln> where C<input.aln> contains:

 seq1                  ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA--ATAAGT
 seq11                 ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA--ATAAGT
 seq2                  ATGAATAAAAAGATATACAGCATAGAAGAATTAATAGATAAACGATAAGC
 seq3                  ATGAATAATAAAATATACAGCATAGAAGAATTAATAGATAAA--ATAAGC
 seq4                  ATGAATAAAAAAACATATAGCATAGAAGAATTAATAGATAAA--ATAAGT
 seq5                  ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAC-ATAAGC
 seq7                  ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAC-ATAAGC
 seq6                  ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAA--ATAAGT
                       ******** ** * *** *************** **** ***  *****

you get:

 seq1	ST1
 seq11	ST1
 seq2	ST2
 seq3	ST3
 seq4	ST4
 seq5	ST5
 seq7	ST5
 seq6	ST6

 ST1                   ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA--ATAAGT
 ST2                   ATGAATAAAAAGATATACAGCATAGAAGAATTAATAGATAAACGATAAGC
 ST3                   ATGAATAATAAAATATACAGCATAGAAGAATTAATAGATAAA--ATAAGC
 ST4                   ATGAATAAAAAAACATATAGCATAGAAGAATTAATAGATAAA--ATAAGT
 ST5                   ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAC-ATAAGC
 ST6                   ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAA--ATAAGT
                       ******** ** * *** *************** **** ***  *****

=item --pep2dna, -P

Align CDS sequences according to their corresponding protein
alignment. Wraps
L<Bio::Align::Utilities-E<gt>aa_to_dna_aln()https://metacpan.org/pod/Bio::Align::Utilities#aa_to_dna_aln>.

   bioaln --pep2dna 'unaligned_cds.fas' <protein_alignment>

=item --uppercase

Make an uppercase alignment.

   bioaln --uppercase <alignment_file>

=back

=head3 Evolutionary Analysis

=over 4

=item --bootstrap, -b

Produced a bootstrapped alignment. Wraps
L<Bio::Align::Utilities-E<gt>bootstrap()|https://metacpan.org/pod/Bio::Align::Utilities#bootstrap_replicates>.

Note that only a single alignment is produced. To produce multiple
bootstraped alignment, use BASH loop (see below)

Here is an example in bash:

   # 10 bootstrapped alignments
   for i in {1..10}; do
      bioaln --bootstrap foo.aln > foo.boot-$i.aln;
   done

=item --conblocks, -B

Extract perfectly conserved blocks (PCBs, gap excluded) from an alignment, each to a new clustalw file. Default minimum length of PCB is 6 sites.

   bioaln --conblocks <alignment_file>

With C<bioaln --conblocks input.aln> where C<input.aln> is:

 Seq1           ATGAATAAAAAGATATATAGCATAGAAGAATTAGTAGATAAA--ATAAGT
 Seq2           ATGAATAAAAAGATATACAGCATAGAAGAATTAATAGATAAACGATAAGC
 Seq3           ATGAATAATAAAATATACAGCATAGAAGAATTAATAGATAAA--ATAAGC
 Seq4           ATGAATAAAAAAACATATAGCATAGAAGAATTAATAGATAAA--ATAAGT
 Seq5           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAAC-ATAAGC
 Seq6           ATGAATAAAAAAATATATAGCATAGAAGAATTAATAGACAAA--ATAAGT
                ******** ** * *** *************** **** ***  *****

you get:

 nuc.aln.slice-1.aln : file contents below. Site positions indicated after the '/'

 Seq1/1-8              ATGAATAA
 Seq2/1-8              ATGAATAA
 Seq3/1-8              ATGAATAA
 Seq4/1-8              ATGAATAA
 Seq5/1-8              ATGAATAA
 Seq6/1-8              ATGAATAA
                       ********
 nuc.aln.slice-19.aln:

 Seq1/19-33            AGCATAGAAGAATTA
 Seq2/19-33            AGCATAGAAGAATTA
 Seq3/19-33            AGCATAGAAGAATTA
 Seq4/19-33            AGCATAGAAGAATTA
 Seq5/19-33            AGCATAGAAGAATTA
 Seq6/19-33            AGCATAGAAGAATTA
                       ***************

 nuc.aln.slice-40.aln

 Seq1/40-47            AAAATAAG
 Seq2/40-47            AAAATAAG
 Seq3/40-47            AAAATAAG
 Seq4/40-47            AAAATAAG
 Seq5/40-47            AAAATAAG
 Seq6/40-47            AAAATAAG
                       ********

=item --concat, -A

Concatenate multiple alignments sharing the same set of unique
IDs. This is normally used for concatenating individual gene
alignments of the same set of samples to a single one for making a
"supertree". Wraps L<Bio::Align::UtilitiesE<gt>cat()|https://metacpan.org/pod/Bio::Align::Utilities#cat>.

   bioaln --concat gene1.aln gene2.aln gene3.aln gene4.aln

or using wildcard to specify files:

   bioaln --concat gene*.aln

=item --noflatname, -F

By default, sequence names do not contain 'begin-end'. This option
turns ON 'begin-end' naming. Wraps
L<Bio::SimpleAlign-E<gt>set_displayname_flat()|https://metacpan.org/pod/Bio::SimpleAlign#set_displayname_flat>.

   bioaln --noflatname <alignment_file>

=item --aln-index, -I

Identify aligned position of a residue. Wraps
L<Bio::SimpleAlign-E<gt>column_from_residue_number()|https://metacpan.org/pod/Bio::SimpleAlign#column_from_residue_number>.

   bioaln --aln-index 'seq_id,position' <alignment_file>

=item --permute-states, -M

Generate an alignment with randomly permuted residues at each site. This operation removes phylogenetic signal among aligned sequences, if there is any in the original alignment. This is the basis of the Permutation Trail Prob (PTP) test of tree-ness of an alignment.

   bioaln --permute-states <alignment_file>

=item --resample, -R

Picks I<n> random sequences from input alignment and produces a new alignment consisting of those sequences.

If n is not given, default is the number of sequences in alignment divided by 2, rounded down.

This functionality uses an implementation of Reservoir Sampling, based on the algorithm found here: http://blogs.msdn.com/b/spt/archive/2008/02/05/reservoir-sampling.aspx

   bioaln --resample 'n' <alignment_file>

For C<bioaln --resample 4 input.aln> where C<input.aln> contains

 Seq1                  MNNKIYSIEELIDKISMPVVAYAGEAKSFLREALEYAKNK
 Seq2                  MNKKTYSIEELIDKISMPVVAYSGEAKSFLREALEHAKNK
 Seq3                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq4                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq5                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq6                  MNKKIYSIEELVDKISMPVVAYSGEAKSFLREALEYAKNK
                       **:* ******:**********:************:****

you get:

 Seq2                  MNKKTYSIEELIDKISMPVVAYSGEAKSFLREALEHAKNK
 Seq3                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq4                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq6                  MNKKIYSIEELVDKISMPVVAYSGEAKSFLREALEYAKNK
                       **** ******:***********************:****

=item --shuffle-sites, -S

Make a shuffled (not bootstraped) alignment. This operation randomizes alignment columns. It is used for testing the signficance of long-runs of conserved sites in an alignment (e.g., conserved intergenic spacers [IGSs]).

   bioaln --shuffle-sites <alignment_file>

For bioaln --shuffle-sites input.aln

 Seq1                  MNNKIYSIEELIDKISMPVVAYAGEAKSFLREALEYAKNK
 Seq2                  MNKKTYSIEELIDKISMPVVAYSGEAKSFLREALEHAKNK
 Seq3                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq4                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq5                  MNKKIYSIEELIDKISMPVVAYSGEAKSFLREALEYAKNK
 Seq6                  MNKKIYSIEELVDKISMPVVAYSGEAKSFLREALEYAKNK
                       **:* ******:**********:************:****

you get:

 Seq1                  VIEELKYEKAAEAFIPNISEDASGKIRLKLMSVNAYKMYN
 Seq2                  VIEELKYEKAAEAFIPNISEDASGKTRLKLMSVKSYKMHN
 Seq3                  VIEELKYEKAAEAFIPNISEDASGKIRLKLMSVKSYKMYN
 Seq4                  VIEELKYEKAAEAFIPNISEDASGKIRLKLMSVKSYKMYN
 Seq5                  VIEELKYEKAAEAFIPNISEDASGKIRLKLMSVKSYKMYN
 Seq6                  VIEELKYEKAAEAFVPNISEDASGKIRLKLMSVKSYKMYN
                       **************:********** *******::***:*

=item --select-third

Generate an alignment of every-third (mostly synonymous) bases (assuming a CDS alignment).

=back

=head3 Common Options

=over 4

=item --help, -h

Print a brief help message and exit.

=item --man

Print the manual page and exit.

=item --version, -V

Print current release version of this command and exit.

=back

=head1 EXAMPLES

=head2 Alignment Descriptors

 bioaln --avpid input.aln            # average percent identity
 bioaln --gapstates  aln_file        # identify unique gaps (start, end, frequency, whether in bioaln --index 'B31,100' aln_file   # get aln column index of seq 'B31' residue 100
 bioaln --length aln_file            # length of an alignment
 bioaln --listids aln_file           # list all sequence IDs
 bioaln --numseq aln_file            # number of aligned sequences
 bioaln --window '30' aln_file       # average identifies for sliding [w]indows of 30
ternal)

=head2 Alignment Viewers

 bioaln --codon-view aln_file             # codon view (groups of 3 nts)
 bioaln --match input.aln                 # match view (show variable sites)

=head2 Alignment filters which produce a new alignment

 bioaln --consensus '90' aln_file         # add a 90% consensus sequence
 bioaln --delete 'Seq1,Seq2' aln_input    # delete sequences
 bioaln --dna2pep cds.aln                 # DNA alignment => protein alignment
 bioaln --erasecol 'Seq5' input.aln       # erase sites gapped at Seq5
 bioaln --input 'fasta' fasta_aln_file    # input is a FASTA alignment (CLUSTALW is dafault)
 bioaln --nogaps aln_file                 # remove gapped sites
 bioaln --output 'fasta' aln_file         # output a FASTA alignment (CLUSTALW is dafault)
 bioaln --pep2dna 'cds.fas' pep.aln       # Back-align CDS seqs according to protein alignment
 bioaln --pick 'Seq1, Seq2' aln_input     # pick sequences
 bioaln --refseq 'seq_id' aln_file        # change reference (1st) sequence
 bioaln --slice '10,20'                   # alignment slice from 10-20
 bioaln --uniq aln_file                   # remove redundant sequences
 bioaln --upperasealn_file                # turn into upper-case
 bioaln --varsites aln_file               # show only variable sites

=head2 Evolutionary Analysis

 bioaln --concat *.aln               # concattenate aln files
 bioaln --conblocks aln_file         # extract conserved blocks
 bioaln --shuffle-sitesaln_file      # shuffle sites (for testing conserved blocks)
 bioaln --resample '10' aln_file     # [R]e-sampled an alignment of 10 sequences
 bioaln --bootstrap aln_file         # bootstrap an alignment (for testing branch stability)
 bioaln --permute-states aln_file    # permute at each site (for testing tree-ness)
 bioaln --remove-third aln_file      # remove [T]hird site (assume coding sequences)

=head2 change alignment format

 bioaln --input 'fasta' --output 'phylip'       # FASTA => PHYLIP
 bioaln --input 'fasta' --output 'pmal'         # FASTA => PAML

=head2 Chaining with pipes

 # Read a FASTA alignment, slice it, and calcualte percent identity:
 bioaln -i'fasta' fasta.aln | bioaln -s'10,20' | bioaln -a

 # Chaining with bioseq:
 # Turn a coding-sequence alignment into a protein alignemnt (an alternative to C<--dna2pep>):

   bioaln --output fasta cds.aln | bioseq --translate 1 | bioaln --input fasta

=head1 SEE ALSO

=over 4

=item *

L<Qiu Lab wiki page|http://diverge.hunter.cuny.edu/labwiki/Bioutils>

=item *

L<Github project wiki page|https://github.com/rock/p5-bpwrapper>

=item *

L<bioseq>: a wrapper of BioPerl class L<Bio::Seq> with additional methods

=back

=head1 CONTRIBUTORS

=over 4

=item *
William McCaig <wmccaig at gmail dot com>

=item *
Girish Ramrattan <gramratt at gmail dot com>

=item  *
Che Martin <che dot l dot martin at gmail dot com>

=item  *
Yözen Hernández yzhernand at gmail dot com

=item *
Levy Vargas <levy dot vargas at gmail dot com>

=item  *
L<Weigang Qiu|mailto:weigang@genectr.hunter.cuny.edu> (Maintainer)

=item *
Rocky Bernstein

=back

=cut
