#!/usr/bin/env perl
package Bio::AssemblyImprovement::Bin::ImproveAssembly;
# ABSTRACT: Given an assembly, some reads and optionally a reference, reduce the number of contigs and fill gaps.
# PODNAME: improve_assembly


BEGIN { unshift( @INC, '../lib' ) }
use lib "/software/pathogen/internal/prod/lib";
use Moose;
use Getopt::Long;
use Cwd;
use Cwd 'abs_path';
use File::Path qw(make_path);
use Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles;
use Bio::AssemblyImprovement::Scaffold::SSpace::Iterative;
use Bio::AssemblyImprovement::FillGaps::GapFiller::Iterative;
use Bio::AssemblyImprovement::Abacas::Iterative;
use Bio::AssemblyImprovement::Validate::Executable;

# Command-line option values. Each stays undefined until GetOptions() fills it
# in; defaults for the optional ones are applied after validation.
my (
    $assembly_file,   $forward_reads_file, $reverse_reads_file,
    $insert_size,     $scaffolder_exec,    $abacas_exec,
    $gapfiller_exec,  $debug,              $reference,
    $output_directory, $help
);

# Parse the command line. GetOptions() returns false when it meets an unknown
# option or a value of the wrong type (e.g. a non-integer insert size).
my $options_parsed = GetOptions(
    'a|assembly=s'         => \$assembly_file,
    'f|forward_fastq=s'    => \$forward_reads_file,
    'r|reverse_fastq=s'    => \$reverse_reads_file,
    'c|reference=s'        => \$reference,
    'i|insert_size=i'      => \$insert_size,
    's|scaffolder_exec=s'  => \$scaffolder_exec,
    'b|abacas_exec=s'      => \$abacas_exec,
    'g|gapfiller_exec=s'   => \$gapfiller_exec,
    'd|debug'              => \$debug,
    'o|output_directory=s' => \$output_directory,
    'h|help'               => \$help,
);

# A failed parse is treated like an explicit request for help, so the usage
# text is shown instead of running the pipeline with options that were
# silently dropped.
$help = 1 unless $options_parsed;

# Validate the inputs before any work is done and print the usage text (via
# die) when something is wrong or help was requested:
#   * the assembly and both read files are mandatory and must exist on disk;
#   * the reference is optional, but when one is given it must exist too, so a
#     mistyped path is reported here rather than part-way through the run.
# The defined() tests come first so that -e is never applied to undef.
(
       defined($assembly_file)
    && defined($forward_reads_file)
    && defined($reverse_reads_file)
    && ( -e $assembly_file )
    && ( -e $forward_reads_file )
    && ( -e $reverse_reads_file )
    && ( !defined($reference) || ( -e $reference ) )
    && !$help
) or die <<USAGE;
Usage: improve_assembly [options]
Take in an assembly in FASTA format,reads in FASTQ format, and optionally a reference and produce a a better reference using Abacas/SSpace and GapFiller.

# Improve the assembly without a reference
improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq 

# Provide a reference
improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq  -c my_reference.fa

# Gzipped input files are accepted
improve_assembly -a contigs.fa.gz -f 123_1.fastq.gz -r 123_2.fastq.gz

# Insert size defaults to 250 if not specified
improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq -i 3000

# Output to a specific directory
improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq -o my_directory

# This help message
improve_assembly -h

USAGE

# Defaults for every option the user left unset.
$debug           ||= 0;
$insert_size     ||= 250;
$scaffolder_exec ||= '/software/pathogen/external/apps/usr/local/SSPACE-BASIC-2.0_linux-x86_64/SSPACE_Basic_v2.0.pl';
$gapfiller_exec  ||= '/software/pathogen/external/apps/usr/local/GapFiller_v1-11_linux-x86_64/GapFiller.pl';
$abacas_exec     ||= 'abacas.pl';

# When the chosen SSPACE / GapFiller path fails the executable check, fall
# back to the bare script name.
# NOTE(review): presumably the bare name is then resolved through PATH by the
# wrapper classes - confirm.
$scaffolder_exec = 'SSPACE_Basic_v2.0.pl' unless(Bio::AssemblyImprovement::Validate::Executable->new()->does_executable_exist($scaffolder_exec));
$gapfiller_exec = 'GapFiller.pl' unless(Bio::AssemblyImprovement::Validate::Executable->new()->does_executable_exist($gapfiller_exec));

# Output goes to the current directory unless -o was given. The directory is
# created *before* it is canonicalised: abs_path() can return undef for a path
# whose parent directories do not exist yet, which would then be handed to
# make_path() and lose the name the user asked for.
$output_directory ||= getcwd();
make_path($output_directory);

my $resolved_output_directory = abs_path($output_directory);
defined($resolved_output_directory)
  or die "Could not resolve output directory '$output_directory'\n";
$output_directory = $resolved_output_directory;

# Hand the paired reads, the assembly and the (optional) reference to the
# preprocessing step. Every later stage reads its inputs from this object's
# processed_* accessors rather than from the raw command-line paths.
my @input_files = ( $forward_reads_file, $reverse_reads_file );

my %preprocess_args = (
    input_assembly => $assembly_file,
    input_files    => \@input_files,
    reference      => $reference,
);
my $preprocess_input_files =
  Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles->new(%preprocess_args);

# Hold on to the preprocessor's temporary-directory object for the rest of the
# script.
# NOTE(review): presumably this keeps the temporary files alive while the
# later stages are still reading them - confirm.
my $process_input_files_tmp_dir_obj = $preprocess_input_files->_temp_directory_obj();

# Stage 1 - scaffold and extend contigs with the iterative SSPACE wrapper,
# using the preprocessed reads and assembly.
my %scaffolder_args = (
    input_assembly        => $preprocess_input_files->processed_input_assembly,
    input_files           => $preprocess_input_files->processed_input_files,
    insert_size           => $insert_size,
    scaffolder_exec       => $scaffolder_exec,
    output_base_directory => $output_directory,
    debug                 => $debug,
);
my $scaffolding_obj =
  Bio::AssemblyImprovement::Scaffold::SSpace::Iterative->new(%scaffolder_args);
$scaffolding_obj->run();

# The scaffolder's result file is the input to the optional ordering stage.
my $scaffolding_output = $scaffolding_obj->final_output_filename;

# Stage 2 (optional) - order the scaffolded contigs against the reference with
# the iterative Abacas wrapper. Skipped entirely when no reference was given.
if ( defined $reference ) {
    my %abacas_args = (
        input_assembly        => $scaffolding_output,
        reference             => $preprocess_input_files->processed_reference,
        abacas_exec           => $abacas_exec,
        output_base_directory => $output_directory,
        debug                 => $debug,
    );

    # $scaffolding_obj is deliberately overwritten: the gap-filling stage asks
    # it for final_output_filename and so picks up whichever stage ran last.
    $scaffolding_obj = Bio::AssemblyImprovement::Abacas::Iterative->new(%abacas_args);
    $scaffolding_obj->run();
}


# Stage 3 - fill gaps with the iterative GapFiller wrapper. Its input is the
# output of the last stage that ran (SSPACE, or Abacas when a reference was
# supplied).
my %gap_filler_args = (
    input_assembly        => $scaffolding_obj->final_output_filename,
    input_files           => $preprocess_input_files->processed_input_files,
    insert_size           => $insert_size,
    gap_filler_exec       => $gapfiller_exec,
    _output_prefix        => 'gapfilled',
    output_base_directory => $output_directory,
    debug                 => $debug,
);

# Note: this holds whatever run() returns, not the wrapper object itself.
my $fill_gaps_obj =
  Bio::AssemblyImprovement::FillGaps::GapFiller::Iterative->new(%gap_filler_args)->run();

__END__

=pod

=head1 NAME

improve_assembly - Given an assembly, some reads and optionally a reference, reduce the number of contigs and fill gaps.

=head1 VERSION

version 1.131890

=head1 SYNOPSIS

Given an assembly, some reads and optionally a reference, reduce the number of contigs and fill gaps.

   # Improve the assembly without a reference
   improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq 

   # Provide a reference
   improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq  -c my_reference.fa

   # Gzipped input files are accepted
   improve_assembly -a contigs.fa.gz -f 123_1.fastq.gz -r 123_2.fastq.gz

   # Insert size defaults to 250 if not specified
   improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq -i 3000

   # Output to a specific directory
   improve_assembly -a contigs.fa -f 123_1.fastq -r 123_2.fastq -o my_directory

   # This help message
   improve_assembly -h

=head1 AUTHOR

Andrew J. Page <ap13@sanger.ac.uk>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute.

This is free software, licensed under:

  The GNU General Public License, Version 3, June 2007

=cut
