### generate_callable_bases.pl ####################################################################
# Description
# Finds the callable bases (positions covered by at least --min_cov reads) of the given bam file
# and writes them to bed files in the output directory

### HISTORY #######################################################################################
# Version           Date            Developer           Comments
# 0.0.1				2015-02-01		Vincent Huang		
#
# 0.1.1             2015-06-16      Lydia Liu           Combine shell scripts, simplify code
# 0.1.2				2015-09-28      Lydia Liu 			Use SGE-Tools
#
### TO DO #########################################################################################


### INCLUDES ######################################################################################
use warnings;
use strict;
use Carp;
use BoutrosLab::Utilities::General;
use Getopt::Long;
use Pod::Usage;
use POSIX qw(strftime);
use Cwd;
use YAML qw(LoadFile);
use File::Path qw(make_path);
use SGE_tools;
use HPCI;
use File::Basename;
use Path::Class;
use Data::Dumper;

# Run date as YYYY-MM-DD (%C is the century, %y the two-digit year); prefixes the log file name
use constant DATE => strftime("%C%y-%m-%d", localtime(time()));
# Working directory path up to the first 'BoutrosLab', followed by 'BoutrosLab/'
# NOTE(review): when the working directory does not contain 'BoutrosLab', index() returns -1 and
# the prefix becomes the working directory minus its last character -- confirm this is never hit
use constant SVN => substr(cwd(),0,index(cwd(),'BoutrosLab')) . 'BoutrosLab/';

# The log file is created in the directory the script is started from. Failing to create it is
# reported but not fatal; $log_fh is left undefined in that case so it is not closed later on
my $log_output = DATE . '_generate_callable_bases.logs';
my $log_fh;
if (!open($log_fh, '>', $log_output)) {
	carp "Could not open log file $log_output: $!";
	$log_fh = undef;
	}

### COMMAND LINE DEFAULT ARGUMENTS ################################################################
# list of arguments and default values go here as hash key/value pairs
# options left undef here are mandatory: main() refuses to run while any value is undefined
our %opts = (
	input => undef,
	output_dir => undef,
	min_cov => 10,
	);

### MAIN CALLER ###################################################################################
my $result = main();
# only close the log when it was actually opened
if (defined $log_fh) {
	close($log_fh) or carp "Could not close log file $log_output: $!";
	}
exit($result);

### FUNCTIONS #####################################################################################

### main ##########################################################################################
# Description:
#	Main subroutine for program: parses the command line, derives the sample id and the name
#	of the output bed file, changes into the output directory and submits the jobs
# Input Variables:
#	%opts = command line arguments (file-level hash, filled in by GetOptions)
# Output Variables:
#	0 after the jobs have been handed to qsub_cb; on bad usage the program exits through
#	pod2usage, and it croaks when the output directory cannot be entered

sub main {
	# get the command line arguments
	GetOptions(
		\%opts,
		"help|?",
		"man",
		"input=s",
		"output_dir=s",
		"min_cov:i"
		) or pod2usage(64);

	if ($opts{'help'}) { pod2usage(1) };
	if ($opts{'man'}) { pod2usage(-exitstatus => 0, -verbose => 2) };

	# any option that is still undefined was mandatory and not supplied; the keys are sorted
	# so that the same argument is reported on every run
	foreach my $arg (sort keys %opts) {
		if (!defined $opts{$arg}) {
			print "ERROR: Missing argument $arg\n";
			pod2usage(128);
			}
		}

	my $min_cov = $opts{'min_cov'};

	# The sample id is '<parent directory>_<bam file name>', built from the path as typed.
	# The parent directory is only captured when it follows a '/' and consists of word
	# characters; otherwise the prefix is empty and the id starts with '_'
	my $path = dirname($opts{'input'});
	my ($dir) = $path =~m/\/(\w*?$)/;
	if (!defined $dir) { $dir = '' };
	my $id = $dir.'_'.basename($opts{'input'});

	# Make both paths absolute while still in the start directory: the commands built by
	# qsub_cb are used after the chdir below, where relative paths would point elsewhere
	my $input_bam = file($opts{'input'})->absolute->stringify;
	my $output_dir = dir($opts{'output_dir'})->absolute;

	# Change directory to output dir
	# 'or' is required here: with '||' the croak binds to $output_dir and never fires
	chdir("$output_dir") or croak "Something wrong with $output_dir: $!";

	my $output = $id . '.cov'.$min_cov.'.bed';

	qsub_cb($input_bam, $id, $output, $min_cov, $output_dir);

	return 0;
	}

### qsub_cb ####################################################################################
# Description:
#	Submits the callable base jobs. Two shell scripts are created and submitted:
#	  1. 'cb_<sample_name>': samtools view | bedtools genomecov -bga | awk, keeping the
#	     intervals whose 4th column (coverage) is at least min_cov
#	  2. '<output_file with _collapsed.bed>': bedtools merge over the result of job 1,
#	     submitted with a hold on the name of job 1
# Input Variables:
#	input_file  = bam file to analyse
#	sample_name = identifier used in the name of the first job and of its shell script
#	output_file = name of the bed file written by job 1 (expected to end in '.bed')
#	min_cov     = minimum coverage for an interval to be kept
#	output_dir  = directory for the shell scripts and bed files; also passed as the
#	              _e/_o value of both submissions
# Output Variables:
#	0 (the return values of the submission calls are not inspected)
# Notes:
#	Paths are interpolated into the shell commands unquoted, so they must not contain
#	white space or shell metacharacters

sub qsub_cb {
	my ($input_file, $sample_name, $output_file, $min_cov, $output_dir) = (@_);

	# Name of the merged bed file: only a trailing '.bed' extension is rewritten, so a
	# sample name that happens to contain 'bed' is left alone
	my $final_out = $output_file;
	my $replaced = ($final_out =~ s/\.bed\z/_collapsed.bed/);
	if (!$replaced) {
		# no extension to rewrite: append, so the merge never writes onto its own input
		$final_out .= '_collapsed.bed';
		}

	my $array_ref = ['Perl-BL', 'samtools', 'BEDTools'];
	my @job_name = ('cb_'.$sample_name, $final_out);
	my @shell_script_name = ('cb_'.$sample_name.".sh", $final_out.".sh");

	# Prepare qsub command
	my $submit_command_1 = 'samtools view -b ' . $input_file . ' | bedtools genomecov -bga -ibam - | awk \'$4 >= ' . $min_cov . ' {print $0}\' > ' . $output_dir.'/'.$output_file;
	my $submit_command_2 = 'bedtools merge -i ' . $output_dir.'/'.$output_file . ' > ' . $output_dir.'/'.$final_out;

	my $shell_script_cb = BoutrosLab::Utilities::General::create_shell_script(
		file => $shell_script_name[0],
		dir  => "$output_dir",
		command => $submit_command_1,
		modules => $array_ref
		);

	my $shell_script_collapse = BoutrosLab::Utilities::General::create_shell_script(
		file => $shell_script_name[1],
		dir  => "$output_dir",
		command => $submit_command_2,
		modules => $array_ref
		);

	SGE_tools::submit_qsub(
			script        => $shell_script_cb,
			type          => 'bash',
			memory        => 'h_vmem=8G',
			cwd           => 1,
			_e            => "$output_dir",
			_o            => "$output_dir",
			jobname       => $job_name[0],
		);

	# the merge reads the file written by the first job, hence the hold on that job's name
	SGE_tools::submit_qsub(
			script        => $shell_script_collapse,
			type          => 'bash',
			memory        => 'h_vmem=4G',
			cwd           => 1,
			_e            => "$output_dir",
			_o            => "$output_dir",
			jobname       => $job_name[1],
			hold          => $job_name[0],
		);

	return 0;
	}

__END__


=head1 NAME

generate_callable_bases.pl

=head1 SYNOPSIS

B<generate_callable_bases.pl> --input FILE --output_dir DIR [--min_cov N]

	Options:
	--help          brief help message
	--man           full documentation
	--input			input bam file
	--output_dir    output bed files directory
	--min_cov       minimum coverage for callable bases definition (default = 10)

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the manual page.

=item B<--input>

Input bam file.

=item B<--output_dir>

Output bed files directory.

=item B<--min_cov>

Minimum coverage for callable bases definition (default = 10).

=back

=head1 DESCRIPTION

B<generate_callable_bases.pl> 

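Finds the callable bases of the given bam file, i.e. the intervals covered by at least
B<--min_cov> reads, and writes them to bed files in the output directory: one file with the
individual intervals and one with overlapping and adjacent intervals merged.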

=head1 EXAMPLE

perl generate_callable_bases.pl --input /path/to/bam --output_dir .

=head1 AUTHOR

Vincent Huang vhuang -- Boutros Lab

The Ontario Institute for Cancer Research

Lydia Liu lliu -- Boutros Lab

The Ontario Institute for Cancer Research

=head1 ACKNOWLEDGEMENTS

Paul Boutros, PhD, PI - Boutros Lab
