/*
 * PEAK-SEQ -- PREPROCESSING
 * Paper by Joel Rozowsky, et al.
 * Coded in C by Theodore Gibson.
 * Main program.
*/


#include <stdio.h>
#include <string.h>
#include "sgr.h"
#include "io.h"
#include "util.h"
#include "config.h"

//-------------------------------------------------
// MAIN PROGRAM
//-------------------------------------------------

// JH --
/*
 * Preprocessing entry point.
 *
 * Copies the values behind the pointer arguments into the program-wide
 * configuration variables, then:
 *   1. opens INPUT_FILENAME for reading;
 *   2. opens one output file per chromosome number in [MIN_CNUM, MAX_CNUM],
 *      named ELAND_PREFIX + <chromosome name> + ELAND_SUFFIX;
 *   3. reads the input line by line, copies every accepted line to the
 *      output file of its chromosome and records the read's start/stop
 *      positions via insertPos();
 *   4. closes all files and hands the position arrays to fprintfSGR().
 *
 * Every argument is passed by pointer and dereferenced exactly once;
 * l_SGR_PREFIX / l_SGR_SUFFIX are only stored in the corresponding
 * globals here.
 *
 * NOTE(review): fatal() is assumed not to return (the error paths free
 * resources immediately before calling it) -- confirm against util.
 */
void preprocessMain(int *l_READ_LENGTH,
						 int *l_MIN_CNUM,
						 int *l_MAX_CNUM,
						 int *l_START,
						 int *l_STOP,
						 int *l_FILENAME_LENGTH,
						 char **l_INPUT_FILENAME,
						 char **l_ELAND_PREFIX, 
						 char **l_ELAND_SUFFIX,
						 char **l_SGR_PREFIX,
						 char **l_SGR_SUFFIX)
{
	printf("Reading input from R");
	READ_LENGTH = *l_READ_LENGTH;
	MIN_CNUM = *l_MIN_CNUM;
	MAX_CNUM = *l_MAX_CNUM;
	N_CHRS = MAX_CNUM - MIN_CNUM + 1;
	START = *l_START;
	STOP = *l_STOP;

	FILENAME_LENGTH = *l_FILENAME_LENGTH;
	INPUT_FILENAME = *l_INPUT_FILENAME;
	ELAND_PREFIX = *l_ELAND_PREFIX;
	ELAND_SUFFIX = *l_ELAND_SUFFIX;
	SGR_PREFIX = *l_SGR_PREFIX;
	SGR_SUFFIX = *l_SGR_SUFFIX;
	// -- JH
	printf("Input read from R");

	// Open the file.
	FILE* in = fopen( INPUT_FILENAME, "r" );

	// Check that the file opens correctly.  Otherwise, quit the program.
	if( in == NULL )
		fatal("Error opening input file %s. Program exiting.", INPUT_FILENAME);


//*****
//*****OPEN THE ELAND OUTPUT FILES FOR EACH CHROMOSOME.
//*****

	// Buffer holding each output filename (FILENAME_LENGTH bytes,
	// terminator included).
	char filename[FILENAME_LENGTH];

	// Allocate memory for one file handle per chromosome (N_CHRS entries).
	FILE** elands = safe_malloc( N_CHRS * sizeof(FILE*) );

	// This loop runs once for each chromosome.
	for(int cnum = MIN_CNUM; cnum <= MAX_CNUM; cnum++)
	{
		// Get the string representation for this chromosome.
		String cname = getCname( cnum );

		// Build prefix + name + suffix with a bounded write so that an
		// over-long name is detected instead of overflowing the buffer.
		int written = snprintf( filename, sizeof filename, "%s%s%s",
			ELAND_PREFIX, cname, ELAND_SUFFIX );
		int fits = ( written >= 0 && (size_t) written < sizeof filename );

		// Free the chromosome name.
		free( cname );

		// Open this file for output (only if the name was not truncated).
		FILE* out = fits ? fopen( filename, "w" ) : NULL;
		elands[cnum-MIN_CNUM] = out;

		// Make sure the file opens correctly.
		if( out == NULL )
		{
			// Release everything acquired so far before bailing out.
			fclose( in );
			for(int i = 0; i < cnum-MIN_CNUM; i++)
				fclose( elands[i] );
			free( elands );

			if( !fits )
				fatal("Output filename for chromosome number %d does not fit in "
					"%d characters. Program exiting.", cnum, FILENAME_LENGTH);
			else
				fatal("Error opening file %s for output.\nProgram can create files "
					"but not directories. Program exiting.", filename);
		}
	}

	// Initialize the pos array for an array for each chromosome.
	Ps* array = newPsArray();


//*****
//*****READ AND PROCESS INPUT FILE.
//*****

	// These local variables are used for temporary storage.
	char line[500];
	char seq[39];		// "%38s" stores up to 38 characters plus the terminator.
	char cname[7];		// "%6[^.]" stores up to 6 characters plus the terminator.
	char sep = '\0';	// Character that follows the chromosome name.
	int pos = 0;
	char dir = '\0';

	// These variables are used in loop and scanf() syntax.
	int ret = 0;
	int ch = 0;			// Must be int: getc() returns int so EOF is distinguishable.

	// This loop runs until the end of the file is reached.
	while( (ch = getc(in)) != EOF )
	{
		// Return the character used for testing to the file.
		ungetc( ch, in );

		// Get this line from the file (at most 499 characters; the
		// remainder, if any, is discarded by getNewline() below).
		ret = fscanf(in, "%499[^\n]", line);

		// If the line was incorrectly read, skip this line.
		if( ret != 1 )
		{
			printf("Error reading file. Line skipped.\n");
			getNewline( in );
			continue;
		}

		// Get the information from this line of the file: the 2nd field is
		// the sequence, the 7th field starts with the chromosome name
		// followed by one separator character, then come the position and
		// the direction character.
		ret = sscanf(line, "%*s%38s%*s%*s%*s%*s %6[^.]%c%*s%d %c", seq, cname, &sep, &pos, &dir);

		// If the line was incorrectly read, skip this line.
		if( ret != 5 )
		{
			printf("Error reading file. Line skipped.\n");
			getNewline( in );
			continue;
		}

		// If this chromosome field is not in the format chr##.fa (i.e. it is
		// chr##_random.fa, etc.) skip this line.
		if( sep != '.' )
		{
			getNewline( in );
			continue;
		}

		// Get the number of this chromosome.
		int cnum = getCnum( cname );

		// Make sure this chromosome exists BEFORE using it as an index.
		if( cnum < MIN_CNUM || cnum > MAX_CNUM )
		{
			printf("Error reading file. Line skipped.\n");
			getNewline( in );
			continue;
		}

		// Get the ps structure corresponding to this chromosome.
		Ps ps = array[cnum-MIN_CNUM];

		// Output the line to the correct Eland file.
		fprintf( elands[cnum-MIN_CNUM], "%s\n", line );

		// If this is a reverse read, correct the position start.
		if( dir == 'R' )
		{
			// seq holds at most 38 characters, so the cast is safe; doing
			// the arithmetic in int keeps a negative start well defined.
			int seqLen = (int) strlen( seq );

			// Get the start position with respect to the forward strand.
			int start = pos + seqLen - READ_LENGTH;

			// If the computed start falls before position 1, clamp it to
			// the start of the chromosome.
			if( start < 1 ) start = 1;

			// Insert the start position of the read.
			insertPos( ps, start, START );

			// Regardless of whether the start was clamped, the stop of
			// the read is pos + strlen( seq ).
			insertPos( ps, pos + seqLen, STOP );
		}

		// Forward read: insert it into the array of positions
		// corresponding to this chromosome.
		else if( dir == 'F' )
		{
			insertPos( ps, pos, START );
			insertPos( ps, pos + READ_LENGTH, STOP );
		}

		// Any other direction character is an error in this line; no
		// positions are recorded for it.

		// Go to the next line in the file.
		getNewline( in );
	}

	// Now that the entire file has been read, close all the Eland output files as
	// well as the input file.
	fclose( in );
	for(int i = 0; i < N_CHRS; i++)
		fclose( elands[i] );
	free( elands );


//*****
//*****PRINT SGR FILES.
//*****

	// Print to the sgr files.
	fprintfSGR( array );
}
