# TOOL dimont-predictor.R: DimontPredictor (DimontPredictor lets you predict binding sites for a motif discovered by Dimont on supplied input data.)
# INPUT model.xml: "Dimont model" TYPE GENERIC (The XML representation of a motif model created using Dimont. Models are available from the Dimont output, one for each motif discovered.)
# INPUT seqdata.fa: "Input sequences" TYPE GENERIC (The input sequences for de-novo motif discovery in annotated FastA format. The required format can be generated using the \"Dimont data extractor\".)
# OUTPUT dimont-predictor.log: Logfile (Logfile of the DimontPredictor run.)
# OUTPUT dimont-predictor-predictions.txt: Predictions (Binding sites predicted by DimontPredictor.)
# OUTPUT dimont-predictor-logo-rc.png: "Sequence logo (rc\)" (The sequence logo of the reverse complement of the predictions.)
# OUTPUT dimont-predictor-logo.png: "Sequence logo" (The sequence logo of the predictions.)
# PARAMETER pval: "p-value" TYPE DECIMAL FROM 0 TO 1 DEFAULT 0.001 (The maximum p-value allowed for predicted binding sites.)
# PARAMETER valtag: "Value tag" TYPE STRING DEFAULT "signal" (The tag for the value information in the FastA-annotation of the input file, default as generated by \"Dimont data extractor\".)
# PARAMETER OPTIONAL wf: "Weighting factor" TYPE DECIMAL FROM 0 TO 1 DEFAULT 0.2 (The value for weighting the data, a value between 0 and 1. Recommended values: 0.2 for ChIP-seq/ChIP-exo, 0.01 for PBM data.)

# Linearise the FASTA file in place: every header line (starting with ">") is
# preceded by a newline and all sequence lines of a record are joined into one
# line. After this step a record without any sequence shows up as a header
# line directly followed by an empty line, and the file starts with an empty
# line.
perl_status <- system("perl -n -i -e \'if(/^>/) { print \"\\n$_\"; } else { s/\\n//g && print }\' seqdata.fa")
if (perl_status != 0) {
  stop("Reformatting seqdata.fa with perl failed (exit status ",
       perl_status, ").", call. = FALSE)
}

# readLines() keeps every line verbatim (scan() would turn a line consisting
# of "NA" into a missing value). warn = FALSE because the perl step leaves the
# last line without a terminating newline.
fasta_file <- readLines("seqdata.fa", warn = FALSE)

is_blank <- !nzchar(fasta_file)
if (any(is_blank)) {
  blank_idx <- which(is_blank)
  # Drop each empty line together with the line before it, i.e. the header of
  # a record that has no sequence. The leading empty line has no predecessor,
  # hence the filter on positive indices.
  remove_lines <- c(blank_idx - 1, blank_idx)
  remove_lines <- unique(remove_lines[remove_lines > 0])
  fasta_file <- fasta_file[-remove_lines]
}

# A header at the very end of the file is a record without sequence that is
# not followed by an empty line, so the filter above cannot see it.
n_lines <- length(fasta_file)
if (n_lines > 0 && substr(fasta_file[n_lines], 1, 1) == ">") {
  fasta_file <- fasta_file[-n_lines]
}

writeLines(fasta_file, "seqdata.fa")

tool <- file.path(chipster.tools.path, "dimont", "DimontPredictor.jar")

# valtag is free text supplied by the user and the tool path may contain
# characters that are special to the shell, so both are quoted before they are
# spliced into the command line. Standard output goes to the log file.
command <- paste0(
  "java -Xms512M -Xmx2G -Djava.awt.headless=true -jar ", shQuote(tool),
  " dimont=model.xml",
  " data=seqdata.fa",
  " infix=dimont-predictor",
  " value=", shQuote(valtag),
  " weightingFactor=", wf,
  " p-value=", pval,
  " > dimont-predictor.log"
)

java_status <- system(command)
if (java_status != 0) {
  stop("DimontPredictor failed (exit status ", java_status,
       "); see dimont-predictor.log.", call. = FALSE)
}
