#!/usr/bin/perl
#
# Script to count the proportion of iterations each linkage group
# has QTLs linked to it
#
# Usage: count.pl -i [start iteration][,:-][stop iteration] -f outfile -p posfile
#
# outfile defaults to loki.out
# posfile defaults to loki.pos
#
# Works with loki_2.3
# 
# Simon Heath - September 2000
#
use Getopt::Std;
use strict;
my($model,$fg,%chrom,$lg,@map_length,$map_length,$sex_map,$tmp,$file,%opt,$start,$stop);
my(@chr_length,@map_start,@map_end,$bw);
getopts('qf:p:b:i:h?',\%opt);
if($opt{h} || $opt{'?'}) {
	 print "usage: count.pl -i [start iteration][,:-][stop iteration] -f outfile -p posfile -b binsize\n";
	 exit(0);
}
# -i option sets a range of iterations to consider
if($opt{i}) {
	 $tmp=$opt{i};
	 if($tmp=~/^([1-9][0-9]*)(.*)/) {
		  $start=$1;
		  $tmp=$2;
	 }
	 if($tmp=~/^[,-:](.*)/) {
		  $tmp=$1;
		  if($tmp=~/^([1-9][0-9]*)/) {
				$stop=$1;
		  }
	 }
	 die "Bad -i option\n" if(!defined($start) && !defined($stop));
	 if(defined($start) && defined($stop) && $start>$stop) {
		  $tmp=$start;
		  $start=$stop;
		  $stop=$tmp;
	 }
	 $start=1 if(!defined($start));
}
# Set bin width
$bw=$opt{b}?$opt{b}:1;
#
# Read in linkage group details from loki.out
#
$file=$opt{f}?$opt{f}:"loki.out";
open FILE,$file or die "Could not open file '$file'\n";
while(<FILE>) {
	 # This should only occur if there is an error
	 last if(/^----/);
	 # Read in model
	 $model=$1 if(/^Model: (.+)/);
	 if(/^Linkage groups:$/) {
		  $fg=1;
		  next;
	 }
	 if($fg==1) {
		  # Check for linkage group names and map lengths
		  if(/(\d+): (.*)$/) {
				$lg=$1;
				if($2=~/^(.*)  Map range: (.*)/) {
					 $chrom{$lg}=$1;
					 if($2=~/\((-?[0-9.]+)cM to (-?[0-9.]+)cM\)(.*)/) {
						  $map_start[$lg][0]=$1;
						  $map_end[$lg][0]=$2;
						  $chr_length[$lg]=$2-$1;
						  if($3=~/\((-?[0-9.]+)cM to (-?[0-9.]+)cM\)/) {
								$map_start[$lg][1]=$1;
								$map_end[$lg][1]=$2;
								$chr_length[$lg]+=$2-$1;
								$sex_map=1;
						  }
					 } else { die "Error reading linkage group map range\n"; }
				} else { die "Error reading linkage group name\n"; }
		  } else {
				# Check for total map length
				if(/^Total Map Length: (.*)$/) {
					 if($1=~/(\d\d*\.?\d*)cM(.*)/) {
						  $map_length[0]=$1;
						  $map_length=$1;
						  if($2=~/(\d\d*\.?\d*)cM$/) {
								$map_length[1]=$1;
								$map_length+=$1;
								$sex_map=1;
						  }
					 } else { die "Error reading map lengths\n"; }	
					 $fg=2;
					 last;
				}
		  }
	 }
}
close FILE;
# Now start reading data from loki.pos
$file=$opt{p}?$opt{p}:"loki.pos";
open FILE,$file or die "Could not open file '$file'\n";
my($it,$i,$j,$k,$rep,$rep1,$n,$n_qtl,@cnt,@bf,$p,@dist,%tdist,@dist1,$skip,$sw,$sw1,@nq,$nq1,@nq1,@nq2);
my(@bin,@bin1,$sx,$x,$kk,$max_x,$max_l,$target,$target1,$nlink,@dist_lnk,$lg1);
$target=0;
$target1=100;
# Set up name and map length for unlinked portion of genome
for($sx=0;$sx<=$sex_map;$sx++) {
	 $k=0;
	 foreach $lg(keys %chrom) { $k+=$map_end[$lg][$sx]-$map_start[$lg][$sx]; }
	 if($k>$map_length[$sx]) {
		  print "Total map length $map_length[$sx] less then sum of chromosome lengths $k\nResetting map_length\n";
		  $map_length[$sx]=$k+1; # Add 1 in case we get some unlinked
	 }
	 $chr_length[0]+=$map_length[$sx]-$k;
}
$chrom{0}="Unlinked";
my($istty)=1 if -t STDOUT;
while(<FILE>) {
	 split;
	 $k=@_;
	 # Skip empty lines
	 next if(!$k);
	 # Get number of iterations from end of line
	 if($_[$k-1]=~/(.*):(\d+)/) {
		  $rep=$2;
		  $skip=0;
		  $rep1=$rep;
		  if($opt{i}) {
				if($start>$it+$rep) {$skip=1;} 
				elsif($start>$it+1) {$rep1=$it+$rep+1-$start;}
				if(defined($stop)) {
					 if($stop<=$it) {$skip=2;}
					 elsif($stop<=$it+$rep) {$rep1-=$it+$rep-$stop;}
				}
		  }
		  if($rep1 && !$skip) {
				$_[$k-1]=$1;
				$n_qtl=0;
				$i=0;
				undef @dist1;
				undef @nq;
				undef @bin1;
				$nlink=0;
				# Go though rest of line
				while($i<$k) {
					 $lg=$_[$i++]; # Linkage group
					 $n=$_[$i++];  # Number of QTLs in linkage group
					 $n_qtl+=$n;   # Add on to total QTL count
					 $nq[$lg]=$n;
					 $cnt[$lg]+=$rep1; # Add to count for linkage group
					 $dist1[$lg]=$n;
					 if($lg) {     # If linked, go thought QTL positions
						  $nlink+=$n;
						  $j=0;
						  while($j<$n) {
								for($sx=0;$sx<=$sex_map;$sx++) {
									 $x=$_[$i++]; # Get position
									 $kk=int(($x-$map_start[$lg][$sx])/$bw); # Get offset from start of map, and compute bin
									 $bin1[$sx][$lg]{$kk}=1; # Add count to bin
								}
								$j++;
								die "Invalid number of columns\n" if($i>$k);
						  }
					 }
				}
				for($sx=0;$sx<=$sex_map;$sx++) {
					 if($map_length[$sx]>0) {
						  $p=$bw/$map_length[$sx]; # Prior for 1 QTL being linked to a bin
						  $p=1-((1-$p)**$n_qtl); # Prior of at least 1 QTL in bin
					 } else {
					 	  $p=1;
					 }
					 foreach $lg (keys %chrom) {
						  $j=$bin1[$sx][$lg];
						  foreach $kk(keys %$j) {
								$bin[$sx][$lg][$kk]+=$rep1/$p;
						  }
					 }
				}
				# Check if switch in QTL number has occurred
				if($n_qtl!=$nq1) {
					 $sw++;
					 $nq1=$n_qtl;
				}
				# Check if switch in number linked has occurred
				foreach $lg(keys %chrom) { 
					 if($lg && ($nq[$lg]!=$nq1[$lg])) {
						  $sw1++;
						  $nq1[$lg]=$nq[$lg];
					 }
				}
				# Accumulate QTL distribution counts for linkage groups
				foreach $lg(keys %chrom) {
					 my($lg1);
					 $dist[$lg][$dist1[$lg]]+=$rep1;
					 if($lg>1) {
						  foreach $lg1(keys %chrom) {
								next if(!$lg1 || $lg1>=$lg);
								if($dist1[$lg]>0 && $dist1[$lg1]>0) {
									 $nq2[$lg][$lg1]+=$rep1;
								}
						  }
					 }
				}
				$dist_lnk[$nlink]+=$rep1;
				# and overall QTL distribution counts
				$tdist{$n_qtl}+=$rep1;
				# Compute BF for linkage
				if($n_qtl && $map_length) {
					 $i=0;
					 while($i<$k) {
						  $lg=$_[$i++]; # Linkage group
						  $n=$_[$i++];  # Number of QTLs in linkage group
						  $p=$chr_length[$lg]/$map_length; # Prior for single QTL to linkage group
						  $p=1-((1-$p)**$n_qtl); # Prior of at least 1 QTL on linkage group
						  $bf[$lg]+=$rep1/$p; # Add post/prior ratio to @bf
						  $i+=$n*(1+$sex_map) if($lg); # Skip to next record
					 }	
					 die "Invalid number of columns\n" if($i>$k);
				}
		  }
		  $it+=$rep;
		  if(!$opt{q} && $it>=$target1 && $istty) {
				print "\rAt iteration: $target1";
				while($it>=$target1) {$target1+=100;}
		  }
		  last if($skip==2);
	 }
}
close FILE;
print "\r" if($istty && !$opt{q});
# Print report
print "----------------------------------------------\n";
if($opt{i}) {
	 $stop=$it if(!defined($stop));
	 $it=$stop-$start+1;
}
print"Model: $model\nIterations: $it\n";
if($it) {
	 printf("Switching proportion for QTL no.: %.5f\n",$sw/$it);
	 printf("Switching proportion for linked QTLs:  %.5f\n\n",$sw1/$it);
	 if($map_length) {
		  print "Linkage group    Count   Prop. linked    BF\n";
		  print "----------------------------------------------\n";
	 } 
	 foreach $lg (sort {$a<=>$b} keys %chrom) {
		  if($map_length) {
				printf("%-13s %8d     %.5f     %.5f",$chrom{$lg},$cnt[$lg],$cnt[$lg]/$it,$bf[$lg]/$it);
				if($lg) {
					 for($sx=0;$sx<=$sex_map;$sx++) {
						  $x=$map_start[$lg][$sx]+.5*$bw;
						  $max_l=0;
						  $kk=0;
						  while($x<=$map_end[$lg][$sx]) {
								$j=$bin[$sx][$lg][$kk++]/$it;
								if($j>$max_l) {
									 $max_l=$j;
									 $max_x=$x;
								}
								$x+=$bw;
						  }
						  printf("  %8.2f %7.2fcM",$max_l,$max_x);
					 }
				}
				print "\n";
		  } else {
				printf("%-13s %8d     %.5f\n",$chrom{$lg},$cnt[$lg],$cnt[$lg]/$it);
		  }
	 }
	 print "\n";
	 $i=0;
	 foreach $lg (sort {$a<=>$b} keys %chrom) {
		  next if($lg<1);
		  foreach $lg1(sort {$a<=>$b} keys %chrom) {
				next if(!$lg1);
				last if($lg1>=$lg);
				$i=1;
				printf("%-23s    %.5f\n","$chrom{$lg}-$chrom{$lg1}",$nq2[$lg][$lg1]/$it);
		  }
	 }
	 print "\n" if($i);
	 foreach $i(sort {$b<=>$a} keys %tdist) {$k=$i;last;}
	 print "QTL number ";
	 for($i=0;$i<=$k;$i++) {printf " %6d",$i;}
	 print "\n-------------";
	 for($i=0;$i<=$k;$i++) {printf "-------",$i;}
	 print "\nOverall      ";
	 for($i=0;$i<=$k;$i++) {printf " %.4f",$tdist{$i}/$it;}
	 print "\nLinked       ";
	 for($i=0;$i<=$k;$i++) {printf " %.4f",$dist_lnk[$i]/$it;}
	 print "\n";
	 foreach $lg(sort {$a<=>$b} keys %chrom) {
		  printf("%-13s",$chrom{$lg});
		  for($i=0;$i<=$k;$i++) {printf " %.4f",$dist[$lg][$i]/$it;}
		  print "\n";
		  if(!$lg) {
				print "-------------";
				for($i=0;$i<=$k;$i++) {printf "-------",$i;}
				print "\n";
		  }
	 }
}
