#!/usr/bin/perl
#______________________________________________________________________
# Title     : B (for Bio) or bio_lib.pl
# Usage     : require "B.pl"; ##<-- This is very slow, so you'd better
#             copy the subroutines in your prog. or make a smaller lib files
#               which are classified according to functions(like, Bio_Seq.pl
#               for sequence handling, Bio_Array.pl for various array
#               subroutines..), or make your own module out of this, do whatever
#               you want....
# Function  : This has all the sub routines a biomatic has developed.
#             You can copy any of the sub routines in this file, modify, use
#             in yours...
#             PLEASE MODIFY AS FREELY AS YOU WANT !!
#
#             " I am the programmer and I will try to debug with all my
#             efforts if you have any problems with my codes."
#
#             All my subroutines are tested in small files
#             If you want to have such single example program
#             to see how they really work, pls contact me( A Biomatic )
#             For example, a file called  'handle_arguments.pl' exists to
#             test the subroutine 'handle_arguments'. Usually you can find them
#             in  http://www.mrc-lmb.cam.ac.uk/genomes/jong/B.pl.html
#
#             This was meant to be used in Bioperl project, but I did not
#             have time to make object oriented module out of this. Also,
#             these procedural subroutines might be easier for a lot of
#             people.
#
# Example   : require "B.pl"; BUT, I recommand you take subroutines out and
#             use it directly or modify in your programs.
# Warning   : CopyLEFTed, for the enhancement of Biology, Biomatics, and Science.
#             This is a development companion, nothing else.
#             Class is for classification of my subroutines. If it is B, it can
#             be useful for biological sequence data handling. If it's Utility,
#             it can also be used for general purpose file handling stuff.
#             File, Array, Hash,... are my classification items.
# Keywords  : Biology, perl library, sequence handling lib
# Options   : nothing (used as subroutine library)
#             There are certain package structures;
#               1. Bio is the highest structure. This includes file handling
#                  and utility stuff, too.
#               2. Bio::Seq  is rather specific for sequence handling stuff.
#                  To be sorted and developed later.
#               3. YOU can add, modify or discuss on this structure anytime :-)
#
# Version   : 1.6    (Sept/21/1997)
#------------------------------------------------------------------

## The following box is used as the header for any subroutines developed to
##  give information on the subroutines. It is used by Jong as a template.


#________________________________________________________________________________
# Title     :
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Version   : 1.0
#--------------------------------------------------------------------------------


## Following variables in 'my' are very commonly used ones. I have
## put those to be copied into any new subroutines to be developed
## This is because, in Perl, every variable is global unless you mark them
## to be inside the subroutines. Many BUGs are coming from not localizing vars.
## This array variables are used  as a defalt insertion for the subroutine
## 'handle_arguments'. If you add this box in any sub, 'handle_arguments'
## subroutine will be called and any arguments passed to the subroutine will
## be classified to file, dir, string, hash(as reference), array(as reference),
## pure number, or option(with -) prefix. etc. For more detail look at
## handle_argument's header.

	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""


#________________________________________________________________________
# Title     : handle_arguments
# Usage     : Just put the whole box delimited by the two '###..' lines below
#             to inside of your subroutines. It will call 'handle_arguments'
#             subroutine and parse all the given input arguments.
#             To use, claim the arguments, just use the variable in the box.
#             For example, if you had passed 2 file names for files existing
#             in your PWD(or if the string looks like this: xxxx.ext),
#             you can claim them by $file[0], $file[1] in
#             your subroutine.
# Function  : Sorts input arguments going into subroutines and returns default
#             arrays of references for various types (file, dir, hash, array,,,,)
#             If you give (\@out, @file), it will put @out into @array as a ref
#             and also the contents of @out will be dereferenced and put to
#             raw_string regardless what is in it).
#
# Example   : 'handle_arguments(\@array, $string, \%hash, 8, 'any_string')
# Warning   :
# Keywords  : handling arguments, parsing arguments,
# Options   :
# Returns   : Following GLOBAL variables
#
#             $num_opt,    @num_opt     @file          @dir
#             $char_opt,   @char_opt    %vars          @array,
#             @hash        @string,     @raw_string    @range,
#
#             $num_opt has 10,20
#             @num_opt has (10, 20)
#             @file has  xxxx.ext
#             @dir has  dir  or /my/dir
#             $char_opt has 'A,B'
#             @char_opt has (A, B)
#             @array has  (\@ar1, \@ar2)
#             @hash has (\%hash1, \%hash2)
#             @string  ('sdfasf', 'dfsf')
#             @raw_string (file.ext, dir_name, 'strings',,)
#             @range has values like  10-20
#             %vars deals with x=2, y=3 stuff.
#
# Argument  : any type, any amount
# Version   : 4.8
#--------------------------------------------------------------------
sub handle_arguments{
	my($c, $d, $e, $f, $i, $j, $k, $l, $s, $t, $x, $y, $z, $char_opt, $dir, @hash,
		$file, $in_dir, $num_opt, @char_opt, @dir, @file, @string, @file_dir, @k,
		@num_opt, @raw_string,@string, @array, %vars, @range, @temp, $temp,
		@char_options);

  &set_debug_option;
  if(@_<1){ print chr(7),"\n This is handle_arguments. No args Passed, Error?\n"}
  elsif( (@_ ==1)&& (ref($_[0]) eq 'ARRAY') ){ # when there is only 1 argument
	  push(@array, $_[0]);
	  push(@k, $_[0]);
  }elsif( (@_==1)&&( !ref($_[0]) ) ){
	  if(-f $_[0]){ push(@file, $_[0]);   push(@string, $_[0]) }
	  elsif(-d $_[0]){ push(@dir, $_[0]); push(@string, $_[0]) }
	  elsif($_[0]=~/^\d+$/){ push(@num_opt, $_[0]); $num_opt.=$_[0] }
	  elsif($_[0]=~/^\w+$/){ push(@string, $_[0]); }
  }elsif(@_ >=1){ @k = @_ }

  #####______Start of  general argument handling______######
  for($k=0; $k < @k ;$k++){
	  if( !ref($k[$k]) ){
		  if($k[$k]=~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){  push(@char_opt, $1); $char_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\-([a-zA-Z]+)$/){          ## When multiple option is given,
			  @char_options = split(/\,|/, $1);  push(@char_opt, @char_options);
			  $char_opt .= join("\,", @char_options); ## '-' should be used. eg. '-HEGI'
		  }elsif($k[$k]=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
		  }elsif($k[$k]=~ /^(\-?\d+)$/){ push(@num_opt, $1);  $num_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\d+\.?\d*\-\d+\.?\d*$/){  push(@range,  $k[$k] );
		  }elsif(-f $k[$k]){                          push(@file,   $k[$k] );
		  }elsif(-d $k[$k]){                          push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /\/[\w\d\.\-]+[\/].+[\/]$/){push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^\/[\w\d\.\-]+[\/]*$/){    push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^[\/\w\d\-\.]+\.\w+$/){    push(@file,   $k[$k] );
		  }elsif($k[$k]=~ /\S\/[\/\w\d\-\.]+\.\w+$/){ push(@file,   $k[$k] );
		  }elsif($k[$k]=~/^\w+[\/\\\w\d\.\-]+$/){     push(@string, $k[$k] );
		        # string does not have space, but includes '\', '/', '.'
		  }else{                                      push(@raw_string, $k[$k] );  }

	  }elsif( ref($k[$k]) ){
		  if( ref($k[$k]) eq "SCALAR"){
			 if(${$k[$k]} =~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){ push(@char_opt, $1); $char_opt  .= "$1\,";
				}elsif(${$k[$k]}=~ /^\-([a-zA-Z]+)$/){ push(@char_opt, @char_options);
					$char_opt  .= join("\,", @char_options);  ## as an option string.
				}elsif(${$k[$k]}=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
				}elsif(${$k[$k]}=~ /^(\-?\d+)$/){ $num_opt .= "$1\,";  push(@num_opt, $1);
			    }elsif(${$k[$k]}=~ /^\d+\.?\d*\-\d+\.?\d*$/){    push(@range,  $k[$k] );
				}elsif(-f ${$k[$k]}){                            push(@file,   ${$k[$k]} );
				}elsif(-d ${$k[$k]}){                            push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\/[\/\w\d\.\-]+[\/]*$/){     push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /^[\/\w\d\-\.]+\.\w+$/){      push(@file,   ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\w+[\w\d\.\-]+$/){           push(@string, ${$k[$k]} );
				}else{                                           push(@raw_string, ${$k[$k]}); }
		  }elsif(ref($k[$k]) eq "ARRAY"){ my @temp_arr = @{$k[$k]}; push(@array, $k[$k]);
			for ($i=0; $i<@temp_arr; $i++){
			   if(-f $temp_arr[$i]){                            push(@file, $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/^\d+\.?\d*\-\d+\.?\d*$/){ push(@range,$temp_arr[$i] );
			   }elsif(-d $temp_arr[$i]){                        push(@dir , $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\/[\/\w\d\.\-]+[\/]*$/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^[\/\w\d\-\.]+\.\w+$/){   push(@file,$temp_arr[$i] );
																push(@string,$temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\w+[\w\d\.\-]+$/){       push(@string,$temp_arr[$i]);
			   }else{                                           push(@raw_string, $temp_arr[$i]); }
			 }
		  }elsif(ref($k[$k]) eq "HASH"){                             push(@hash,   $k[$k] ); }
	  }
  }
  @raw_string=(@raw_string, @string);
  @file = @{&remove_dup_in_arrayH(\@file)};
  #-----------------------------------------------------
	 sub remove_dup_in_arrayH{  my($i, @nondup, @out_ref, %duplicate, @orig, @out_ref);
		for($i=0; $i<@_; $i++){  undef(%duplicate);
	       if(ref($_[$i]) eq 'ARRAY'){    @orig = @{$_[$i]};    }
		   @nondup = grep { ! $duplicate{$_}++ } @orig; push(@out_ref, \@nondup);  }
		if(@out_ref ==1){ return($out_ref[0]);}
		elsif(@out_ref >1){  return(@out_ref);}
	 }
  #-----------------------------------------------------
  return(\@hash, \@array, \@string, \@dir, \@file, \@num_opt,
			\@char_opt, \$num_opt, \$char_opt, \@raw_string, \%vars, \@range );
}


#__________________________________________________________________________
# Title     : sort_by_E_values
# Usage     : @out=@{&sort_by_E_values(\@input_line_array)};
# Function  : it sorts by the 2nd column(E-value, in msp file), small comes top
# Example   :
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub sort_by_E_values{
   my (@in);
   if(ref $_[0] eq 'ARRAY'){
	  @in = @{$_[0]};
   }else{
	  @in = @_;
   }
   @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map {/^ {0,3}\S+ +(\S+)/ && [$_, $1] } @in;
   return(\@in);
}

#__________________________________________________________________________
# Title     : sort_by_column
# Usage     : @out=@{&sort_by_column(\@input_line_array, 1)}; # while 1 is col num
# Function  : it sorts by the 2nd column(E-value, in msp file), small comes top
#             by the help of  ts <decoux@moulon.inra.fr>
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#              sort_by_pattern, sort_column_by_size
# Options   :
# Version   : 1.3
#----------------------------------------------------------------------------
sub sort_by_column{
   my (@in, @M);
   unless(@_ ==2  ){ print "\n# FATAL: sort_by_column needs 2 arguments\n"; exit }
   if(ref $_[0] eq 'ARRAY'){ 	  @in = @{$_[0]};      }else{ 	  @in = @_;    }
   if(ref $_[1] eq 'SCALAR'){ 	  $col=${$_[1]};       }else{	  $col=$_[1];  }
   $col--;
   @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
   return(\@in);
}

#__________________________________________________________________________
# Title     : sort_by_cluster_size
# Usage     : @out=@{&sort_by_cluster_size(\@input_line_array)};
# Function  : it sorts by the 1st digit before '-'  as in 2-183_cluster, 2-140_cluster,
#               etc.
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#             sort_by_pattern
# Options   :
# Version   : 1.2
#----------------------------------------------------------------------------
sub sort_by_cluster_size{
   my (@in, @M, $col);
   if(@_ < 1  ){ print "\n# FATAL: sort_by_cluster_size needs 1 argument\n"; exit }
   if(ref $_[0] eq 'ARRAY'){ 	  @in = @{$_[0]};      }else{ 	  @in = @_;    }
   $col=0;
   @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($_=~/^(\S+)\-/)[$col] ] } @in;
   return(\@in);
}


#__________________________________________________________________________
# Title     : sort_by_column_bigger_first
# Usage     : @out=@{&sort_by_column_bigger_first(\@input_line_array, 1)};
# Function  : it sorts by the 2nd column(E-value, in msp file), small comes top
#             by the help of  ts <decoux@moulon.inra.fr>
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#
# Options   :
# Version   : 1.1
#----------------------------------------------------------------------------
sub sort_by_column_bigger_first{
   my (@in, @M);
   unless(@_ ==2  ){ print "\n# FATAL: sort_by_column_bigger_first needs 2 arguments\n"; exit }
   if(ref $_[0] eq 'ARRAY'){ 	  @in = @{$_[0]};      }else{ 	  @in = @_;    }
   if(ref $_[1] eq 'SCALAR'){ 	  $col=${$_[1]};       }else{	  $col=$_[1];  }
   $col--;
   @in= map {$_->[0]} sort { $b->[1] <=> $a->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
   return(\@in);
}



#_____________________________________________________________________
# Title     : make_scrambled_seq_database
# Usage     : &make_reverse_seq_database(\@input_database_fasta_file);
# Function  :
# Example   :
# Warning   :
# Keywords  : scramble_seq_database, create_scrambled_seq_database
# Options   :
# Version   : 1.1
#-------------------------------------------------------------------
sub make_scrambled_seq_database{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my (%seqs, %scrambled_seqs, $fasta_file_for_DB,$base,$ext,$out_file_name );

   for($i=0; $i< @file; $i++){
	  $fasta_file_for_DB =$file[$i];
	  $base=${&get_base_names($fasta_file_for_DB)};
	  #~~~~~~~~~~~~~~~~~~~ To prevent growing of _sc_sc_sc... ~~~~~~~~~~~~`
	  if($base=~/^(\S+)_sc[\_sc]*$/){ $base=$1 }

	  $ext =${&get_file_extensions($file[$i])};
	  if($ext=~/\S/){
		 $out_file_name="$base\_sc\.$ext";
		 $out_bak_file ="$base\_sc_bak\.$ext";
	  }else{
	     $out_file_name="$base\_sc\.fa";
		 $out_bak_file ="$base\_sc_bak\.fa";
		 print "\n# There was no file ext for $base, attaching \"fa\" as default\n";
	  }
	  if(-s $out_file_name){
		 print "\n# $out_file_name already exists, moving it to $out_bak_file\n";
	  }
	  %seqs=%{&open_fasta_files(\$fasta_file_for_DB)};
	  %reversed_seqs=%{&scramble_sequences(\%seqs)};
	  &write_fasta(\%reversed_seqs, $out_file_name );
	  if(-s $out_file_name){
		 print "\n# make_scrambled_seq_database: Supposedly wrote new file: $out_file_name\n";
	  }else{
		 print "\n# make_scrambled_seq_database: Error in writing: $out_file_name\n";
	  }
   }
   print "\n# make_scrambled_seq_database sub finished \n";
}


#__________________________________________________________________________
# Title     : make_2D_identity_matrix_array
# Usage     : @matrix=@{&make_2D_identity_matrix(\@seq1, \@seq2)};
# Function  : @matrix is like  $matrix[1][2]=1;
#             This assigns number 1 to array element
#             If one array is given, it makes self to self matrix.
#             When 2 are given, make matrix for the 2
# Example   :
# Keywords  : make_matrix
# Options   :
#    $skip_gap_char = g  for skipping gap char (any special char)
# Returns   :
# Argument  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub make_2D_identity_matrix_array{
	my (@matrix, $skip_gap_char, $k, $l, @seq_1, @seq_0);
	for($i=0; $i< @_; $i++){
		if($_[$i]=~/g/){
			$skip_gap_char='g';
			splice (@_, $i, 1);
			$i--;
		}elsif(ref($_[$i]) eq 'ARRAY'){
			push(@seqs, $_[$i]);
		}
	}
	@seq_0=@{$seqs[0]};
	@seq_1=@{$seqs[1]};
	unless(@seq_1){ @seq_1=@seq_0; };
	for($k=0; $k< @seq_0; $k++){
	   for($l=0; $l< @seq_1; $l++){
		  if($seq_1[$l] =~/\W/ and $skip_gap_char){ next };
		  if($seq_0[$k] eq $seq_1[$l]){
			 $matrix[$k][$l]=1;
			 print "# X\[$k\] Y\[$l\] = 1 \n";
		  }
	   }
	}
	return(\@matrix);
}


#__________________________________________________________________________
# Title     : make_2D_aa_residue_matrix_array
# Usage     : @matrix=@{&make_2D_aa_residue_matrix_array(\@seq)};
# Function  : @matrix is like  $matrix[1][2]='A'; when aa residue is identical
#             This assigns identical residue to array element
#             If one array is given, it makes self to self matrix.
#             When 2 are given, make matrix for the 2
# Example   :
# Keywords  : make_matrix
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub make_2D_aa_residue_matrix_array{
	my @seq=@{$_[0]};
	my @seq2=@{$_[1]};
	if(@_ == 1){ @seq2=@seq };

	my (@residue_matrix, $k, $l);
	for($k=0; $k< @seq; $k++){
	   for($l=0; $l< @seq2; $l++){
		  if($seq[$k] eq $seq2[$l]){
			 $residue_matrix[$k][$l]="$seq[$k]";
			 print "# $seq[$k] = $l \n";
		  }
	   }
	}
	return(\@residue_matrix);
}


#__________________________________________________________________________
# Title     : make_2D_identity_matrix
# Usage     : @matrix=@{&make_2D_identity_matrix(\$seq, [\$seq2] )};
# Function  : @matrix is like  $matrix[1][2]=1;
#             This assigns number 1 to array element
# Example   :
# Keywords  : make_matrix, make_identity_matrix
# Options   :
#        s  for show axis
# Returns   :
# Argument  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub make_2D_identity_matrix{

	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@matrix, $k, $l, @seq1, @seq2 );
	if(@_ > 1){	@seq1=split(//, $string[0]);   @seq2=split(//, $string[1]);
	}else{	    @seq2=@seq1= split(//, $string[0]);	}

	if($char_opt=~/s/){
	   @matrix = map {  $m = $_; "$m  ".join('', map {$_ eq $m or ' '} @seq1 )."\n";  } @seq2;
	}else{
	   @matrix = map {  $m = $_; join('', map {$_ eq $m or ' '} @seq1 )."\n";  } @seq2;
	}
	foreach (@matrix) {	   print ;	}
	return(\@matrix);
}

#________________________________________________________________________________
# Title     : amino_acid_homology_matrix
# Usage     : $yes_no=${&amino_acid_homology_matrix('E', 'D')};
# Function  :
# Example   :
# Keywords  : are_they_homologous, amino_acid_homology_table, compare_amino_acid_homology
# Options   :
# Version   : 1.0
#--------------------------------------------------------------------------------
sub amino_acid_homology_matrix{
	my ($amino_acid1, $amino_acid2,  $hydrophobic_group, $neural_polar,
		$acidic_group, $basic_group, $proline);
	$amino_acid1=${$_[0]} || $_[0];
	$amino_acid2=${$_[1]} || $_[1];
	$hydrophobic_group='LIFV'; # A excluded by me
	$neural_polar ='STCNQ'; # M excluded by me
	$acidic_group='ED';
	$basic_group='KRH';
	$proline='P';
	@groups=($hydrophobic_group, $neural_polar, $acidic_group, $basic_group);
	for($i=0; $i< @groups; $i++){
		if($groups[$i] =~/$amino_acid1/ and $groups[$i] =~/$amino_acid2/){
			return(\1);
		}
	}
	return(\0);
}



#_____________________________________________________________________
# Title     : make_reverse_seq_database
# Usage     : &make_reverse_seq_database(\@input_database_fasta_file);
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.2
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub make_reverse_seq_database{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   $| = 1;
   for($i=0; $i< @file; $i++){
	  my $fasta_file_for_DB =$file[$i];
	  my $base=${&get_base_names($fasta_file_for_DB)};
	  my $ext =${&get_file_extensions($file[$i])};
	  my($out_file_name, %seqs, %reversed_seqs);
	  if($ext=~/\S/){
		 $out_file_name="$base\_rv\.$ext";
	  }else{
	     $out_file_name="$base\_rv\.fa";
	  }
	  %seqs=%{&open_fasta_files(\$fasta_file_for_DB)};
	  %reversed_seqs=%{&reverse_sequences(\%seqs)};
	  &write_fasta(\%reversed_seqs, $out_file_name );
	  if(-s $out_file_name){
		 print "\n# make_reverse_seq_database: Supposedly wrote: $out_file_name\n";
	  }else{
		 print "\n# make_reverse_seq_database: Error in writing: $out_file_name\n";
	  }
   }
   print "\n# make_reverse_seq_database sub finished \n";
}


#__________________________________________________________________________
# Title     : make_hmm_from_alignment
# Usage     : @out_hmm_file_names=@{&make_hmm_from_alignment(\@file, "$over_write")};
# Function  :
# Example   :
# Keywords  : HMM, hidden markov model, make_HMM_from_alignment,
#             make_hmm_from_msf_file, create_hmm_from_alignment,
#             create_hmm_from_msf_file,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub make_hmm_from_alignment{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	local (@out_hmm_files, $base, $out_hmm_file);
	for($i=0; $i< @file; $i++){
	   if($file[$i]=~/\.msf/){
		   $base=${&get_base_names($file[$i])};
		   $out_hmm_file= "$base\.hmm";
		   if($char_opt=~/o/ or !(-s $out_hmm_file) ){
			  system("hmmb -P BLOSUM62 -B 200 -w $out_hmm_file $file[$i]");
			  push(@out_hmm_files, $out_hmm_file);
		   }else{
			  print "\n# The $out_hmm_file file already exists. To overwrite use -o opt\n";
		   }
	   }
	}
	if(@out_hmm_files > 1){
	   return(\@out_hmm_files);
	}else{
	   return(\$out_hmm_files[0]);
	}
}



#__________________________________________________________________
# Title     : get_false_positive_seq_matches
# Usage     : %seq=%{&get_false_positive_seq_matches(\%msp_1, \%msp2)};
# Function  : gets sequences which are wrongly matched from intermediate seq search
# Example   :
#
#  OUTPUT looks like the following;
#	d1dvh__=d1fcdc1     7.1e-08
#	d1fcdc1=d1dvh__     7.1e-08
#	d5cytr_=d351c__     5.3e-08
#	d351c__=d5cytr_     5.3e-08
#	d1cyi__=d2mtac_     9.1e-06
#	d2mtac_=d1cyi__     9.1e-06
#	d1cyi__=d5cytr_     0.00045
#	d5cytr_=d1cyi__     0.00045
#
# Warning   : The default is to show the best E value(lowest that is)
#
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#
# Returns   :
# Argument  :
# Version   : 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub get_false_positive_seq_matches{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%iss_input)= %{$hash[0]};
	my(%final_table_Evalue, %final_table_score, $inter_seq_seq_name, $inter_seq_score,
	   $inter_seq_E_value, $match_seq_seq_name, $match_seq_score, $match_seq_E_value,
	   $name_combi1, $name_combi2, $each_iss_line, $all_enquiry_seqs);
	@iss_lines = sort values %iss_input;

	if(@array > 0){ ## When the names of enquiry was given as an array, use it!
		$all_enquiry_seqs=join(' ', sort @{$array[0]} );
	}else{    ## otherwise, detect yourself.
		for($i=0; $i< @iss_lines; $i++){
		   $each_iss_line=$iss_lines[$i];
		   if($each_iss_line=~/^ *(\S+) +/){
			  $all_enquiry_seqs{$1}++;
		   }
		}
		$all_enquiry_seqs=join(' ', sort keys %all_enquiry_seqs );
	}

	for($i=0; $i< @iss_lines; $i++){
	   $each_iss_line=$iss_lines[$i];
	   if($each_iss_line=~/^ *(\S+) +(\S+)\((\d+)\)\((\S+)\) +(\S+)\((\d+)\)\((\S+)\)/){
		  $inter_seq_seq_name= $2;
		  $inter_seq_score   = $3;
		  $inter_seq_E_value = $4;
		  $match_seq_seq_name= $5;
		  $match_seq_score   = $6;
		  $match_seq_E_value = $7;
		  $name_combi1="$1\=$match_seq_seq_name";
		  $name_combi2="$match_seq_seq_name\=$1";
		  if($all_enquiry_seqs !~/$match_seq_seq_name/){
			 $false_positive_matches{$name_combi1}="$inter_seq_score $inter_seq_E_value";
			 next;
		  }
	   }
	}
	if($char_opt=~/v/){
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   # Wrting the false positives
	   #__________________________________
	   @keys_false=sort keys %false_positive_matches;
	   print "\n\n# False positives: " if @keys_false > 0;
	   for $key_false (@keys_false){
		  if($key_false =~/\S+/){
			 printf ("\n%-30s %-30s",  $key_false,  $false_positive_matches{$key_false});
		  }
	   }
	   print "\n";
	}
	return(\%false_positive_matches);
}


#__________________________________________________________________
# Title     : make_sequence_match_table
# Usage     : %sequence_match_table=%{&make_sequence_match_table(\%msp_1, \%msp2)};
# Function  : makes a table of match with the values for E values.
# Example   :
#
#  INPUT looks like this: (the iss file format), first column is key
#
#   d1ten__(110)(0.00031)     d1fna__    d1fna___1-91(578)(6.9e-37)       d1ten__(110)(0.00031)
#   d1cfb_2(255)(7.8e-16)     d1cfb_2    HSU55258_741-838(255)(5.6e-12)   d1cfb_2(255)(7.8e-16)
#
#  OUTPUT looks like the following;
#   d1dvh__=d1fcdc1    Correct: 7.1e-08
#	d1fcdc1=d1dvh__    Correct: 7.1e-08
#	d5cytr_=d351c__    Correct: 5.3e-08
#	d351c__=d5cytr_    Correct: 5.3e-08
#	d1cyi__=d2mtac_    Wrong:   9.1e-06
#
# Keywords  : make_sequence_match_Evalue_table, Evalue_table, make_Evalue_table
#             make_iss_sequence_match_table
# Options   : _  for debugging.
#             #  for debugging.
#             s  for skip SELF to SELF match entries
#             w  for Smith-Waterman score result out than E value out
#             r  for reflexive output
#
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Returns   :
# Argument  :
# Version   : 1.5
#-------------------------------------------------------------------------------
sub make_sequence_match_table{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%iss_input)= %{$hash[0]};
	my(%final_table_Evalue, %final_table_score, $inter_seq_seq_name, $inter_seq_score,
	   $inter_seq_E_value, $match_seq_seq_name, $match_seq_score, $match_seq_E_value, $key_seq,
	   $each_iss_line, $all_enquiry_seqs, $name_combi1, $name_combi2, @sorted_names, $name_sorted,
	   %final_table_interm_and_matched_score, %final_table_interm_and_matched_Evalue, %scop_bugs);
	@iss_lines = sort values %iss_input;

	if($char_opt=~/v/){ print "\n# make_sequence_match_table: \$char_opt is $char_opt\n" ; }

	if($char_opt=~/r/){ $non_reflexive=0;
	}else{    $non_reflexive=1; } # default , not to print result in two ways

	if(@array > 0){ ## When the names of enquiry was given as an array, use it!
		$all_enquiry_seqs=join(' ', sort @{$array[0]} );
	}else{    ## otherwise, detect yourself.
		for($i=0; $i< @iss_lines; $i++){
		   $each_iss_line=$iss_lines[$i];
		   if($each_iss_line=~/^ *(\S+) +/){  $all_enquiry_seqs{$1}++;		   }
		}
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# $all_enquiry_seqs contains all the sequences in the group
		#____________________________________________________________________
		$all_enquiry_seqs=join(' ', sort keys %all_enquiry_seqs );
	}

	for($i=0; $i< @iss_lines; $i++){
	   my $each_iss_line=$iss_lines[$i];
	   if($each_iss_line=~/^ *(\S+) +(\S+)\((\d+)\)\((\S+)\) +(\S+)\((\d+)\)\((\S+)\)/){
		  $key_seq=$1;
		  $inter_seq_seq_name= $2;
		  $inter_seq_score   = $3;
		  $inter_seq_E_value = $4;
		  $match_seq_seq_name= $5;
		  $match_seq_score   = $6;
		  $match_seq_E_value = $7;
		  if( $key_seq eq $match_seq_seq_name and $char_opt=~/s */ ){ next } ## avoiding self self match
		  @sorted_names=sort ($1, $match_seq_seq_name);
		  $name_combi1="$1\=$match_seq_seq_name";
		  $name_combi2="$match_seq_seq_name\=$1";
		  $name_sorted="$sorted_names[0]\=$sorted_names[1]";
		  if($all_enquiry_seqs !~/$match_seq_seq_name/){
			   if($non_reflexive){
				   $false_positive_matches{$name_sorted}="$inter_seq_score $inter_seq_E_value : $match_seq_score $match_seq_E_value";
			   }else{
				   $false_positive_matches{$name_combi1}="$inter_seq_score $inter_seq_E_value : $match_seq_score $match_seq_E_value";
			   }
			   next;
		  }elsif($final_table_score{$name_combi1} < $inter_seq_score or
			  $final_table_score{$name_combi2} < $inter_seq_score or
		      $final_table_score{$name_sorted} < $inter_seq_score){
				$final_table_score{$name_combi1}=$inter_seq_score;
				$final_table_score{$name_combi2}=$inter_seq_score;
				$final_table_Evalue{$name_combi1}=$inter_seq_E_value;
				$final_table_Evalue{$name_combi2}=$inter_seq_E_value;

				if($non_reflexive){
					 $final_table_interm_and_matched_score{$name_sorted} = "$inter_seq_score $match_seq_score";
					 $final_table_interm_and_matched_Evalue{$name_sorted} = "$inter_seq_E_value $match_seq_E_value";
				}else{
					 $final_table_interm_and_matched_score{$name_combi1} = "$inter_seq_score $match_seq_score";
					 $final_table_interm_and_matched_score{$name_combi2} = "$inter_seq_score $match_seq_score";
					 $final_table_interm_and_matched_Evalue{$name_combi1} = "$inter_seq_E_value $match_seq_E_value";
					 $final_table_interm_and_matched_Evalue{$name_combi2} = "$inter_seq_E_value $match_seq_E_value";
				}
		  }
	   }
	}
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# printing out to screen
	#__________________________________________________________
	if($char_opt =~/w/){ ## returning Smith-waterman score than E value
	   @keys = sort keys %final_table_interm_and_matched_Evalue;
	   for $key (@keys){
		  if($key =~/\S+/){
			 printf ("\n%-30s Correct: %-50s",  $key, $final_table_interm_and_matched_Evalue{$key});
		  }
	   }
	}else{
	   @keys = sort keys %final_table_interm_and_matched_score;
	   for $key (@keys){
		  if($key =~/\S+/){
			 printf ("\n%-30s Correct: %-50s",  $key,  $final_table_interm_and_matched_score{$key});
		  }
	   }
	}
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Writing the false positives
	#__________________________________
	@keys_false=sort keys %false_positive_matches;
	#print "\n\n# False positives: " if @keys_false > 0;
	for $key_false (@keys_false){
	   if($key_false =~/\S+/){
	      if($scop_bugs{$key_false}){
	          printf ("\n%-30s Correct: %-50s",  $key_false,  $false_positive_matches{$key_false});
	      }else{
			  printf ("\n%-30s Wrong:   %-50s",  $key_false,  $false_positive_matches{$key_false});
		  }
		  %scop_bugs=qw(d2kauc1=d2kauc  1 d1pkya2=d1pkya1 1 d1pbe_1=d1pbe_2 1
						d1dih_1=d1dih_2 1 d2ohxa2=d2ohxa1 1 d1poxa3=d1pvda2 1
						d1efga1=d1efga2 1 d1bct__=d1brd__ 1 d1qora1=d1qora2 1
						d2ohxa1=d2ohxa2 1);
	   }
	}
	print "\n";

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Returning the hash result
	#_____________________________________
	if($char_opt =~/w/){ ## returning Smith-waterman score than E value
	   return(\%final_table_interm_and_matched_score);
	}else{
	   return(\%final_table_interm_and_matched_Evalue);
	}
}



#__________________________________________________________________
# Title     : write_iss_file
# Usage     : &write_iss_file(\%msp1, \%msp2);  ## for 2 msp_x file input
# Function  : writes the intermediate sequence search file.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : write_interm_seq_search_file
#             v  for showing the output in STDOUT
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Version   : 1.2
#---------------------------------------------------------------------------
sub write_iss_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my(%msp_1, %msp_2, %merged_1, %merged_2);

   %msp_1=%{$hash[0]};
   %msp_2=%{$hash[1]};

   @msp1_keys=sort keys  %msp_1;
   @msp2_keys=sort keys  %msp_2;

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
   # removing the empty matches and merging matches according to the enquiry seqs.
   #  Following input will become;
   # ..
   # xxxxx
   # xxxxx  YYYYY(xx)(yy)
   # xxxxx  zzzzz(xx)(yy) ttttt(xx)(yy)
   # ..
   #  ->  xxxxx  'YYYYY(xx)(yy) zzzzz(xx)(yy) ttttt(xx)(yy)'
   #____________________________________________________________________________________
   for($i=0; $i< @msp1_keys; $i++){
	  $enquiry_seq = $msp1_keys[$i];
	  #my ($seq_name,  $sw_score, $evalue)=$enquiry_seq=~/(\S+)\((\S+)\)\((\S+)\)/;

	  #-- if $msp_1{$enquiry_seq} is not empty, assigns name, score, evalue etc to vars, or next
	  if($msp_1{$enquiry_seq}=~/\S+/){
		 ($seq_name, $sw_score, $evalue)=$enquiry_seq=~/(\S+)\((\S+)\)\((\S+)\)/;
		  $merged_msp1{$seq_name} .=$msp_1{$enquiry_seq};
	  }else{
		 next;
	  }
   }
   for($i=0; $i< @msp2_keys; $i++){
	  $enquiry_seq = $msp2_keys[$i];

	  #-- if $msp_2{$enquiry_seq} is not empty, assigns name, score, evalue etc to vars, or next
	  if($msp_2{$enquiry_seq}=~/\S+/){
		  $merged_msp2{$enquiry_seq} .=$msp_2{$enquiry_seq};
	  }else{
		 next;
	  }
   }

   @merged_msp1_keys=sort keys  %merged_msp1;
   @merged_msp2_keys=sort keys  %merged_msp2;

   for($i=0; $i< @merged_msp1_keys; $i++){
	  $enquiry_seq=$merged_msp1_keys[$i];
	  @intermediate_seqs=sort split(/ +/, $merged_msp1{$enquiry_seq});
	  for($j=0; $j< @intermediate_seqs; $j++){

		 $intermediate_seq=$intermediate_seqs[$j];

		 ($inter_seq_name, $sw_score, $evalue)=$intermediate_seq=~/(\S+)\((\S+)\)\((\S+)\)/;
		 @final_matches=sort split(/ +/,  $merged_msp2{$inter_seq_name});
		 for($k=0; $k < @final_matches; $k ++){
			 $final_matched_seq = $final_matches[$k];
			 if($char_opt=~/v/){
			    printf ("%-18s %-40s %-38s\n", $enquiry_seq, $intermediate_seq, $final_matched_seq);
			 }
			 $final_out{$final_matched_seq}=
			    sprintf ("%-18s %-40s %-38s\n", $enquiry_seq, $intermediate_seq, $final_matched_seq);
		 }
		 #print "\n";
	  }
	  #print "\n";
   }
   #print "\n";
   return(\%final_out);
}





#__________________________________________________________________
# Title     : get_isearch_result_stat
# Usage     : &get_self_isearch_stat(\%stat2, \@pdbg_seqs, \$evalue);
# Function  :
# Example   : input example content of eg) %stat2
#
#  KEYS:      VALUEs:
#
#  d1cc5__    d1cyi__
#  d1cyi__    d2mtac_ d351c__ d1cc5__
#  d1dvh__    d1fcdc1
#  d1fcdc1    d1dvh__
#  d1fcdc2
#  d2mtac_    d1cyi__
#  d351c__    d1cyi__
#  d5cytr_
#
# Keywords  : get_stat_interm_search, get_intermediate_search_stat
# Options   : _  for debugging.
#             #  for debugging.
# Package   : Bio
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Returns   : [$av_correct, $num_enq_seq]
# Tips      :
# Argument  :
# Todo      :
# Author    : A Scientist
# Version   : 1.8
#-----------------------------------------------------------------------------
sub get_isearch_result_stat{
	my (@keys, $num_enq_seq, @pdbg_seqs_ori, $c, $d, $i,
	    $sum_correct, $sum_false, $match_seq, $correct, @correct,
	    $av_correct, $av_false, $actual_e_value, $correct_matched, %correct);

	my %seqs=%{$_[0]};
	my @pdbg_seqs=@{$_[1]};
	my $evalue=${$_[2]};
	my $pdbg_base=${$_[3]} || $ARGV[3];
	my $E_mult_factor1=${$_[4]};
	my $E_mult_factor2=${$_[4]};
	my $leng_thresh =$_[5] or ${$_[5]};
	my %msp_0=%{$_[6]};
	my %msp_00=%{$_[7]};

	if($E_mult_factor1=~/^ *$/){ $E_mult_factor1=1; };


	@keys=sort keys %seqs;
	@keys=@{&strip_sequence_ranges(\@keys)};
	@keys=@{&remove_dup_in_array(\@keys)};
	@pdbg_seqs_ori=@pdbg_seqs;
	$num_enq_seq=@pdbg_seqs;
	print "\n# In get_isearch_result_stat: PDBG seqs $num_enq_seq \n=> @pdbg_seqs\n\n";

	#@pdbg_seqs=@{&strip_sequence_ranges(\@pdbg_seqs)};
	#@pdbg_seqs=@{&remove_dup_in_array(\@pdbg_seqs)};

	if($num_enq_seq < 2){ print "\n# \$num_enq_seq is less than 2 @pdbg_seqs $base\n"; exit; }

	for($c=0; $c < @keys; $c++){
	   my($enq_seq, $correct, $false_positive);
	   $num_of_matched=@match_seqs=split(/ +/, $seqs{$keys[$c]});
	   $enq_seq=$keys[$c];

	   for($d=0; $d< @match_seqs; $d++){
		   my($correct_matched, @sorted);

		   $match_seq=$match_seqs[$d];

		   for($i=0; $i< @pdbg_seqs; $i++){
			  if($match_seq =~/d?$pdbg_seqs[$i]/i){
				 #print "\n# \$match_seq = $match_seq, \$pdbg_seqs $pdbg_seqs[$i] \n" if $verbose=~/\S/;
				 $correct++;
				 $correct_matched=1;
				 @sorted=sort ($enq_seq, $match_seq);
				 unless($correct{join(' ', @sorted)}){
				    $correct_group{$base} .="Correct: @sorted $base  $msp_0{join(' ', @sorted)}\n";
				 }
				 $correct{join(' ', @sorted)}="Correct: $base  $msp_0{join(' ', @sorted)}";
			  }
		   }

		   if($correct_matched !=1){
			  $false_positive++;
			  @sorted=sort ($enq_seq, $match_seq);
			  unless($correct{join(' ', @sorted)}){
			     $correct_group{$base} .="Wrong: @sorted $base  $msp_0{join(' ', @sorted)}\n";
			  }
			  $correct{join(' ', @sorted)}="Wrong: $base  $msp_0{join(' ', @sorted)}";
		   }
	   }
	   if(@match_seqs == 0){ @match_seqs=1; $percent_correct=0; }
	   $sum_correct += $correct;
	   $sum_false   += $false_positive;
	}
	$av_correct = $sum_correct/$num_enq_seq;
	$av_false   = $sum_false  /($num_enq_seq);

	#### $actual_e_value becomes whatever $E_mult_factor1 defined ~~~~~~~~~~~~
	if($E_mult_factor1 != 1){
	   $actual_e_value=$evalue * $E_mult_factor1;
	}elsif($E_mult_factor2 != 1){
	   $actual_e_value= $evalue * $E_mult_factor2;
	}else{ $actual_e_value=$evalue }

	$num_enq_seq--;
	$sum_correct_for_additional = $num_enq_seq+1;
	$match_count=$sum_correct_for_additional * $av_correct;
	#$sum_correct= $sum_correct_for_additional;
	printf ("%-10s %-12s %-13f %-13f %-7s %-7s %-7s %-7s %-4s\n", $pdbg_base,
		$actual_e_value, $av_correct, $av_false, $num_enq_seq,
		$sum_correct_for_additional, $sum_false, $match_count, $leng_thresh);

	print "\n-----";

	@correct_new=@{&remove_dup_in_array(\@correct_new)};
	for($i=0; $i< @correct_new; $i++){
	    print "\n# correct new: $correct_new[$i]" ;
	}
	$num_correct=$match_count/2;

	print "Num of non-reflective correcct:  $num_correct  Wrong: $sum_false  \n\n";
	return([$av_correct, $sum_correct, $num_enq_seq, \%correct, \%correct_group]);
}



#__________________________________________________________________
# Title     : strip_sequence_ranges
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : remove_sequence_ranges, remove_sequence_name_ranges,
#             remove_ranges_in_sequences, strip_sequence_name_ranges,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub strip_sequence_ranges{
   my (@out, $i);
   my @in=@{$_[0]} or @_;
   for($i=0; $i< @in; $i++){
	  if($in[$i]=~/^(\S+)_\d+\-\d+/){
		  push(@out, $1);
	  }else{
		  push(@out, $in[$i]);
	  }
   }
   return(\@out);
}



#__________________________________________________________________________
# Title     : open_sequence_index_files
# Usage     : open_sequence_index_files(<indexfilename>, <sequencename>);
# Function  : returns seqname with its seek pos in fasta sequence db file.
# Example   : %index=%{&open_sequence_index_files(\@INDEX_FILE, \@input_seq_names)};
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : open_seq_index_files, open_seq_idx_files, open_idx_files,
#             get_sequence_index, get_seq_index, get_sequence_with_index
# Options   : _ or # for debugging
# Returns   :
# Argument  :
# Version   : 1.2
#--------------------------------------------------------------------------
sub open_sequence_index_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my( %final_index, %long_index);

	for($i=0; $i< @file; $i++){
	   open(INDEX, "$file[$i]");
	   while(<INDEX>){
		  if(/^(\S+) +(\S+)$/){
			 $long_index{$1}=$2;
		  }
	   }
	   for($j =0; $j < @string; $j++){ #<<<< @string has the sequence NAMEs >>>>
		  if($input_seq_names[$j]=~/^(\S+)_\d+\-\d+/){
			 $seq_with_index{$input_seq_names[$j]}=$long_index{$1};
		  }else{
			 $seq_with_index{$input_seq_names[$j]}=$long_index{$input_seq_names[$j]};
		  }
	   }

	}
	return(\%final_index);
}

#__________________________________________________________________
# Title     : do_intermediate_sequence_search
# Usage     : &do_intermediate_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_msp, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Function  :
# Example   : &do_intermediate_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_msp, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   :
#             Query_seqs=  for enquiry sequences eg)  "Query_seqs=$ref_of_hash"
#             DB=   for target DB  "DB=$DB_used"
#             File= to get file base(root) name.  "File=$file[0]"
#             m  for MSP format directly from FASTA or Ssearch result than through sso_to_msp to save mem
#             s  for the big single output (msp file output I mean)
#             o  for overwrite existing xxxx.fa files for search
#             c  for create SSO file (sequence search out file)
#             R  for adding ranges to the enquiry sequences as well.
#             k= for k-tuple value. default is 1 (ori. FASTA prog. default is 2)
#             u= for $upper_expect_limit
#             l= for $lower_expect_limit
#             a= for choosing either fasta or ssearch algorithm
#
# Returns   : the names of files created (xxxxx.msp, yyy.msp,,)
# Argument  :
# Version   : 1.0
# Enclosed  :
#----------------------------------------------------------------------------------------
sub do_intermediate_sequence_search{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@final_out, $add_range, $single_big_msp, $base_name, $create_sso, @nondup,
	   $Single_msp_out_file, %duplicate, $Evalue_thresh, $Score_thresh, @SSO, $sequence_DB,
	   @sso, @temp, $algorithm, $margin, $out_msp_file, @MSP, @final_msp_file_names_out,
	   $upper_expect_limit, $lower_expect_limit, $k_tuple, %seq_input, %MSP, $add_range_to_enquiry );
	my ($E_val) = 5;  ## default 5 <<<<<<<<<<<<<<<<<<<<<

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# DEFAULTS
	#________________________________________
	$k_tuple=1;
	$algorithm='fasta';
	$upper_expect_limit=10;
	$lower_expect_limit=0;
	$Score_thresh      =75;
	$margin            =0;
	$add_range         ='';
	$sequence_DB       =$ENV{'PDB40D_FASTA'};

	if($vars{'a'}=~/\S+/){ $algorithm          = $vars{'a'}            };
	if($vars{'u'}=~/\d+/){ $upper_expect_limit = $vars{'u'}            };
	if($vars{'l'}=~/\d+/){ $lower_expect_limit = $vars{'l'}            };
	if($vars{'k'}=~/\d+/){ $k_tuple            = $vars{'k'}            };
	if($vars{'t'}=~/\d+/){ $Score_thresh       = $vars{'t'}            };
	if($vars{'m'}=~/\d+/){ $margin             = $vars{'m'}            };
	if($vars{'r'}=~/\S+/){ $add_range          = 'r'                   };
	if($vars{'s'}=~/\S+/){ $single_big_msp     = 's'                   };
	if($vars{'DB'}=~/\S+/){ $sequence_DB       = $vars{'DB'}           };
	if($vars{'File'}=~/\S+/){ $input_file_name = $vars{'File'}         };
	if($vars{'Query_seqs'}=~/\S+/){ %seq_input = %{$vars{'Query_seqs'}}};
	if($vars{'e'}         =~/\S+/){ $E_val     = $vars{'e'}            };

	if($char_opt=~/r/){    $add_range            = 'r' }
	if($char_opt=~/R/){    $add_range_to_enquiry = 'R'  }
	if($char_opt=~/c/){    $create_sso           = 'c' }
	if($char_opt=~/s/){    $single_big_msp       = 's'; print "\n# Single file opt is set\n"; }
	if($char_opt=~/m/){    $msp_directly_opt     = 'm' }
	if($char_opt=~/i/){    $do_intermediate_search   = 'i' }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
   #  When, you didn't use "DB=$XXX" and "File=$FXXX" format, first file input is DB etc
   #_______________________________________________________________________________________
   if($input_file_name=~/^$/){  $input_file_name=$file[1];
	  print "\n# You did not use \"File=\$XXX\" format\n"  };
   if($sequence_DB=~/^$/){      $sequence_DB    =$file[0];
	  print "\n# You did not use \"DB=\$XXX\" format\n"   };
   print "\n# Finished writing the enquiry fasta files from \%seq_input by write_fasta_seq_by_seq";
   print "\n# I am in do_sequence_search sub, Target database used :  $sequence_DB \n";

   my $base = ${&get_base_names($input_file_name)};
   my $out_msp_file="$base\.msp";
   my @temp=`$algorithm -m 10 -H  -E $E_val $input_file_name $sequence_DB $k_tuple`;
   if(@temp < 40){	  print "\n# There must be error , \@temp is too small\n\n";   }
   my @msp_hashes_from_temp = @{&open_sso_files(\@temp, $add_range,
												"u=$upper_expect_limit",
												"l=$lower_expect_limit",
												$add_range_to_enquiry)};
   my @msp_from_temp= values %{$msp_hashes_from_temp[0]};
   $MSP{$out_msp_file} = \@msp_from_temp;
   open(MSPOUT, ">$out_msp_file");
   for($i=0; $i< @msp_from_temp; $i++){
	  print MSPOUT $msp_from_temp[$i];
	  print $msp_from_temp[$i];
   }
   close MSPOUT;
   return(\$out_msp_file);
}


#__________________________________________________________________
# Title     : do_sequence_search
# Usage     : &do_sequence_search("Query_seqs=\%pdb_seq", "DB=$sequence_db_fasta",
#  		         "File=$ARGV[0]", $single_msp, $over_write,
# 	        	 "u=$upper_expect_limit", "l=$lower_expect_limit",
#       		 "k=$k_tuple", $No_processing );
# Function  :
# Example   : &do_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_msp, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Keywords  : sequence_search
# Options   :
#             Query_seqs=  for enquiry sequences eg)  "Query_seqs=$ref_of_hash"
#             DB=   for target DB  "DB=$DB_used"
#             File= to get file base(root) name.  "File=$file[0]"
#             m  for MSP format directly from FASTA or Ssearch result than through sso_to_msp to save mem
#             s  for the big single output (msp file output I mean)
#             o  for overwrite existing xxxx.fa files for search
#             c  for create SSO file (sequence search out file)
#             d  for very simple run and saving the result in xxxx.gz format in sub dir starting with one char
#             k= for k-tuple value. default is 1 (ori. FASTA prog. default is 2)
#             u= for $upper_expect_limit
#             l= for $lower_expect_limit
#             a= for choosing either fasta or ssearch algorithm
#             d  for $make_gz_in_sub_dir_opt, putting resultant sso files in gz format and in single char subdir
#             D  for $make_msp_in_sub_dir_opt, convert sso to msp and put in sub dir like /D/, /S/
#             n  for new format to create new msp file format with sso_to_msp routine
#             PVM=  for PVM run of FASTA (FASTA only)
#             M  for machine readable format -m 10 option
#             M= for machine readable format -m 10 option
#             N  for 'NO' do not do any processing but, do the searches only.
#
# Returns   : the names of files created (xxxxx.msp, yyy.msp,,)
# Argument  :
# Version   : 3.7
#----------------------------------------------------------------------------------------
sub do_sequence_search{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@final_out, $add_range, $single_big_msp, $base_name, $create_sso, @nondup,
	   $Single_msp_out_file, %duplicate, $Evalue_thresh, $Score_thresh, @SSO, $sequence_DB,
	   @sso, @temp, $algorithm, $margin, $out_msp_file, @MSP, @final_msp_file_names_out,
	   $upper_expect_limit, $lower_expect_limit, $k_tuple, %seq_input, %MSP, $No_processing,
	   $new_format, $PVM_FASTA_run, $over_write );
	my ($E_val) = 5;  ## default 5 <<<<<<<<<<<<<<<<<<<<<

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# DEFAULTS
	#________________________________________
	$k_tuple           =1;  # 1 or 2, 1 is more sensitive
	$algorithm         ='fasta';
	$upper_expect_limit=1;
	$lower_expect_limit=0;
	$Score_thresh      =75;
	$margin            =0;
	$add_range         ='';
	$pwd               =`pwd`; chomp($pwd);
	if($ENV{'PDB40D_FASTA'}){ $sequence_DB  =$ENV{'PDB40D_FASTA'} if $ENV{'PDB40D_FASTA'};
	}else{	print "\n# INFO: Your ENV setting for PDB40D_FASTA doesn't seem to be correct\n";   }

	if($vars{'a'}=~/\S+/){ $algorithm          = $vars{'a'}            };
	if($vars{'u'}=~/\d+/){ $upper_expect_limit = $vars{'u'}            };
	if($vars{'l'}=~/\d+/){ $lower_expect_limit = $vars{'l'}            };
	if($vars{'k'}=~/\d+/){ $k_tuple            = $vars{'k'}            };
	if($vars{'t'}=~/\d+/){ $Score_thresh       = $vars{'t'}            };
	if($vars{'m'}=~/\d+/){ $margin             = $vars{'m'}            };
	if($vars{'r'}=~/\S+/){ $add_range          = 'r'                   };
	if($vars{'s'}=~/\S+/){ $single_big_msp     = 's'                   };
	if($vars{'DB'}=~/\S+/){ $sequence_DB       = $vars{'DB'}           };
	if($vars{'FILE'}=~/\S+/){ $input_file_name = $vars{'FILE'}; push(@file,$input_file_name) };
	if($vars{'File'}=~/\S+/){ $input_file_name = $vars{'File'}; push(@file,$input_file_name) };
	if($vars{'Query_seqs'}=~/\S+/){ %seq_input = %{$vars{'Query_seqs'}}};
	if($vars{'Query'}=~/\S+/){      %seq_input = %{$vars{'Query'}}};
	if($vars{'u'}    =~/\S+/){ $E_val          = $vars{'u'}            };
	if($vars{'PVM'}  =~/\S+/){ $PVM_FASTA_run  = $vars{'PVM'}; print "\n# PVM opt is set\n";     };
	if($vars{'M'}  =~/\S+/){ $machine_readable = $vars{'M'};           };

	if($char_opt=~/r/){    $add_range          = 'r' }
	if($char_opt=~/o/){    $over_write         = 'o' }
	if($char_opt=~/c/){    $create_sso         = 'c' }
	if($char_opt=~/s/){    $single_big_msp     = 's'; print "\n# Single file opt is set\n"; }
	if($char_opt=~/m/){    $msp_directly_opt   = 'm' }
	if($char_opt=~/M/){    $machine_readable   = 'M' }
	if($char_opt=~/d/){    $save_in_gz_in_sub_dir  = 'd' }
	if($char_opt=~/D/){$make_msp_in_sub_dir_opt= 'D' } # for simple search and storing msp file
	if($char_opt=~/N/){    $No_processing      = 'N'; $create_sso='c'; }

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
	# When no %seq is given, but files
	#___________________________________________
	if(@hash==0 and @file > 0){
		print "\n# do_sequence_search: You did not put sequences as in \%seq, but raw sequence file!\n";
		print "        I will run \'open_fasta_files\' sub to fetch sequences to store in \%seq_input\n";
		%seq_input=%{&open_fasta_files(\@file)};
	}else{
		print "\n# do_sequence_search: I will use given seqs in \%seq_input from \%\{\$hash\[0\]\}\n";
		%seq_input=%{$hash[0]};
	}
	my (@list)=keys %seq_input;

	print "\n# line:",__LINE__, ", You are in do_sequence_search with \$algorithm => $algorithm\n";
	$base_name = ${&get_base_names($input_file_name)};

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # Controlling which kind of search it should do. Do save_in_gz_in_sub_dir first if d is set
   #_________________________________________________________
   if( $char_opt =~/[dD]/){
	   print "\n# do_sequence_search: You set \'d\' or \'D\' opt\n";
	   my ($seq_name, $seq)= %seq_input;
	   my $first_char= substr("\U$seq_name", 0, 1);
	   mkdir ("$first_char", 0777) unless -d $first_char;
	   chdir("$first_char");
	   my $temp_file_name="$seq_name.fa";
	   &write_fasta_seq_by_seq(\%seq_input, $temp_file_name ); ## e makes skip writing when file already
	   $out_file_sso_name="$seq_name\.sso";
	   $out_file_msp_name="$seq_name\.msp";
	   $out_file_gz_name="$seq_name\.msp\.gz";

	   if($char_opt =~/D/){ #### To make MSP file
		  if($machine_readable=~/M/){
			 @temp=`$algorithm -m 10 -H  -E $E_val $temp_file_name $sequence_DB $k_tuple`;
		  }else{
			 @temp=`$algorithm -H -E $E_val $temp_file_name $sequence_DB $k_tuple`;
		  }
		  @msp_hashes_from_temp = @{&open_sso_files(\@temp, $add_range,
		                                            "u=$upper_expect_limit",
		                                            "l=$lower_expect_limit")};
		  @msp_from_temp= values %{$msp_hashes_from_temp[0]};
		  if( !(-s $out_file_gz_name) or $over_write=~/o/){
			  open(MSP, ">$out_file_msp_name");
			  for(@msp_from_temp){    print MSP $_;  }
			  close MSP;
			  if(-s $out_file_gz_name){
			      unlink ($out_file_gz_name);
			      system("gzip $out_file_msp_name"); ## gzipping it
			  }
		  }else{
			  print "\n# Line No. ", __LINE__,", $out_file_gz_name already exists  (do_sequence_search)\n";
		  }
	   }else{ ### To make gzipped SSO files
		  system(" $algorithm -m 10 -H  -E $E_val $temp_file_name $sequence_DB $k_tuple > $out_file_sso_name");
		  system("gzip $out_file_sso_name");
	   }
	   unlink("$seq_name.fa");
	   print "\n# Sub dir $first_char has been made, finishing do_sequence_search\n";
	   chdir ('..');
	   goto EXIT;
   }


   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # This is the big single MSP output
   #______________________________________________
   $Single_msp_out_file="$base_name\.msp" if($single_big_msp eq 's');
   if(-s $Single_msp_out_file and $char_opt !~/o/){
	   print "\n# $Single_msp_out_file exists, skipping \n";
	   push(@final_out, $Single_msp_out_file);
	   return(\@final_out);
   }else{
	   $char_opt .='o';
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # Check if it is necessary to write sequences.fa files
   #______________________________________________________
   if($char_opt=~/o/){
	  &write_fasta_seq_by_seq(\%seq_input); ## e makes skip writing when file already
   }else{
	  &write_fasta_seq_by_seq(\%seq_input, 'e'); ## e makes skip writing when file already
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
   #  When, you didn't use "DB=$XXX" and "File=$FXXX" format, first file input is DB etc
   #_______________________________________________________________________________________

   if($sequence_DB=~/^$/){
	  print "\n# FATAL: do_sequence_search: You did not use \"DB=\$XXX\" format\n"; exit   };

   print "\n# Finished writing the enquiry fasta files from \%seq_input by write_fasta_seq_by_seq";
   print "\n# I am in do_sequence_search sub, Target database used :  $sequence_DB with seqs of \'@list\'\n";


   for($j=0; $j< @list; $j++){  # @list has sequence names
	   my @temp;
	   my $each_seq_fasta="$list[$j]\.fa";
	   unless(-s $each_seq_fasta){   print "\n# do_sequence_search: $each_seq_fasta does not exist, error\n"; exit }
	   print "\n# Found $each_seq_fasta is searched against $sequence_DB\n";
	   $out_msp_file="$list[$j]\.msp";
	   $out_sso_file ="$list[$j]\.sso";
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   # If files already exist
	   #__________________________________________
	   if( -s $out_msp_file and $char_opt !~/o/ ){
		   print "\n# File: $out_msp_file exists, skipping, to overwrite use \'o\' opt";

		   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		   # c opt for creating SSO file
		   #____________________________________
		   if($create_sso=~/c/){
		      if($machine_readable=~/M/){
				 @temp=`$algorithm -m 10 -H  -E $E_val $each_seq_fasta $sequence_DB $k_tuple`;
			  }else{
				 @temp=`$algorithm -H -E $E_val $each_seq_fasta $sequence_DB $k_tuple`;
			  }
			  if(@temp < 20){
				  print "\n# OUTPUT of fasta is too small, error \n"; print chr(7);
				  exit;
			  }
			  open(SSO, ">$out_sso_file");
			  print SSO @temp;
			  print "\n# $out_sso_file is created";
			  close SSO;
		   }
		   push(@final_out, $out_msp_file);
		   unlink($each_seq_fasta);
	   }
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   # If files DONT  exist
	   #__________________________________________
	   else{  ## -E is for e value cutoff. -b is for num of seq fetched
		   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		   #  K-tuple is  set  to  1 by default
		   #
		   #  If xxxx.sso exists, let's skip running fasta or ssearch
		   #____________________________________________________________

		   if(-s $out_sso_file and $char_opt !~/o/ ){
			   open(SSO_ALREADY, "$out_sso_file");
			   @temp=<SSO_ALREADY>;
			   close(SSO_ALREADY);
		   }else{
			   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			   #  NORMAL Default FASTA run comes here
			   #________________________________________
			   if($machine_readable=~/M/){   print "\n# do_sequence_search: You put \'M\' opt \n";
			      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			      # PVM FASTA run
			      #__________________________________________
				  if($PVM_FASTA_run=~/PVM/){
				     unless(-s $ENV{'PVM_HOSTFILE'}){
				         print "\n# do_sequence_search: $ENV{'PVM_HOSTFILE'} does not exsists error\n";
						 exit;
				     }
					 open(PVM_CSH, ">pvm_fasta.csh"); print "\n# pvm_fasta.csh is created \n";
				     print PVM_CSH "\#\!\/bin\/csh\n";
					 print PVM_CSH "pvm $ENV{'PVM_HOSTFILE'} \<\< \'eof\'\n\'eof\'\n";
					 print PVM_CSH '/gn0/jong/App/Fasta/pvcompfa -m 10 -H -E ', " $E_val ",
									" $each_seq_fasta ", "$sequence_DB $k_tuple \> temp.sso\n";

					 print PVM_CSH "\npvm \<\< \'eof\'\n";
					 print PVM_CSH "halt\n\'eof\'\n";
					 close PVM_CSH;
					 system(" csh pvm_fasta.csh");
					 open(TEMP_SSO, "temp.sso");
					 @temp=<TEMP_SSO>;
					 close TEMP_SSO;
				  }else{
					 @temp=`$algorithm -m 10 -H  -E $E_val $each_seq_fasta $sequence_DB $k_tuple`;
				  }
			   }else{
			      if($PVM_FASTA_run=~/PVM/){
				     unless(-s $ENV{'PVM_HOSTFILE'}){
				         print "\n# do_sequence_search: $ENV{'PVM_HOSTFILE'} does not exsists error\n";
						 exit;
				     }
					 open(PVM_CSH, ">pvm_fasta.csh");
				     print PVM_CSH "\#\!\/bin\/csh\n";
					 print PVM_CSH "pvm $ENV{'PVM_HOSTFILE'} \<\< \'eof\'\n\'eof\'\n";
					 print PVM_CSH '/gn0/jong/App/Fasta/pvcompfa -H -E ', " $E_val ",
									" $each_seq_fasta ", "$sequence_DB $k_tuple \> temp.sso \n";
					 print PVM_CSH "\npvm \<\< \'eof\'\n";
					 print PVM_CSH "halt\n\'eof\'\n";
					 close PVM_CSH;
					 system("csh pvm_fasta.csh");
					 open(TEMP_SSO, "temp.sso");
					 @temp=<TEMP_SSO>;
					 close TEMP_SSO;
			      }else{
			         @temp=`$algorithm -H -E $E_val $each_seq_fasta $sequence_DB $k_tuple`;
				  }
			   }

			   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			   # c opt for creating SSO file
			   #____________________________________
			   if($create_sso=~/c/){
				  if(@temp < 20){
					  print "\n# OUTPUT of fasta is too small, error \n"; print chr(7);
					  exit;
				  }
				  open(SSO, ">$out_sso_file");
				  print SSO @temp;
				  print "\n# $out_sso_file   is created because of \"c\" or \"N\" option you set ";
				  close SSO;
			   }
		   }

		   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
		   # If $char_opt is m
		   #____________________________________________________________
		   if($char_opt=~/m/){  # make msp files directly from search not going through sso_to_msp
			   my @msp_hashes_from_temp = @{&open_sso_files(\@temp, $add_range,
			                                                "u=$upper_expect_limit",
			                                                "l=$lower_expect_limit")};
			   my @msp_from_temp= values %{$msp_hashes_from_temp[0]};
			   $MSP{$out_msp_file} = \@msp_from_temp;
			   unlink($each_seq_fasta);
			   next;
		   }elsif($No_processing !~/N/){ ## When sso output is not directly converted to MSP
				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 # REmoving some junk lines  in SSO output
				 #_________________________________
				 for($o=0; $o < @temp; $o++){
					 if($temp[$o]=~/^[ \w\-]+/){                    splice(@temp, $o, 1); $o--;
					 }elsif($temp[$o]=~/^\; mp/){                   splice(@temp, $o, 1); $o--;
					 }elsif($temp[$o]=~/^\; pg/){                   splice(@temp, $o, 1); $o--;
					 }elsif($temp[$o]=~/^\; fa_[ozi]/){             splice(@temp, $o, 1); $o--;
					 }elsif($temp[$o]=~/^\; sq_type$/){             splice(@temp, $o, 1); $o--;
					 }
				 }
				 unlink($each_seq_fasta);
				 if(@temp < 20){
					 print "\n# FATAL: OUTPUT of FASTA is too small (less than 20 byte), error\n";
					 print "\n# @temp\n";
					 print chr(7);
					 exit;
				 }
				 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				 # opt s is for the big single output
				 #___________________________________
				 if($single_big_msp eq 's'){  push(@SSO, \@temp); ## <<------------ to go to (3)
				 }else{
					 $msp_out_file="$list[$j]\.msp";
					 push(@final_out, @{&sso_to_msp(\@temp, "l=$lower_expect_limit",
										 "u=$upper_expect_limit", $msp_out_file,
										 $create_sso, $add_range, "t=$Score_thresh",
										 "e=$Evalue_thresh", "m=$margin", $new_format )} );
					 if($char_opt=~/c/){ ##  create SSO file (sequence search out file
						$out_sso_file ="$list[$j]\.sso";
						open(SSO, ">$out_sso_file");
						print SSO @temp;
						print "\n# $out_sso_file is created \n";
						close SSO;
					 }
				 }
		   }else{   # endof if($char_opt=~/m/){ }
				 print "\n# do_sequence_search: You set \'N\' option for NO processing of the results\n";
		   }
	   }
   } # end of for($j=0; $j< @list; $j++){

   if($char_opt=~/m/){  # make msp files directly from search not going through sso_to_msp
	   if($single_big_msp=~/s/){
		  open(SINGLE_BIG_MSP, ">$Single_msp_out_file");
		  @MSP= keys %MSP;
		  for($m=0; $m< @MSP; $m++){
			 print SINGLE_BIG_MSP @{$MSP{$MSP[$m]}}, "\n";
		  }
		  close(SINGLE_BIG_MSP);
		  push(@final_msp_file_names_out, $Single_msp_out_file);
		  return(\@final_msp_file_names_out);
	   }else{
		  @MSP= keys %MSP;
	      for($t=0; $t <  @MSP; $t++){
			 open(SING_MSP, ">$MSP[$t]");
	         print SING_MSP @{$MSP{$MSP[$t]}}, "\n";
			 close(SING_MSP);
			 push(@final_msp_file_names_out, $Single_msp_out_file);
		  }
		  return(\@final_msp_file_names_out);
	   }
   }else{
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   # (3) This now processes the @SSO which has all the sso Single SSO
	   #________________________________________________________
	   if($single_big_msp =~ /s/){
		  push(@final_out, @{&sso_to_msp(@SSO, $Single_msp_out_file, $create_sso,
			 "u=$upper_expect_limit", "l=$lower_expect_limit",
			 $add_range, "m=$margin", $new_format)} );
		  if($char_opt=~/c/){ ##  create SSO file (sequence search out file
			 $out_sso_file ="$base_name\.sso";
			 open(SSO, ">$out_sso_file");
			 for($i=0; $i< @SSO; $i++){
				print SSO @{$SSO[$i]}, "\n";
			 }
			 print "\n# $out_sso_file is created \n";
		  }
		  close(SSO);
	   }
	   @nondup = grep { ! $duplicate{$_}++ } @final_out;
	   return(\@nondup);
   }
   EXIT:
}




#__________________________________________________________________________
# Title     : do_hmm_sequence_search
# Usage     : &do_hmm_sequence_search(\@file, "method=$default_search_method",
#								$over_write, "DB=$pdbd40_seq_fasta");
#
# Function  : does hmm sequence search using Sean Eddy's HMMER (hmmls, hmmfs)
# Example   :
# Keywords  : do_seq_search_with_hmm, do_hmmt_sequence_search
# Options   :
#    "method=ls"  for turning hmmls search option on (default)
#    "method=fs"  for turning hmmfs search option on
#    method= by method=
#   o  for overwriting existint xxxxx.hmm files
#   E=Enguiry_name    for specifying enquiry seq name rather than 'HMM', the default
#   t=15  for score thresh at the level of hmmls. Default of hmmls is 0. example showed has 15
#
#   $over_write = o by -o o
# Returns   :
# Argument  :
# Version   : 1.3
#----------------------------------------------------------------------------
sub do_hmm_sequence_search{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(@out_hmm_file_names);
	my $score_thresh=16; # default threshold

	$default_search_method='hmmls';
	if($vars{'method'}=~/ls/){
	}elsif( $vars{'method'}=~/fs/){	$default_search_method='hmmfs';	}

	if( $vars{'DB'} =~/\S/){  $target_DB = $vars{'DB'}
	}else{  print "\n# I need target DB to search for hmmls-fs. Use: DB=xxxx.fa form";
	        print "\n#     or Default PDB40D_FASTA ENV setting will be used for DB\n";
			$target_DB= $ENV{'PDBD40_SEQ_FASTA'};
 	}
	if($vars{'E'}=~/\S/){ $enquiry_name        =$vars{'E'} }
	if($vars{'t'}=~/\S/){ $score_thresh        =$vars{'t'} }


	for($i=0; $i< @file; $i++){
		print "\n# Running do_hmm_sequence_search with $file[$i]\n";

		if($vars{'E'}=~/\S/){ $base=$enquiry_name;   # When $enquiry_name is given, it uses for output name
		}else{
			$base=${&get_base_names($file[$i])};
		}

		if($default_search_method=~/hmmfs/){
			$output_hmm_result = "$base\.hmmfs";
	    }elsif($default_search_method=~/hmmls/){
			$output_hmm_result = "$base\.hmmls";
	    }
		if($char_opt=~/o/ or !(-s $output_hmm_result) ){
			print "Running: $default_search_method -t $score_thresh $file[$i] $target_DB \> $output_hmm_result\n";
			system("$default_search_method -t $score_thresh $file[$i] $target_DB > $output_hmm_result");
		}else{
			print "\n# The $out_hmm_file file already exists. To overwrite use -o opt\n";
		}
		push(@out_hmm_file_names, $output_hmm_result);
	}
	if(@out_hmm_file_names > 1){
	   return(\@out_hmm_file_names);
	}else{
	   return(\$out_hmm_file_names[0]);
	}
}


#_______________________________________________________________________
# Title     : divide_clusters
# Usage     : &divide_clusters(\@file);
# Function  : This is the main funciton for divicl.pl
# Example   : &divide_clusters(\@file, $verbose, $range, $merge, $sat_file,
# 	  $dindom, $indup, "t=$thresh", "e=$evalue", $over_write, $optimize,
#	  "s=$score", "f=$factor");
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Class     :
# Keywords  : divicl
# Options   : _  for debugging.
#             #  for debugging.
#  $factor = by f=   # factor is for the merge proces
#                      (misoverlap tolerance factor 3=33%, 2=50%)
#                      factor works within msp chunk for one sequence
#                      to filter a good mergable seqlets
#  $short_region=  S by S -S  # taking shorter region overlapped in removing similar regions
#  $large_region=  L by L -L  # taking larger  region overlapped in removing similar regions
#  $average_region=A by A -A # taking average region overlapped in removing similar regions
#
# Version   : 2.0
#------------------------------------------------------------------------
sub divide_clusters{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($merge, $verbose, $sat_file, $thresh, $factor, $indup, $indup_percent,
	   $score, @temp_show_sub, $optimize, $file, $evalue, $over_write, $din_dom,
	   $sum_seq_num, $base_1, $output_clu_file, $short_region, $large_region, $average_region);

	$factor=7; # default factor is 7

	if($char_opt=~/m/){	       $merge='m';
	}if($char_opt=~/v/){       $verbose='v';
	}if($char_opt=~/i/){	   $indup='i';
	}if($char_opt=~/o/){	   $optimize='o';
	}if($char_opt=~/w/){       $over_write='w';
	}if($char_opt=~/d/){	   $din_dom='d';
	}if($char_opt=~/s/){	   $sat_file='s';
	}if($char_opt=~/S/){       $short_region  ='S';
	}if($char_opt=~/L/){	   $large_region  ='L';
	}if($char_opt=~/A/){	   $average_region='A';
	}if($vars{'t'}=~/\d+/){	   $thresh= $vars{'t'};
	}if($vars{'f'}=~/\d+/){    $factor= $vars{'f'};
	}if($vars{'s'}=~/\d+/){	   $score = $vars{'s'};
	}if($vars{'e'}=~/\d+/){	   $evalue= $vars{'e'};	}

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # When more than one file input were given
   #____________________________________________________________
   if(@file > 1){  #<=== @file has xxxx.msp, yyyy.msp  zzzz.msp ....,
		my (@good, @bad);
		if($indup =~/i/i){   open (INDUP, ">indup_stat\.txt");  } # this is not essential.

		for($i=0; $i< @file; $i++){
		     my (@out, @temp_show_sub);
			 my $indup_c=0;
			 $file=$file[$i];
			 $base_1=${&get_base_names($file)};
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # Define the output cluster file name:  eg, 3-232_cluster_F7.clu , F7 means factor used is 7
			 #______________________________________________________________________________________________
			 $output_clu_file="$base_1\_F${factor}\.clu";

			 if( !$over_write and -s $output_clu_file){  print "\n# $output_clu_file Already EXISTS, skipping. Use \'w\' opt to overwrite\n";
				 next;  }

			 print "\n# (1)  divide_clusters: processing file \"$file\" for $output_clu_file";

			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 #  If clu file(eg 2-1618_ss.clu ) is in pwd, tries to skip
			 #____________________________________________________________
			 if((-s $output_clu_file) > 512 and $over_write !~/w/){
				print "# $output_clu_file exists, skipping, use \"w\" option to overwrite\n";  next;
			 }

			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 #  Merging similar sequences
			 #____________________________________________________________
			 @out=@{&merge_sequence_in_msp_file(\$file, "s=$score", $din_dom, $sat_file, $optimize,
				 "t=$thresh", "e=$evalue", "f=$factor", "$range", "$merge", $verbose, $over_write,
				  $short_region, $large_region, $average_region )};

			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 #  Clustering the sets of merged seqlets => CORE algorithm
			 #____________________________________________________________
			 @out=@{&cluster_merged_seqlet_sets(\@out, "f=$factor", $optimize,
			        $short_region, $large_region, $average_region)};

			 $percent_fac=int(100-(1/$factor)*100);
			 @temp_show_sub=&show_subclusterings(\@out, $file, $sat_file, $dindom, $indup,
			                             "e=$evalue", "p=$percent_fac", "f=$factor");
			 $good_bad       = $temp_show_sub[0];
			 $indup_c        = $temp_show_sub[1];
			 $sum_seq_num   += $temp_show_sub[2];

			 if($good_bad==1){          push(@good, $file);
			 }else{         push(@bad, $file);       }

		}
		######################################### Writing stuff ##
		&write_good_bad_list_in_divide_clusters(\@good, \@bad);
		sub write_good_bad_list_in_divide_clusters{
			my  (@good, @bad);
			@good=@{$_[0]}; @bad=@{$_[1]};
			open(GOODBAD, ">good_bad.list");
			print GOODBAD "GOOD: all link    : 000\n";
			for($i=0; $i< @good; $i++){
			   print GOODBAD "$good[$i]\n";
			}
			print GOODBAD "BAD : Not all link: 000\n";
			for($i=0; $i< @bad; $i++){
			   print GOODBAD "$bad[$i]\n";
			}
			close(GOODBAD);
		}
		##########################################################

   }
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # when one single file input is given
   #____________________________________________________________
   else{
		$file=$file[0];
		$base_1=${&get_base_names($file)};
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Define the output cluster file name:  eg, 3-232_cluster_F7.clu , F7 means factor used is 7
		#______________________________________________________________________________________________
		$output_clu_file="$base_1\_F${factor}\.clu";

		if( !$over_write and -s $output_clu_file){
			print "\n# $output_clu_file Already EXISTS, skipping. Use \'w\' opt to overwrite\n"; exit;
		}
		print "\n# (1)  divide_clusters: processing ONE single file \"@file\" \n";
		@out=@{&merge_sequence_in_msp_file(\@file, "s=$score", $optimize, $din_dom, $sat_file,
		    $optimize, "t=$thresh", "e=$evalue", "f=$factor", "$range", "$merge", $verbose,
		    $short_region, $large_region, $average_region, $over_write)};

		print "# (2) divide_clusters: finished running \"merge_sequence_in_msp_file\" \n";
		@out=@{&cluster_merged_seqlet_sets(\@out,  "f=$factor",
		       $short_region, $large_region, $average_region, $optimize)};

		$percent_fac=int(100-(1/$factor)*100);
		@temp_show_sub=&show_subclusterings(\@out, $file, $sat_file, $dindom, $indup,
						   "e=$evalue", "p=$percent_fac", "f=$factor" );
		$good_bad       = $temp_show_sub[0];
		$indup_c        = $temp_show_sub[1];
		$sum_seq_num   += $temp_show_sub[2];

		if($good_bad==1){      push(@good, $file);
		}else{                 push(@bad, $file);       }

		######################################### Writing stuff ##
		&write_good_bad_list_in_divide_clusters(\@good, \@bad);
		##########################################################
   }
}




#_____________________________________________________________________________
# Title     : remove_similar_seqlets
# Usage     : @seqlets=@{&remove_similar_seqlets(\@split)};
# Function  : merges(gets average starts and ends ) of similar
#             seqlets to reduce them into smaller numbers. This can also handle
#              names like XLBGLO2R_8-119_d1hlm__.
#
# Example   : @seqlets=@{&remove_similar_seqlets(\@mrg1, $mrg2, \@mrg3)};
#               while @mrg1=qw(M_2-100 M_2-110 M_8-105 M_4-108 N_10-110 N_12-115);
#                     $mrg2='Z_3-400 Z_2-420';
#                     @mrg3=('X_2-300 X_3-300', 'X_2-300', 'X_5-300 X_2-301' );
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : merge_sequence_names, merge_seq_names, merge_sequence_ranges
#             merge_seq_ranges
# Options   : _  for debugging.
#             #  for debugging.
#             f= for factor
#             s  for shorter region matched is used
#             a  for average region matched is used
#             l  for larger region matched is used
#
# Version   : 1.9
#-------------------------------------------------------------------------------
sub remove_similar_seqlets{
   my ($i, $seq1, $smaller_leng, $leng1, $leng2, $start1, $end1, $seq2, $start2,
	   $av_diff, $num_of_seq, $av_end, $av_start, $end2, @seqlets,
	   @array_input, @seqlet, $tail1, $tail2, $shorter_region, $larger_region,
	   $average_region);
   my $factor=5;  ## !!! This var makes big difference in the final clustering
   $average_region = 'a'; ## default is to get the average of comparing regions

   for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'ARRAY'){
		   @array_input=@{$_[$i]};
		   for($j=0; $j<@array_input; $j++){
			   @seqlet=split(/ +/, $array_input[$j]);
			   push(@seqlets, @seqlet);
		   }
	   }elsif($_[$i]=~/f=(\S+)/){ $factor=$1
	   }elsif($_[$i]=~/^(s) *$/){     $shorter_region=$1 ; $average_region=0;
		   print "\n# remove_similar_seqlets: You chose short_region preservation option\n";
	   }elsif($_[$i]=~/^(l) *$/){     $larger_region =$1 ; $average_region=0;
	       print "\n# remove_similar_seqlets: You chose large_region preservation option\n";
	   }elsif($_[$i]=~/^(a) *$/){     $average_region=$1 ; $shorter_region=$larger_region=0;
		   print "\n# remove_similar_seqlets: You chose average_region preservation option\n";
	   }elsif($_[$i]=~/\S+\_\d+\-\d+/){
		   push(@seqlets, split(/ +/, $_[$i]) );
	   }elsif(ref($_[$i]) eq 'SCALAR' and ${$_[$i]}=~/\S+\_\d+\-\d+/){
	       push(@seqlets, split(/ +/, ${$_[$i]}) );
	   }
   }
   print "\n# remove_similar_seqlets : I am using \$factor : $factor\n";

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # Sorting is necessary as I am not doing the real thorough comparison
   #______________________________________________________________________
   $num_of_seq=@seqlets=sort @seqlets;
   #print "\n# (1) remove_similar_seqlets: Num of seq to merge: $num_of_seq (from \@seqlets)";

   my ($short_start, $large_start, $short_end, $large_end);
   for($i=0; $i< @seqlets; $i++){
	  if($seqlets[$i]=~/^ *(\S+)_(\d+)\-(\d+)(\S*)/){  ## last (\S*) is necessary for XLBGLO2R_8-119_d1hlm__
		 ($seq1, $start1, $end1, $tail1)=($1, $2, $3, $4);
	     if($seqlets[$i+1]=~/^(\S+)_(\d+)\-(\d+)(\S*)/){
			($seq2, $start2, $end2, $tail2)=($1, $2, $3, $4);
			if($seq1 eq $seq2){
			   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			   $diff_start=abs($start1 - $start2);
			   $diff_end  =abs($end1   - $end2  );
			   $leng1=$end1-$start1;
		       $leng2=$end2-$start2;

			   if($leng1 >= $leng2){ $smaller_leng=$leng2; }else{ $smaller_leng=$leng1; }
			   if( ($diff_start+$diff_end)/2 <= $smaller_leng/$factor ){

				   if($average_region){
					   $av_start=int(($start1+$start2) / 2);
					   $av_end  =int(($end1 + $end2) / 2);
					   $seqlets[$i]="$seq1\_$av_start\-${av_end}$tail1";  # $tail1 is for names like XLBGLO2R_8-119_d1hlm__
					   splice(@seqlets, $i+1, 1);
					   $i--;
				   }else{
					   if($start1 < $start2){
							$short_start=$start2; $large_start=$start1;  ## note that short start should be $start2 if $start2 is bigger
					   }else{
							$short_start=$start1; $large_start=$start2;
					   }
					   if($end1 < $end2){
							$short_end=$end1;  $large_end=$end2;
					   }else{
							$short_end=$end2;  $large_end=$end1;
					   }
					   if($shorter_region){
						   $seqlets[$i]="$seq1\_$short_start\-${short_end}$tail1";
					   }elsif($larger_region){
						   $seqlets[$i]="$seq1\_$large_start\-${large_end}$tail1";
					   }

					   splice(@seqlets, $i+1, 1);
					   $i--;
			       }
			   }
			}
		 }
	  }
   }
   return(\@seqlets);
}




#__________________________________________________________________________
# Title     : show_subclusterings
# Usage     : &show_subclusterings(\@out);
# Function  : This is the very final sub of divicl.pl
# Example   : @temp_show_sub=&show_subclusterings(\@out, $file, $sat_file, $dindom, $indup);
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : print_subclusterings, sum_subclusterings, write_subclustering
# Options   : _  for debugging.
#             #  for debugging.
#             f  for file output, eg: xxxxxxx.sat
#
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Version   : 2.1
#-------------------------------------------------------------------------
sub show_subclusterings{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my ($max_size, $SAT_file_out_too, $sat_file_name, $clu_file_name,
		$ori_cluster_size, $ori_cluster_num, $good_bad, @keys, $percentage_fac,
		$indup, @sizes, $sum_seq_num, $indup_percent, $indup_count);  # clusall_1e-5_clu_14-324_ss.sat
	my @out=@{$array[0]};
	$indup_count=0;
	$|=1;

	if($char_opt=~/s/){	    $SAT_file_out_too=1;	}
	if($char_opt=~/d/){	    $dindom=1;	}
	if($char_opt=~/i/){		$indup=1;	}
	if($vars{'f'}=~/\d+/){     $factor= $vars{'f'}; }
	if($vars{'p'}=~/\d+/){ $percentage_fac= int($vars{'p'}); }
	if($vars{'s'}=~/\d+/){	   $score = $vars{'s'};	}
	if($vars{'e'}=~/\d+/){	   $evalue= $vars{'e'};	}

	print "\n# show_subclusterings : \@file has : @file\n";
	if( ($file[0]=~/([\S+_]*?(\d+)\-(\d+)[_\w]*)\.msp/)||
		($file[0]=~/([\S+_]*?(\d+)\-(\d+)[_\w]*)\.sat/) ){
		 $SAT_file_out_too=1;
		 $ori_cluster_size=$2;
		 $ori_cluster_num =$3;
		 $base=$1;
		 $sat_file_name="$base\.sat";
		 $clu_file_name="$base\.clu";
	}else{
	     print "\n# The \@file input to show_subclusterings is not the right format\n"; exit;
	}

	open(CLU, ">$clu_file_name") or die "\n# show_subclusterings failed miserably to open \"$clu_file_name\" \n";

	@out=@{&sort_string_by_length(\@out)};

	for($i=0; $i< @out; $i++){ # @out has ( 'YAL054C_98-695 YBR041W_90-617', 'YBR115C_230-842 YBR222C_16-537 YER015W_121-686', etc)
	   my $count+=$i+1;
	   my ( $int_dup_number, $sub_clu_size, $seq_with_range, @sp, $new_clus_NAME,  %tem, %tem2, %tem3, $j, @keys, $num_seq);
	   if($out[$i]=~/^ *$/){ next }
	   @sp=split(/ +/, $out[$i]);

	   for($j=0; $j < @sp; $j++){
		  $seq_with_range=$sp[$j];
		  if($seq_with_range=~/^((\S+)_(\d+\-\d+))/){
			 $tem{$2}++;
			 $tem2{$2}.=sprintf("%-15s ", $1);
			 $tem3{$2} =$3;
		  }
	   }

	   @keys=keys %tem;
	   $num_seq=$sub_clu_size=@keys;

	   if($max_size < $sub_clu_size){
		  $max_size=$sub_clu_size; ## This is to collect the sizes of clusters to see if it is good.
	   }
	   $indup_count= &print_summary_for_divicl( $sat_file_name,
		  $SAT_file_out_too, $count, \%tem2, \%tem, $ori_cluster_num, $ori_cluster_size, $dindom,
		  $clu_file_name, \%tem3, $indup );

	   sub print_summary_for_divicl{ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			$sat_file_name=$_[0];
			if($_[1]== 1){	   $SAT =1;
			   open (SAT, ">$sat_file_name");
			};
			my $count=$_[2]; # count of cluster
			my %tem2=%{$_[3]};	my $num_seq=@keys=keys %tem2;
			my %tem=%{$_[4]};	my $ori_cluster_num=$_[5];
			my $new_clus_NAME=$ori_cluster_num.'0'.$count.'0'.$num_seq;
			my $ori_cluster_size=$_[6];
			my $dindom=$_[7];	my %tem3=%{$_[9]};
			my $indup=$_[10];	my (%internal_dup);

			if($SAT==1){
			   print SAT "\n_________________________________________________________";
			   print SAT "\n>$num_seq sequences in this subcluster seqlet($count)====";
			   print SAT "\n$out[$i] \n\n";
			   for($x=0; $x <@keys; $x++){
				  printf SAT ("   # %-11s : %-4s times occur %-s\n", $keys[$x], $tem{$keys[$x]}, $tem2{$keys[$x]});
			   }
			   print SAT "\n";
			   #~~~~~~~~~~ Domain Inside Domain ~~~~~~~~~~~~~~~~~
			   for($x=0; $x <@keys; $x++){
				  @domain_inside_domain=@{&get_domain_inside_domain($tem2{$keys[$x]})};
				  for($m=0; $m< @domain_inside_domain; $m++){
					 printf SAT "      Dindom: $m : $domain_inside_domain[$m]\n";
				  }
				  print SAT "\n";
			   }
			}
			#~~~~~~~~~~ Domain Inside Domain ~~~~~~~~~~~~~~~~~
			if($dindom==1){
				for($x=0; $x <@keys; $x++){
					@domain_inside_domain=@{&get_domain_inside_domain($tem2{$keys[$x]})};
					@domain_inside_domain=@{&remove_dup_in_array(\@domain_inside_domain)};
					for($m=0; $m< @domain_inside_domain; $m++){ print "  # Dindom: $m : $domain_inside_domain[$m]\n";   }
					print "\n";
				}
			}
			#==========================================================================================

			#~~~~~~~~~~ Internal duplication  ~~~~~~~~~~~~~~
			if($indup==1){
			   # @keys is the same as sub cluster size,
			   for($x=0; $x < @keys; $x++){
				  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				  # Checks each sequence for duplication
				  #___________________________________________________
				  my %internal_dup=%{&get_internal_dup_in_a_cluster( $tem2{$keys[$x]} )};
				  my @dup_keys=keys %internal_dup;
				  if(@dup_keys > 0){
					  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
					  #  This calculates the actual duplicated number rather than jus tthe sequences
					  #______________________________________________________________________________
					  $indup_count++;
					  printf ("%-14s %-12s %-4s", $keys[$x], $new_clus_NAME, $num_seq);
					  for($m=0; $m< @dup_keys; $m++){
						  printf ("%-19s=> %s\n", $dup_keys[$m], $internal_dup{ $dup_keys[$m] } );
					  }
				  }
			   }
			}

			#~~~~~~~~~~ Summary ~~~~~~~~~~~~~~~~~~~~~~~~~~~
			print  CLU  "Cluster size $num_seq\n";
			printf CLU ("Cluster %-12s # E:%-5s Factor:%-2s P:%-2s, Ori size:%-4s Sub:%-4s From:%-12s\n",
						  $new_clus_NAME, $evalue, $factor, $percentage_fac,
						  $ori_cluster_size, $num_seq, $ori_cluster_num);
			print       "Cluster size $num_seq\n";
			printf     ("Cluster %-12s # E:%-5s Factor:%-2s P:%-2s, Ori size:%-4s Sub:%-4s From:%-12s\n",
			              $new_clus_NAME, $evalue, $factor, $percentage_fac,
			              $ori_cluster_size, $num_seq, $ori_cluster_num);
			for($x=0; $x <@keys; $x++){
			   printf CLU ("   %-4s %-5s %-17s %-10s %-3s\n",
				  $num_seq, $ori_cluster_num, $keys[$x], $tem3{$keys[$x]}, $tem{$keys[$x]});
			   printf     ("   %-4s %-5s %-17s %-10s %-3s\n",
				  $num_seq, $ori_cluster_num, $keys[$x], $tem3{$keys[$x]}, $tem{$keys[$x]});
			}
			return($indup_count);
	   }
	}

	if($max_size == $ori_cluster_size){   $good_bad=1;
	}else{	                              $good_bad=0;	}

 	print "\n";
	return($good_bad, $indup_count, $ori_cluster_size);
}


#__________________________________________________________________________
# Title     : exchange_query_with_match_in_msp
# Usage     : @exchanged_msp=@{&exchange_query_with_match_in_msp(\@file)};
# Function  :
# Example   :
# Keywords  : swap_query_with_match_in_msp, invert_query_with_match_in_msp,
#             swap_query_seq_with_match_seq_in_msp,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub exchange_query_with_match_in_msp{

	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%exchanged_msp, @sorted_by_query_seq_names, @new_msp_lines);
	$open_msp_files_x_opt = 'x';
	if($char_opt=~/n/){ $names_only='n' }
	%exchanged_msp=%{&open_msp_files(@file, $open_msp_files_x_opt, $names_only )};

	@new_msp_lines=values %exchanged_msp;
	@sorted_by_query_seq_names=
	   map{ $_->[0] } sort {$a->[1] cmp $b->[1]} map {/^\d+ +\S+ +\d+ +\d+ +(\S+)/ && [$_, $1] } @new_msp_lines;
	return(\@sorted_by_query_seq_names);
}



#______________________________________________________________
# Title     : get_internal_dup_in_a_cluster
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_internal_dup_in_a_cluster{
	$cluster_line=$_[0] || ${$_[0]};
	my($i, $j, @seq, %out, $seq_name);
	my $overlap_factor=40;
	my $min_inside_dom_size=40;
	@seq=split(/ +/, $cluster_line);  ## These sequence are single seq with different regions
	@seq= map{$_->[0]} sort{$a->[1] cmp $b->[1] or $a->[2] <=> $b->[2] }
			             map {/^((\S+)_(\d+)\-(\d+) *.*)$/ && [$1, $2, $3, $4]} @seq;

	F1:for($i=0; $i< @seq; $i++){
	   $seq1=$seq[$i];
	   if($seq1=~/^(\S+)_(\d+)\-(\d+)/){
		  $seq_name=$1;
		  $start1=$2;
		  $end1=$3;
	   }
	   F:for($j=1; $j< @seq; $j++){
		  $seq2=$seq[$j];
		  if($seq1 eq $seq2){ next } ### Skip IDENTICAL ones (xxxx_1-10, xxxx_1-10)
		  if($seq2=~/^(\S+)_(\d+)\-(\d+)/){
			 $start2=$2;
			 $end2=$3;
		  }
		  $leng2=$end2-$start2;
		  $margin=$leng2/12;   ## 8% overlap is regarded as not overlapping

		  if(( ($start1+$margin) > $end2)||
		    ( ($start2+$margin) > $end1)){ # skips non overlapping seqlets

			$out{"$start1\-$end1"}.="$start2\-$end2 ";

			splice(@seq, $j, 1);
			$j--;
		  }
	   }
	}
	#@out=sort (@out);
	#@out=@{&remove_dup_in_array(\@out)};
	#@out=@{&remove_similar_seqlets(\@temp, "f=2")};
	return(\%out);
}

#______________________________________________________________
# Title     : get_domain_inside_domain
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : find_dindoms, domain_inside_domain, domain_in_domain
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_domain_inside_domain{
	$cluster_line=$_[0] || ${$_[0]};
	my($i, $j, @seq, @out);
	my $overlap_factor=40;
	my $min_inside_dom_size=40;
	@seq=split(/ +/, $cluster_line);
	F1:for($i=0; $i< @seq; $i++){
	   $seq1=$seq[$i];
	   if($seq1=~/^(\S+)_(\d+)\-(\d+)/){
		  $seq_name=$1;
		  $start1=$2;
		  $end1=$3;
	   }
	   F:for($j=0; $j< @seq; $j++){
		  $seq2=$seq[$j];
		  if($seq1 eq $seq2){ next } ### Skip IDENTICAL ones (xxxx_1-10, xxxx_1-10)
		  if($seq2=~/^(\S+)_(\d+)\-(\d+)/){
			 $start2=$2;
			 $end2=$3;
		  }
		  if(($start1 > $end2)||($start2 > $end1)){ # skips non overlapping seqlets
			 next;
		  }
		  if(($start1 > $start2)&&($end1 < $end2)){  #   -----
			 $leng_seq1=$end1-$start1;               # ----------
			 $leng_seq2=$end2-$start2;
			 if(( ($leng_seq2/2) >= $leng_seq1 )&&
			    ($leng_seq1 > $min_inside_dom_size) ){   # if seq1 is less than 60% of seq2, it is a hidden domain
				push(@out, "$seq2\($seq1\)");
			 }
		  }elsif(($start1 < $start2)&&($end1 > $end2)){  # -----------
			 $leng_seq1=$end1-$start1;                   #   ------
			 $leng_seq2=$end2-$start2;
			 if(( ($leng_seq1/2) >= $leng_seq2)&&
			    ($leng_seq2 > $min_inside_dom_size) ){   # if seq1 is less than 60% of seq2, it is a hidden domain
				push(@out, "$seq1\($seq2\)");
			 }
		  }
	   }
	}
	return(\@out);
}



#______________________________________________________________
# Title     : scale_for_horizontal_histogram
# Usage     :
# Function  : used to make things like:
#
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub scale_for_horizontal_histogram{
  my @query=@{$_[0]};
  if(@query > 2400){
	$condense_factor=20;
  }elsif(@query > 2200){
	$condense_factor=18;
  }elsif(@query > 1900){
	$condense_factor=16;
  }elsif(@query > 1600){
	$condense_factor=15;
  }elsif(@query > 1400){
	$condense_factor=14;
  }elsif(@query > 1200){
	$condense_factor=12;
  }elsif(@query > 1000){
	$condense_factor=10;
  }elsif(@query > 800){
	$condense_factor=9;
  }elsif(@query > 630){
	$condense_factor=8;
  }elsif(@query > 440){
	$condense_factor=6;
  }elsif(@query> 220){
	$condense_factor=4;
  }elsif(@query > 120){
	$condense_factor=3;
  }else{
	$condense_factor=2;
  }
  return(\$condense_factor);
}



#______________________________________________________________
# Title     : get_added_matched_regions_in_msp
# Usage     :
# Function  : This reads MSP file regions matched for a target seq
#             and adds things up to plot horizontally.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_added_matched_regions_in_msp{
   my @lines=@{$_[0]};
   for($i=0; $i< @lines; $i++){
	  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  #                   $1                 $2     $3    $4      $5     $6    $7     $8
	  #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
	  #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
	  if($lines[$i]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) *(.*)/){
		 if($4 eq $7){
			$query_name=$4;
			$query_leng=$3;
			for($j=0; $j<$query_leng; $j++){ $query[$j]=0; }
			next;
		 }else{
			if($match_name ne $7){ push(@matched_members, $7); }
			$query_start=$2;
			$query_end  =$3;
			$query_seq  =$4;
			$match_start=$5;
			$match_end  =$6;
			$desc       =$8;
			$match_name =$7;
			for($k= $query_start; $k<$query_end; $k++){
			   $query[$k]++;
			}
		 }
	  }
   }
   return(\@query);
}



#______________________________________________________________
# Title     : cluster_merged_seqlet_sets
# Usage     : @out=@{&cluster_merged_seqlet_sets(\@lines)};
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.3
#--------------------------------------------------------------
sub cluster_merged_seqlet_sets{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my ($optimize, @splited1, @splited2, $link_or_not);
   my @lines=@{$array[0]};
   $link_or_not=0;
   my $factor=6.5; ## this is the default threshold factor to merge two similar sequence
				   ## 10 will be used to divide the small seqlet leng and the result
				   ## 10% of the seqlet will be the minimum allowed difference between
				   ## the two seqlet regions. The factor is empirical and essential

   if($vars{'f'}=~/(\d+)/){ $factor=$1 }
   if($char_opt=~/o/){ $optimize=1 }

   F1: for($i=0; $i< @lines; $i++){
	  @splited1=split(/ +/, $lines[$i]);
	  for($j=0; $j< @lines; $j++){
		 if($lines[$i] eq $lines[$j]){ next  }
		 @splited2=split(/ +/, $lines[$j]);

		 $link_or_not=${&check_linkage_of_2_similar_seqlet_sets(\@splited1, \@splited2, "f=$factor")};
		 if($link_or_not==1){

		    if($optimize==1){ ##---- This will remove similar seqlets, not only identical ones
			   $lines[$i]=join(' ', sort @{&remove_similar_seqlets( [@splited1, @splited2])} );
			}else{
			   $lines[$i]=join(' ', sort @{&remove_dup_in_array( [@splited1, @splited2])} );
			}
			splice(@lines, $j,1);
			$j--;
			$i--;
			next F1;
		 }
	  }
   }
   return(\@lines);
}


#______________________________________________________________
# Title     : check_linkage_of_2_similar_seqlet_sets
# Usage     :
# Function  : connects two clusters of seqlets if they share
#              identical or near identical seqlets
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#  $factor = by f=  # eg)  "f=$factor" in the higher level sub
#
# Returns   :
# Argument  :
# Version   : 1.5
#--------------------------------------------------------------
sub check_linkage_of_2_similar_seqlet_sets{
   my ($seq1, $name1, $start1, $end1, $seq2,
	   $leng1, $leng2, $name2, $start2, $end2, $diff_start,
	   $diff_end, $optimize);
   my @splited1=@{$_[0]};
   my @splited2=@{$_[1]};

   my $link_or_not=0;
   my $factor=6.5; ## this is the default threshold factor to merge two similar sequence
				  ## 6.5 will be used to divide the small seqlet leng and the result
				  ## 20% of the seqlet will be the minimum allowed difference between
				  ## the two seqlet regions
   if($_[2]=~/f=(\S+)/i){
	  $factor=$1;
   }

   F1: for($s=0; $s<@splited1; $s++){
	  if($splited1[$s]=~/^ *((\S+)_(\d+)\-(\d+))/){
		  $seq1=$1;
		  $name1=$2;
		  $start1=$3;
		  $end1=$4;
	  }
	  F2: for($t=0; $t< @splited2; $t++){
		 if($splited2[$t]=~/^ *((\S+)_(\d+)\-(\d+))/){
			 $seq2=$1;
			 $name2=$2;
			 $start2=$3;
			 $end2=$4;
		 }
		 if($seq1 eq $seq2){ $link_or_not=1; return(\$link_or_not) }
		 if($name1 ne $name2){
			 next F2;
		 }elsif($name1 eq $name2){ ## ~~~~~~~~~~~~~ THIS is the MOST IMP CORE PART ~~~~~~~~~~~~~
			 $leng1=$end1-$start1;
		     $leng2=$end2-$start2;
			 if($leng1 >= $leng2){ $smaller_leng=$leng2; }else{ $smaller_leng=$leng1; }
			 $diff_start=abs($start1-$start2);
			 $diff_end  =abs($end1  -$end2  );
			 if((($diff_start+$diff_end)/2) <= ($smaller_leng/$factor) ){
			 	$link_or_not=1;
				return(\$link_or_not);
			 }
		 }## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  }
   }
   return(\$link_or_not);
}

#__________________________________________________________________________
# Title     : merge_arrays_by_common_elements
# Usage     :  @out=@{&merge_arrays_by_common_elements(\@ref_of_arrays)}
# Function  : merges arrays if there are common array elements.
#             if @A has (1,2,3) and @B has (2, 4, 5), they share 2, so
#             they are merged to be (1,2,3,4,5)
# Example   :
# Keywords  : cluster_arrays_by_common_elements, merge_arrays_if_common_elements
#             merge_array_if_common_elements,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub merge_arrays_by_common_elements{
	my ($i, @mother_array);

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Handling input array
	#______________________________________
	if( @_==1 and ref($_[0]) eq 'ARRAY'){  @mother_array=@{$_[0]};
	}elsif(@_ > 1){   @mother_array=@_;
	}else{
	   print "\n# The input for merge_arrays_by_common_elements needs one ref of array or multiple refs of array\n";
	}

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Main algo
	#______________________________________
	for($i=0; $i< @mother_array; $i++){
	   my @merged=(@{$mother_array[$i]}, @{$mother_array[$i+1]});
	   my ($common_or_not, %merged_hash, $j);

	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   #  Checks if there is a common element
	   #______________________________________
	   for($j=0; $j< @merged; $j++){
		  $merged_hash{$merged[$j]}++;
		  if($merged_hash{$merged[$j]} > 1){ $common_or_not=1 }
	   }
	   my @non_redundant=keys %merged_hash;

	   if($common_or_not==1){
		   $mother_array[$i]=\@non_redundant;
		   splice(@mother_array, ($i+1), 1);
		   $i--;
	   }
	}
	return(\@mother_array);
}

#__________________________________________________________________________
# Title     : check_common_elements_in_array
# Usage     : &check_common_elements_in_array($mother_array[$i], $mother_array[$i+1]));
# Function  : accepts 1 or 2 refs of arrays and checks if there is any
#             common(repeating) elements between the two (or inside one)
#             The result is either ref of 1, or 0
# Example   :
# Keywords  : is_there_common_element, if_common_elements
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub check_common_elements_in_array{
	my(@merged_array, %common_elemnts_count, $m);
	if(@_ > 1){
	   @merged_array=(@{$_[0]}, @{$_[1]});
	}else{
	   @merged_array=(@{$_[0]});
	}
	for $m (@merged_array){
	   $common_elemnts_count{$m}++;
	   if($common_elemnts_count{$m} > 1){
		  print "\n# $common_elemnts_count{$m}     $m common!\n";
		  return(\1);
	   }
	}
	return(\0);
}

#__________________________________________________________________________
# Title     : merge_similar_ranges
# Usage     : @all_ranges = @{&merge_similar_seqlets(@all_ranges)};
# Function  : merges ranges(10-20, 11-21 etc) when there is any overlap
#              is present
#             If you put a reverse range like '2000-20', it will
#              complain and reverse the order and do the job after correction.
#
# Example   : INPUT:
#
#   @input=( '1-30 1-40 1-50',
#            '2-49 4-40 2-99'....)
#
# Keywords  : merge_similar_regions, merge_ranges, merge_regions,
#              merge_sequence_ranges, merge_overlap_ranges, connect_ranges
#              connect_overlapping_ranges, connect_similar_ranges
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------------
sub merge_similar_ranges{
   my (@all_ranges, $new_start, $new_end, @output, $i, $seq1, $start1,
	   $end1, $seq2,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2);
   my $factor=6.5;     #  33% sequence mismatch region is allowed(3)
   my $leng_thresh=30;
   my $optimize=0;
   for($i=0; $i< @_; $i++){
	  if(ref($_[$i]) eq 'ARRAY'){
		  @all_ranges=@{$_[$i]};
	  }elsif($_[$i]=~/f=(\S+)/){
	      $factor=$1
	  }elsif($_[$i]=~/o/i){
	      $optimize=1 }
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  Sorting the ranges by the starting range number.(essential)
   #______________________________________________________________
   @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			   map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  iterating merger
   #_________________________________________________________
   for($i=0; $i< @all_ranges; $i++){
	   if($all_ranges[$i] =~/(\d+)\-(\d+)/){
		   ($start1, $end1)=($1, $2);
		   if($start1 > $end1){
			  print "\n# merge_similar_ranges: Error, \$start1 :$start1 is larger than \$end1: $end1\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }
	   }
	   if($all_ranges[$i+1] =~/(\d+)\-(\d+)/){
		   ($start2, $end2)=($1, $2);
		   if($start2 > $end2){
			  print "\n# merge_similar_ranges: Error, \$start2 :$start2 is larger than \$end2: $end2\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i+1]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }

		   if($start1 <= $start2      and $end1 >= $end2){    ## -----------------
			   $new_start=$start1;                             #      --------
			   $new_end =$end1;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if( $start2 <= $start1     and $end2 >= $end1){      ##    -------
			   $new_start=$start2;                              #  ----------------
			   $new_end =$end2;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if($start1 <= $start2      and $start2 <= $end1){     #  -----------
			   $new_start=$start1;                               #     -----------
			   $new_end =$end2;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if($start2 <= $start1      and $start2 <= $end1){    #       -----------
			   $new_start=$start2;                              #  ---------
			   $new_end =$end1;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
	   }
   }
   return(\@all_ranges);
}



#______________________________________________________________
# Title     : merge_similar_seqlets
# Usage     : @all_seqlets = @{&merge_similar_seqlets(@all_seqlets)};
# Function  : merges seqlet sets which have identical
#             sequences and share similar regions by connection factor of 30%
#             This means, if any two seqlets from the same sequences which
#             share more than 70% seqlet regions overlapping are merged
#             This only sees the very first sequence in the seqlets line!!!
#             (so, PARTIAL MERGE !!)
# Example   : INPUT:
#
#   @input=( 'seq1_1-30 seq2_1-40 seq3_1-50',
#            'seq1_2-49 seq4_4-40 seq8_2-99'....)
#
# Keywords  : merge_similar_sequences, merge_sequence_names,
#              merge_sequence_ranges, merge_similar_sequences_with_ranges
# Options   : _  for debugging.
#             #  for debugging.
#  $short_region=  S by S -S  # taking shorter region overlapped in removing similar regions
#  $large_region=  L by L -L  # taking larger  region overlapped in removing similar regions
#  $average_region=A by A -A # taking average region overlapped in removing similar regions
#
# Version   : 1.7
#--------------------------------------------------------------
sub merge_similar_seqlets{
   my (@all_seqlets, @result_all_seqlets, $i, $seq1, $start1, $end1, $seq2,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2,
	   $short_region, $large_region, $average_region);
   my $factor=6.5;     #  33% sequence mismatch region is allowed(3)
   my $leng_thresh=30;
   my $optimize=0;
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
   # Sorting (parsing) input to get options and input array
   #_________________________________________________________
   for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'ARRAY'){
		   @all_seqlets=@{$_[$i]};
	   }elsif($_[$i]=~/f=(\S+)/){ $factor=$1
	   }elsif($_[$i]=~/o/i){      $optimize=1
	   }elsif($_[$i]=~/^S/){      $short_region='S';
	   }elsif($_[$i]=~/^L/){      $large_region='L';
	   }elsif($_[$i]=~/^A/){      $average_region='A'; }
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # This is to remove which are identical in @all_seqlets;
   #_________________________________________________________
   for($i=0; $i< @all_seqlets; $i++){
	  if($all_seqlets[$i] eq $all_seqlets[$i+1]){
		  push(@result_all_seqlets, $all_seqlets[$i]);
		  $i++;
		  next;
	  }
	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  # @split1 and 2 are arrays from different string entry in @all_seqlets
	  #_________________________________________________________
	  @split1=sort split(/ +/, $all_seqlets[$i]);
	  @split2=sort split(/ +/, $all_seqlets[$i+1]);

	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
	  #  (1) If the first elements of @split1 and 2 are identical, lets merge the two arrays
	  #________________________________________________________________________________
	  if($split1[0] eq $split2[0]){
		  @split=(@split1, @split2);
		  if($optimize==1){ #~~~~~ optimize option removes similar seqlets
			 push(@result_all_seqlets, join(' ', sort @{&remove_similar_seqlets(\@split,
			                              $short_region, $large_region, $average_region)} ));
		  }else{
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # Only removes exactly identical ones
			 #__________________________________________________________
			 push(@result_all_seqlets, join(' ', @{&remove_dup_in_array(\@split, 's')} ));
		  }
		  $i++;
		  next;
	  }
	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
	  # (2) If the first elements of @split1 and 2 are NOT identical, lets check the sequence ranges
	  #________________________________________________________________________________
	  if($split1[0] =~/^(\S+)_(\d+)\-(\d+)/){
		   ($seq1, $start1, $end1)=($1, $2, $3);
		   if($split2[0] =~/^(\S+)_(\d+)\-(\d+)/){
			   ($seq2, $start2, $end2)=($1, $2, $3);

			   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~````
			   # Check if the seqs are identicl (from the two arrays), no point to merge which are not identical from the first
			   #__________________________________________________________________________________________
			   if($seq1 eq $seq2){
					$diff_start=abs($start1-$start2);
					$diff_end  =abs($end1  -$end2  );
					$leng1=$end1-$start1;
					$leng2=$end2-$start2;
					if($leng1 >= $leng2){ $smaller_leng=$leng2; }else{ $smaller_leng=$leng1; }

					#~~~~~~ If the sum of overhangs are smaller than a third of average length
					if( ( ($diff_start+$diff_end)/2 <= $smaller_leng/$factor ) &&
						($smaller_leng > $leng_thresh ) ){
						@split=(@split1, @split2);
						if($optimize==1){ #~~~~~ optimize option removes similar seqlets
						   push(@result_all_seqlets, join(' ', sort @{&remove_similar_seqlets(\@split,
						                            $short_region, $large_region, $average_region )} ));
						}else{
						   push(@result_all_seqlets, join(' ', @{&remove_dup_in_array(\@split, 's')} ));
						}
						$i++;
						next;
					}else{
						push(@result_all_seqlets, join(' ', @split1));
						next;
					}
			   }
			   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			   # As they are not teh same, lets just check the next one in @split2
			   #_____________________________________________________________________
			   else{
					push(@result_all_seqlets, join(' ', @split1));
					next;
			   }
		   }
		   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		   # If there is no range (region) in seq naem, let's skip, as there is no way to check
		   #__________________________________________________________________________________
		   else{
			   push(@result_all_seqlets, join(' ', @split1));
			   next;
		   }
	  }
   }
   return(\@result_all_seqlets);
}





#______________________________________________________________
# Title     : sort_by_digits_in_string
# Usage     :
# Function  : sorts arrays of strings like
#
#   MJ0228_314-573 MJ1197_348-601
#   MJ0228_451-576 sll0078_502-594 sll1425_489-611
#   MJ0228_479-572 sll0078_502-594
#
#   According to the digits after seq names _314-, _451-, _479-
#    in the above
#   This only looks at the very first sequence in the string
#
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.4
#--------------------------------------------------------------
sub sort_by_digits_in_string{
   my (@out, $i,  @temp1, @temp2, $old, @T);
   my @array_of_string=sort @{$_[0]};

   for($i=0; $i<= @array_of_string; $i++){
	  if($array_of_string[$i]=~/^((\S+)_(\d+)\-(\d+) *.*)$/){
		 unless(defined($old)){
			$old=$2;
			push(@temp1, $1);
			push(@temp2, $3);
		    next;
		 }elsif($2 eq $old){
			push(@temp1, $1);
			push(@temp2, $3);
			next;
		 }elsif( ($2 ne $old)||($i==$#array_of_string) ){
			&sort_and_put_strings_to_out;
		    push(@temp1, $1);
		    push(@temp2, $3);
			$old  =$2;

			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			sub sort_and_put_strings_to_out{
			   my ($j, $k, $num);
			   @temp2=sort { $a<=>$b } @temp2; ## sort numerically
			   F1: for($j=0; $j< @temp2; $j++){
				  $num=$temp2[$j];
				  for($k=0; $k< @temp1; $k++){
					 if($temp1[$k]=~/^(\S+)_$num\-/){
						push(@out, $temp1[$k]);
						splice(@temp1, $k, 1);
						$k--;
						splice(@temp2, $j, 1);
						$j--;
						next F1;
					 }
				  }
			   }
			   @temp1=@temp2=();

			}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	     }
	  }elsif($i > 0){ ## for the very last sort
		  &sort_and_put_strings_to_out;
	  }
   }
   return(\@out);
}



#______________________________________________________________
# Title     : sort_words_in_string
# Usage     :
# Function  : sort words in strings sperated by ' ' or "\n"
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : sort_words_in_sequences, sort_sequences_in_string,
#             sort_strings_in_string,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub sort_words_in_string{
   my @in=@{$_[0]} || @_;
   my @OUT;
   for (@_){
	  push(@OUT, join(' ', sort split(/ +|\n/) ));
   }
   return(\@OUT);
}



#__________________________________________________________________________
# Title     : convert_hmmls_to_msp_files
# Usage     : @out=@{&convert_hmmls_to_msp_files(\@file)};
# Function  :
# Example   :
# Keywords  : convert_hmmls_to_msp
# Options   :
#   S=$single_out_file_name   for producing single msp file with all the hmmls contents
#   E=Enguiry_name    for specifying enquiry seq name rather than 'HMM', the default
# Returns   :
# Argument  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub convert_hmmls_to_msp_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@all_the_files_written, $written_file, $base,
		$out_msp_file_name, $input_hmmls_file,  %out);
	if($vars{'S'}=~/\S/){ $single_out_file_name=$vars{'S'} };
	if($vars{'E'}=~/\S/){ $enquiry_name        =$vars{'E'}   # default $enquiry_name is input file base
	}else{ $enquiry_name='HMM' }

	for($i=0; $i< @file; $i++){

		if($vars{'E'}=~/\S/){ $base=$enquiry_name;   # When $enquiry_name is given, it uses for output name
		}else{
			$base=${&get_base_names($file[$i])};
		}
		$out_msp_file_name="$base\.msp";
		$input_hmmls_file=$file[$i];
		if($vars{'S'}=~/\S/){
			%out=(%out, %{&open_hmmls_files($input_hmmls_file, 'm', "E=$enquiry_name")} );
		}else{
			%out=%{&open_hmmls_files($input_hmmls_file, 'm')};  # m for msp out
			$written_file=${&write_msp_files(\%out, $out_msp_file_name)};
			push(@all_the_files_written, $written_file);
		}
	}

	if($vars{'S'}=~/\S/){
		$written_file=${&write_msp_files(\%out, $single_out_file_name)};
		push(@all_the_files_written, $written_file);
	}
	if(@all_the_files_written > 1){
		return(\@all_the_files_written);
	}else{
		return(\$all_the_files_written[0]);
	}
}

#______________________________________________________________
# Title     : convert_mmp_to_mrg
# Usage     :
# Function  :
# Example   :
#  Example OUT as string
#
#   slr1950 sll1920 sll0672 sll1076 sll1614 slr0797 slr0798 slr0822 slr1729
#   slr1729 sll1076 sll0672 sll1614 sll1920 slr0797 slr0798 slr0822 slr1950
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub convert_mmp_to_mrg{
   my @mmp=@{$_[0]};
   my($i, $long, $out, @out, $leading_seq);
   for($i=0; $i< @mmp; $i++){
	 if(($mmp[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/)&&($1 eq $2)){
		next;
	 }elsif($mmp[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/){
		$leading_seq=$1;
		$long=$2;
		$long=~s/\,/ /g;
		$out="$leading_seq $long";
		push(@out, $out);
	 }
   }
   return(\@out);
}

#_______________________________________________________________________________
# Title     : add_ranges_in_msp_line
# Usage     :
# Function  : this adds ranges to the seqnames of msp files
#             mmp line is msp line with additional sequences at the end
# Example   :
# Keywords  : convert_msp_to_mmp, convert_msp, convert_msp_2_mmp
#             change_msp_to_mmp, add_range_in_msp, convert_msp_line_to_mmp_line
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.5
#-------------------------------------------------------------------------------
sub add_ranges_in_msp_line{
   my $input_msp=${$_[0]} || $_[0];
   my($score, $evalue, $long_1, $new_seq1, $new_seq2, $middle,
	  $start1, $end1, $start2, $end2, $seq1, $seq2, $new);

   if($input_msp=~/^ *(\d+) +(\S+) *\S*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)/){
	  ($score, $evalue, $start1, $end1, $start2, $end2)=($1, $2, $3, $4, $6, $7);
	  ($seq1, $seq2)=($5, $8);
	  if($seq1=~/(\S+)\_\d+\-\d+/){
		 $new_seq1="$1\_$start1\-$end1";
	  }else{
		 $new_seq1="$seq1\_$start1\-$end1";
	  }
	  if($seq2=~/(\S+)\_\d+\-\d+/){
		 $new_seq2="$1\_$start2\-$end2";
	  }else{
		 $new_seq2="$seq2\_$start2\-$end2";
	  }
	  $new=sprintf("%-6s %-8s %-5s %-5s %-32s %-5s %-5s %-32s",
					$score, $evalue, $start1, $end1, $new_seq1, $start2, $end2, $new_seq2);
   }
   return(\$new);
}


#______________________________________________________________
# Title     : convert_msp_line_to_mmp_line
# Usage     :
# Function  : this adds ranges to the seqnames of msp files
#             mmp line is msp line with additional sequences at the end
# Example   :
# Keywords  : convert_msp_to_mmp, convert_msp, convert_msp_2_mmp
#             change_msp_to_mmp, add_range_in_msp
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.5
#--------------------------------------------------------------
sub convert_msp_line_to_mmp_line{
   my $input_msp=${$_[0]} || $_[0];
   my($score, $evalue, $long_1, $new_seq1, $new_seq2, $middle,
	  $start1, $end1, $start2, $end2, $seq1, $seq2, $new);

   if($input_msp=~/^ *(\d+) +(\S+) *\S*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)/){
	  ($score, $evalue, $start1, $end1, $start2, $end2)=($1, $2, $3, $4, $6, $7);
	  ($seq1, $seq2)=($5, $8);
	  if($seq1=~/(\S+)\_\d+\-\d+/){
		 $new_seq1="$1\_$start1\-$end1";
	  }else{
		 $new_seq1="$seq1\_$start1\-$end1";
	  }
	  if($seq2=~/(\S+)\_\d+\-\d+/){
		 $new_seq2="$1\_$start2\-$end2";
	  }else{
		 $new_seq2="$seq2\_$start2\-$end2";
	  }
	  $new=sprintf("%-6s %-8s %-5s %-5s %-32s %-5s %-5s %-32s",
					$score, $evalue, $start1, $end1, $new_seq1, $start2, $end2, $new_seq2);
   }
   return(\$new);
}

#________________________________________________________________________________
# Title     : merge_sequence_alignments
# Usage     : &merge_sequence_alignments(@seq);  while @seq has
#              @seq=(\%hash1, \%hash2);  while %hash1 and %hash2 have
#    %hash1=qw(seq1 ANN-NTMQQRRQQQRKRRRQQQSSSSTTST seq2 --NNN--QQ--QQQ--RRRR--SSSS--);
#    %hash2=qw(seq2 NN-QQQQQ--RRRR----SS--SS---    seq3 -NNXQQQXQRTRRRXTTSTSSMMSSTTT);
#
# Function  :
# Example   :
# Keywords  : combine_sequence_alignment, merge_sequence_alignment_pairs
#             merge_seq_alignment, make_interm_alignment, make_3_way_alignment
# Options   :
#    l=  for sequence block length by print_seq_in_block subroutine
#    t=  for specifying the length of seq names shown.
#    t   for truncating the seq names in printing out.
#    s   for sorting the final output lines (default anyway for print_seq_in_block)
#
# Version   : 1.2
#--------------------------------------------------------------------------------
sub merge_sequence_alignments{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@splited1_common, @splited2_common, @splited1_non_common, @splited2_non_common,
	   @gap_pos1, @gap_pos2, $block_length, $sort_seq_names,
	   $truncate_name_to_10_char, $trunc_name_to, $block_range );
	$block_length=60;
	if($vars{'l'}=~/\d+/){  $block_length=$vars{'l'}; }
	if($char_opt=~/t/){    $truncate_name_to_10_char='t' }
	if($vars{'t'}=~/\d+/){ $trunc_name_to=$vars{'t'}; }
	if($char_opt=~/s/){    $sort_seq_names='s' }
	if($vars{'r'}=~/(\d+\-\d+)/){ $block_range= $1 };

	for($i=0; $i< @hash; $i+=2){
		my %hash1=%{$hash[$i]};
		my %hash2=%{$hash[$i+1]};
		my ($pair1_name1, $pair1_name2)=keys  %hash1;
		my ($pair2_name1, $pair2_name2)=keys  %hash2;

		# finding the common entry
		if("$pair1_name1" eq "$pair2_name1"){
			 $common_seq_entry=$pair1_name1;
			 $non_common_seq_entry1=$pair1_name2;
			 $non_common_seq_entry2=$pair2_name2;
		}elsif("$pair1_name1" eq "$pair2_name2"){
			 $common_seq_entry=$pair2_name2;
			 $non_common_seq_entry1=$pair1_name2;
			 $non_common_seq_entry2=$pair2_name1;
		}elsif("$pair1_name2" eq "$pair2_name1"){
			 $common_seq_entry=$pair1_name2;
			 $non_common_seq_entry1=$pair1_name1;
			 $non_common_seq_entry2=$pair2_name2;
		}else{
			 print "\n# merge_sequence_alignments:
			 ERROR, I can not find common seq entry: $pair1_name1 $pair2_name1 $pair2_name1 $pair2_name2\n";
			 next;
		}

		%hash1=%{&make_seq_alignment_length_even(\%hash1)};
		%hash2=%{&make_seq_alignment_length_even(\%hash2)};


		#~~~~~~~~~~~~~~~~~~~~~~~`~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
		# Aligning the start of common sequences
		#  i.e. putting pre gap part to a sequence which does not have it.
		#
		#  NKNWKLRAHLC-KHTGEKP---FPCKEEGCDKGFTSLHHLTRHS---ITHTGEKN--FKCDSDKCDLTFTTKANMKKHFNRFH splited1_common
		#  --------------NKNWKLRAHLCKHTGEKPFPCKEEGCDKGFTSLHHLTRHSITHTGEKNFKCDSDKCDLTFTTKANMKKHFNRFH-- splited2_common
		#  becomes->
		#
		#  --------------NKNWKLRAHLC-KHTGEKP---FPCKEEGCDKGFTSLHHLTRHS---ITHTGEKN--FKCDSDKCDLTFTTKANMKKHFNRFH splited1_common
		#  --------------NKNWKLRAHLCKHTGEKPFPCKEEGCDKGFTSLHHLTRHSITHTGEKNFKCDSDKCDLTFTTKANMKKHFNRFH-- splited2_common
		#________________________________________________________________________
		if( $hash1{$common_seq_entry}=~/^(\-+)/){
			$hash2{$common_seq_entry}="$1".$hash2{$common_seq_entry};
			$hash2{$non_common_seq_entry2}="$1".$hash2{$non_common_seq_entry2};
		}elsif($hash2{$common_seq_entry}=~/^(\-+)/){
			$hash1{$common_seq_entry}="$1".$hash1{$common_seq_entry};
			$hash1{$non_common_seq_entry1}="$1".$hash1{$non_common_seq_entry1};
		}

		@gap_pos1=@{&get_gap_positions(\$hash1{$common_seq_entry}, 'p' )}; # p means all positive positions wanted
		@gap_pos2=@{&get_gap_positions(\$hash2{$common_seq_entry}, 'p' )};

		@splited1_common    =split(//, $hash1{$common_seq_entry} );
		@splited1_non_common=split(//, $hash1{$non_common_seq_entry1} );
		@splited2_common    =split(//, $hash2{$common_seq_entry} );
		@splited2_non_common=split(//, $hash2{$non_common_seq_entry2} );

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		#  Core algorithm
		#__________________________________________
		for($i=0; $i< @splited1_common; $i++){
		    if($splited1_common[$i] ne $splited2_common[$i]){
				if($splited1_common[$i]=~/\W/){
					 splice(@splited2_common, $i, 0, '-');
					 splice(@splited2_non_common, $i, 0, '-');
				}elsif($splited2_common[$i]=~/\W/){
				     splice(@splited1_common, $i, 0, '-');
					 splice(@splited1_non_common, $i, 0, '-');
					 if($splited1_common[$i] eq $splited1_non_common[$i]){
						 $homology_line2[$i]=':';
					 }else{
						 $homology_line2[$i]=' ';
					 }
				}
		    }
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Making Homology line
		#_____________________________________________
		for($i=0; $i< @splited1_non_common; $i++){
			if($splited1_non_common[$i] eq '-'){ $homology_line1[$i]=' '; }
			if($splited1_non_common[$i] eq $splited1_common[$i] and $splited1_non_common[$i] eq $splited2_non_common[$i] ){
				$homology_line1[$i]= $splited1_non_common[$i]  unless $splited1_non_common[$i] eq '-';
			}elsif($splited1_non_common[$i] eq $splited1_common[$i] ){
				$homology_line1[$i]=$splited1_non_common[$i] unless $splited1_non_common[$i] eq '-';
			}elsif($splited2_non_common[$i] eq $splited1_non_common[$i]){
				$homology_line1[$i]=':' unless $splited1_non_common[$i] eq '-';
			}elsif( ${&amino_acid_homology_matrix($splited2_non_common[$i], $splited1_non_common[$i])} ){
				$homology_line1[$i]='.' unless $splited1_non_common[$i] eq '-';
			}
			else{ $homology_line1[$i]=' '; }

			if($splited2_non_common[$i] eq '-'){ $homology_line2[$i]=' '; }
			if($splited2_non_common[$i] eq $splited2_common[$i] and $splited2_non_common[$i] eq $splited1_non_common[$i] ){
				$homology_line2[$i]=$splited2_non_common[$i]  unless $splited2_non_common[$i] eq '-';
			}elsif($splited2_non_common[$i] eq $splited2_common[$i]){
				$homology_line2[$i]=$splited2_non_common[$i] unless $splited2_non_common[$i] eq '-';
			}elsif($splited2_non_common[$i] eq $splited1_non_common[$i]){
				$homology_line2[$i]=':' unless $splited2_non_common[$i] eq '-';
			}elsif( ${&amino_acid_homology_matrix($splited2_non_common[$i], $splited1_non_common[$i])} ){
				$homology_line2[$i]='.' unless $splited1_non_common[$i] eq '-';
			}else{ $homology_line2[$i]=' '; }

		}
		if($verbose){
			 print @splited1_non_common, " \t splited1_non_common\n";
			 #print @homology_line1,      " \t homology line\n";
			 #print @splited1_common,    " \t splited1_common\n";
			 print @homology_line1,      " \t homology line\n";
			 print @splited2_common,     " \t splited2_common\n";
			 print @homology_line2,      " \t homology line\n";
			 print @splited2_non_common, " \t splited2_non_common\n";
		}
		$out_hash_issa{"1 $non_common_seq_entry1 "}=join('', @splited1_non_common);
		$out_hash_issa{"3 $common_seq_entry"}=join('', @splited2_common);
		$out_hash_issa{'2 homol_line1'}=join('', @homology_line1);
		$out_hash_issa{'4 homol_line2'}=join('', @homology_line2);
		$out_hash_issa{"5 $non_common_seq_entry2"}=join('', @splited2_non_common);
		&print_seq_in_block(\%out_hash_issa, $sort_seq_names,  "t=$trunc_name_to", "f=defaul_result\.issa",
		                      $truncate_name_to_10_char, "l=$block_length", "r=$block_range");
	}
}





#________________________________________________________________________________________
# Title     : merge_sequence_in_msp_file
# Usage     :
# Function  :
# Example   : INPUT: (MSP file) ===>
#  59     2.6        47    64     d2pia_3                    10    30     d1erd___10-30
#  161    1.1e-07    24    91     d2pia_3                    16    85     d1frd___16-85
#
#  722    0          1     106    d1put__                    1     106    d1put___1-106
#  66     4.9        2     68     d1put__                    43    106    d2lbp___43-106
#  69     1.3        12    49     d1put__                    81    120    d1cgo___81-120
#
#  60     3.3        13    38     d1frd__                    32    57     d1orda1_32-57
#  65     1.7        21    58     d1frd__                    40    69     d2mtac__40-69
#
#   ==== OUTPUT ===>
#    d1frd___1-98 d1frd___1-98_1-98 d1frd___16-85 d2pia_3_24-91_24-91
#    d1frd___16-85_16-85 d2pia_3_24-91
#    d1put___1-106 d1put___1-106_1-106
#    d2pia_3_1-98 d2pia_3_1-98_1-98
#
# Keywords  : mergr_seq_in_msp_file, merge_sequence_in_msp
# Options   :
#  $short_region   =  S by S -S  # taking shorter region overlapped in removing similar regions
#  $large_region   =  L by L -L  # taking larger  region overlapped in removing similar regions
#  $average_region =  A by A -A # taking average region overlapped in removing similar regions
#
# Version   : 2.0
#----------------------------------------------------------------------------------------
sub merge_sequence_in_msp_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my ($msp_value, @all_seqlets, %temp_hash, @msp_chunks, $clu_out, $size_of_all_seqlets,
	    $base, $optimize, $mrg_out, @arr, $sat_out, %final_hash_out, @final_pre_hash,
	    $thresh, $merge, $factor, $evalue, $score,
		$short_region, $large_region, $average_region);
	$factor=$default_factor=4; #~~~~~~~ default connection factor U
	$thresh=30;
	$evalue=10;
	$score =75;

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Following changes the defaults with given parameters
	#_____________________________________________________________
	if($char_opt=~/o/i){	   $optimize='o';    ## This will cause using remove_similar_seqlets than remove_dup_in_array !
	}if($char_opt=~/m/){	   $merge='m';
	}if($char_opt=~/v/){	   $verbose='v';
	}if($char_opt=~/S/){       $short_region='S';
	}if($char_opt=~/L/){	   $large_region='L';
	}if($char_opt=~/A/){	   $average_region='A';
	}if($vars{'t'}=~/\d+/){	   $thresh=$vars{'t'};
	}if($vars{'f'}=~/\d+/){	   $factor=$vars{'f'};  ## Here I give a generous $factor !
	}if($vars{'s'}=~/\d+/){	   $score = $vars{'s'};
	}if($vars{'e'}=~/\d+/){	   $evalue= $vars{'e'};	}

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Just to inform what parameters have been chosen
	#_____________________________________________________________
	print "\n# merge_sequence_in_msp_file : default \$score  : $score";
	print "\n#                            : default \$evalue : $evalue";
	print "\n#                            : used    \$thresh : $thresh";
	print "\n#                            : default \$factor : $default_factor";
	print "\n#                            : used    \$factor : $factor\n";

   	for($c=0; $c< @file; $c++){
		open(MSP, "$file[$c]");
		$base=${&get_base_names($file[$c])};
		$clu_out="$base.clu"; # <-- This is the most important output. Sarah's program will process this
		$sat_out="$base.sat";
		print "\n# (1) merge_sequence_in_msp_file : processing $file[$c] for $clu_out\n";
		my @msp1=<MSP>;

		for($i=0; $i< @msp1; $i++){
			#~~~~~~~~~~ Include range or NOT in the seq name ~~~~~~~~~~~~~~~~~~~~~~~~~~`
			# %temp_hash is just to get the chunk of MSP block. As msp file uses empty line as a delimiter
			#____________________________________________________________________________
			if($char_opt=~/r/){
				if($msp1[$i]=~/^ *(\d+) +(\S+) *\S* +(\d+) +(\d+) +(\S+)[_\d+\-\d+]?[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)[_\d+\-\d+]?/){
					 if($1 < $score or $2 > $evalue){ next };
					 $new_seq1="$5\_$3\-$4";
					 $new_seq2="$8\_$6\-$7";
					 $msp1[$i]=sprintf("%-6s %-8s %-5s %-5s %-40s %-5s %-5s %-40s",
										$1, $2, $3, $4, $new_seq1, $6, $7, $new_seq2);
					 $temp_hash{$5}.="$msp1[$i]\n";
				}
			}else{
				if($msp1[$i]=~/^ *(\d+)[ \t]+(\S+)[ \t]*\S*[ \t]+\d+[ \t]+\d+[ \t]+(\S+) +\d+[\t ]+\d+[ \t]+\S+/){
					 if($1 < $score or $2 > $evalue){ next };
					 $temp_hash{$3}.="$msp1[$i]\n";
				}
			}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		}
		close(MSP);
	}
	@msp_chunks= values(%temp_hash); ## Using temp hash is more than 2 times faster than push

	for($i=0; $i< @msp_chunks; $i++){
	   @arr=@{&merge_sequence_in_msp_chunk($msp_chunks[$i], $verbose, $optimize,
	         "$merge", "e=$evalue", "s=$score", "f=$factor", "t=$thresh",
	         $short_region, $large_region, $average_region)};
	   push(@all_seqlets,  @arr);
	}

	#~~~~~~~~~ sorting inner sequences in strings ~~~~~~~~~
	#______________________________________________________
	@all_seqlets=@{&sort_words_in_string(@all_seqlets)}; ## This speeds up about 2 times !!!

	#~~~~~~~ Sort by the _digit-  in seqlet names ~~~~~~~~~
	@all_seqlets= map{$_->[0]} sort{$a->[1] cmp $b->[1] or $a->[2] <=> $b->[2]  }
			             map {/^ *((\S+)_(\d+)\-(\d+).*)/ && [$1, $2, $3, $4]} @all_seqlets;

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# merge sequences in a simple way until there is no change in the array
	#  This is an incomplete merge(merges first seqlets of string ...
	#______________________________________________________________________
	for($i=0; $i< @msp_chunks; $i ++){
		$size_of_all_seqlets=@all_seqlets;
		@all_seqlets = @{&merge_similar_seqlets(\@all_seqlets, $optimize,
		                 $short_region, $large_region, $average_region, "f=$factor")};
		if($size_of_all_seqlets > @all_seqlets){
		   @all_seqlets = @{&merge_similar_seqlets(\@all_seqlets, $optimize,
		                 $short_region, $large_region, $average_region, "f=$factor")};
		}else{
		   last;
		}
	}
	if($optimize==1){
		@all_seqlets=@{&remove_similar_seqlets(\@all_seqlets,
		                $short_region, $large_region, $average_region)};
	}else{
		@all_seqlets=@{&remove_dup_in_array(\@all_seqlets)};
	}
	return(\@all_seqlets);
}



#______________________________________________________________
# Title     : merge_sequence_in_msp_chunk
# Usage     :
# Function  : merges sequences which are linked by common regions
#             This filters the sequences by evalue and ssearch score
#             This is the main algorithm of merging similar sequences.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : connect_sequence_in_msp, link_sequence_in_msp_chunk
#             connect_sequence_in_msp_chunk, link_sequence_in_msp
#             merge_sequence, link_sequence, connect_sequence
# Options   : _  for debugging.
#             #  for debugging.
#             m  for merge file output format (.mrg)
#             t= for threshold of seqlet length eg)  "t=30"
#             f= for overlap factor (usually between 2 to 7 )
#                 2 means, if the two regions are not overlapped
#                  by more than HALF of of the smaller region
#                  it will not regard as common seqlet block
#             s= for ssearch score minimum
#             e= for ssearch e value maximum
#             S  for S -S  # taking shorter region overlapped in removing similar regions
#             L  for L -L  # taking larger  region overlapped in removing similar regions
#             A  for A -A # taking average region overlapped in removing similar regions
#
# Returns   :
# Argument  :
# Version   : 2.2
#--------------------------------------------------------------
sub merge_sequence_in_msp_chunk{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my ($ssearch_score2, $evalue_found2, $evalue_found1, $ssearch_score1, $optimize );
   my ($L, %out_hash, @out, $LL, @Final_out, $verbose, $final_factor, $R_diff,
		$short_region, $large_region, $average_region);
   my $factor =6; # default factor for around 30% sequence mis-overlap is the threshold for common block
	  #~~~~~~~~~~~~~~ The lower the factor the larger clustering will occur ~~~~~~~~~~~~
   my $score  =105; # default ssearch score. seq below this will be chucked out
   my $evalue =40; # default maximum e value used. Seq higher than this will be thrown out
   my $thresh =40; # sequence length threshold. overlap less than this will be ignored

   if($char_opt=~/v/){     $verbose = 'v'
   }if($char_opt=~/o/){    $optimize = 'o'
   }if($char_opt=~/S/){    $short_region='S';
   }if($char_opt=~/L/){	   $large_region='L';
   }if($char_opt=~/A/){	   $average_region='A'; }

   if($vars{'t'}=~/\d+/){
	  $thresh=$vars{'t'}; print "\n# merge_sequence_in_msp_chunk: Thresh is $thresh\n" if (defined $verbose);
   }if($vars{'f'}=~/\d+/){
	  $factor=$vars{'f'}; print "\n# merge_sequence_in_msp_chunk: Factor is $factor\n" if (defined $verbose);
   }if($vars{'s'}=~/\d+/){
	  $score = $vars{'s'}; print "\n# merge_sequence_in_msp_chunk: Score is $score\n" if (defined $verbose);
   }if($vars{'e'}=~/\d+/){
	  $evalue= $vars{'e'}; print "\n# merge_sequence_in_msp_chunk: Evalue is $evalue\n" if (defined $verbose);
   }
   my @seqlets=split(/\n+/, (${$_[0]} || $_[0]) );

   F1: for($i=0; $i < @seqlets; $i ++){
	  if($seqlets[$i]=~/^ *((\d+) +(\d+\.?[e\-\d]*) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+)) +(\S+) *(.*)/){
		  if($6 eq $9){ splice(@seqlets, $i, 1); $i--; next };
		  ($long_match1, $enq_seq1, $mat_seq1, $R_start1, $R_end1 )=($1, $6, $9, $4, $5);
		  $R_leng1=$R_end1-$R_start1;
		  $ssearch_score1= $2;
		  $evalue_found1 = $3;
	  }
	  if( ($R_leng1 < $thresh) || ($ssearch_score1 < $score) ){ splice(@seqlets, $i, 1); $i--; next; }
	  if( $evalue_found1 > $evalue){ splice(@seqlets, $i, 1); $i--; next; }

	  F2: for($j=0; $j < @seqlets; $j ++){
		 if($seqlets[$i] eq $seqlets[$j]){ next };
		 if($seqlets[$j]=~/^ *((\d+) +(\d+\.?[e\-\d]*) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+)) +(\S+) *(.*)/){
			($long_match2, $enq_seq2, $mat_seq2, $R_start2, $R_end2)=($1, $6, $9, $4, $5);
			$R_leng2=$R_end2-$R_start2;
			$ssearch_score2=$2;
			$evalue_found2= $3;
	     }
		 if( ($R_leng2 < $thresh)||($ssearch_score2 < $score) ){ splice(@seqlets, $j, 1); $j--; next; }
		 if( $evalue_found2 > $evalue){ splice(@seqlets, $j, 1); $j--; next; }

		 $R_diff=abs($R_leng1-$R_leng2)/2;   ## <<<---- Note it is div by 2

		 if($R_leng2 < $R_leng1){ $smaller_leng=$R_leng2; }else{ $smaller_leng=$R_leng1; }

		 $Start_diff=abs($R_start1-$R_start2)/2; ## <<<---- Note it is div by 2
		 $final_factor=$smaller_leng/$factor;


		 #~~~~~~~~~~ If average R_diff and average Start_diff are less then 1/7 of the smaller seqlet
		 #~~~~~~~~~~ we regard they are same selqets
		 if(( $R_diff < $final_factor ) &&       ### $Start_diff is essential!
			($Start_diff < $final_factor ) ){  ### if diff is less than around 30% of the smaller length
			if($verbose=~/v/){
			   print "\n\$R_diff:$R_diff \$Start_diff:$Start_diff $smaller_leng $final_factor $factor";
			}
			if($R_leng2 >= $R_leng1){
			       #~~~~~ $mat_seq1 or $mat_seq2 can increase to 'slr1453,sll0238', so you need ',' in the middle only
				   $extended_name="$mat_seq2,$mat_seq1";
				   $L=length($extended_name);
				   $LL=length($long_match2)+2;
				   $seqlets[$i]= sprintf("%-${LL}s %-${L}s", $long_match2, $extended_name);
				   splice(@seqlets, $j, 1);
				   $i-- unless($i==0);
				   $j--;
				   next F1;
			}elsif( $R_leng1 >= $R_leng2){  ## chooses the bigger range seq
				   $extended_name="$mat_seq1,$mat_seq2"; # must be ',' not ' '
				   $L=length($extended_name);
				   $LL=length($long_match1)+2;
				   $seqlets[$i]=sprintf("%-${LL}s %-${L}s", $long_match1, $extended_name);
				   splice(@seqlets, $j, 1);
				   $i-- unless($i <= 0);
				   $j--;
				   next F1;
			}
	     }else{
			next F2;
		 }
	  }
   }
   if($char_opt=~/m/){
	  for($i=0; $i< @seqlets; $i++){
		if($seqlets[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/){
		   if($1 eq $2){ next }
		   $leading_seq=$1; $long=$2; $long=~s/\,/ /g;
		   push(@Final_out, "$leading_seq $long" );
		}
	  }
   }
   sort @Final_out;
   return(\@Final_out);
}



#______________________________________________________________
# Title     : get_overlapping_range
# Usage     : @n1=@{&get_overlapping_range(\@ranges1, \@ranges2)};
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : get_overlapping_range_in_msp, get_overlapping_range_in_msp_file
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_overlapping_range{
   my (@new_range, $R_start1, $R_start2);
   ($R_start1, $R_end1)=@{$_[0]}[0..1];
   ($R_start2, $R_end2)=@{$_[1]}[0..1];

   if(($R_start1 <= $R_start2)&&        # ------------
	 ( $R_end1 >= $R_end2) ){           #   -------
	   @new_range= ($R_start2, $R_end2);
   }elsif(($R_start1 <= $R_start2)&&    # -----------
	 ( $R_end1 <= $R_end2) &&           #    -----------
	 ( $R_end1 >  $R_start2) ){
	   @new_range= ($R_start2, $R_end1);
   }elsif(($R_start1 >= $R_start2)&&    #    -----------
	 ( $R_end1 >= $R_end2  ) &&         # -----------
	 ( $R_end2 >  $R_start1) ){
	   @new_range= ($R_start1, $R_end2);
   }elsif(($R_start1 >= $R_start2)&&    #   ------
	 ( $R_end1 <= $R_end2) ){           # -----------
	   @new_range= ($R_start1, $R_end1);
   }else{                                #  ----
	  @new_range=(0,0);                  #        --------
   }
   return(\@new_range);
}



#______________________________________________________________
# Title     : find_central_seq_msp_chunk
# Usage     : This finds the correct msp chunk with given seq name
#             and big original or any msp chunk
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub find_central_seq_msp_chunk{
   my $central_seq=${$_[0]};
   my @MSP=@{$_[1]};
   my ($j, $range, @MSP_1);
   for($j=0; $j<@MSP; $j++){
	  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  #                   $1                 $2     $3    $4      $5     $6    $7     $8
	  #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
	  #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
	  if($MSP[$j]=~/^ *(\d+) +\d+\.*\d* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) +(.+)/i){
		 if( ($4 eq $central_seq) && ($4 ne $7) ){
			$range="$2 \- $3";
			push(@MSP_1,  $range);
		 }
	  }
   }
   return(\@MSP_1);
}



#______________________________________________________________
# Title     : find_central_sequence
# Usage     :
# Function  : accepts msp file and finds the central sequence.
#             central sequence is in the centre of all the member
#             sequences in a group or cluster
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub find_central_sequence{
	#"""""""""""""""""< handle_arguments{ head Ver 3.9 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

   my (%score, $out, @Keys);
   for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];

	  if($debug eq 1){ print "\n inputfile is $input_file\n" };
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_msp_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
	  }
	  open(FILE_1,"$input_file");
	  @MSP=<FILE_1>;

	  for($j=0; $j<@MSP; $j++){
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 #                   $1                 $2     $3    $4      $5     $6    $7     $8
		 #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
		 #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
		 if($MSP[$j]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) +(.+)/){
			if($4 eq $7){
			   $seq_name=$7;
			}elsif( ($4 ne $7) && ( defined($seq_name) ) ){
			   $score{$seq_name}+= $1;
			}
		 }
	  }
   }
   @Keys=keys %score;
   for($i=0; $i< @Keys; $i++){
	  if($score{$Keys[$i]} > $largest){
		 $largest=$score{$Keys[$i]};
		 $out=$Keys[$i];
	  }
   }
   return(\$out);
}



#______________________________________________________________
# Title     : write_dof_files
# Usage     : &write_dof_files(\@msps);
#             while @msps means msp file names
# Function  : write Alex's domfam file. it prints out tilde lines
#             if the seqlet matched are below threshold defined.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#             v  for verbose STDOUT
#             n  for NO seq start and end number display
#             t= for teshold (eg, t=40  for Blastp(or ssearch) score 40 threshold)
#
# Returns   :
# Argument  :
# Version   : 1.2
#--------------------------------------------------------------
sub write_dof_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my $thresh = 0;
   my @msps=@file;
   my (@msp_name, $msp_file, @list, $leng, $scale);
   $thresh=$vars{'t'} if defined( $vars{'t'} ) ;
   $scale =$vars{'s'};

   for($i=0; $i < @msps; $i++){ ### @msps should have msp file names
	  my($x, $leng, $line_size, $o, $I_len, $SC, %count);
	  my @msp_name=split(/\./, $msps[$i]);
	  my $base=$msp_name[0];
	  my $out_dof_file="$base\_$thresh\.dof";
	  open(DOF, ">$out_dof_file");
	  open(MSP, "$msps[$i]");
	  my @output=<MSP>;

	  ###### Getting automatic $scale ~~~~~~~~~~~~~~~~~~~~~~~
	  unless($scale=~/\d+/){
		 for($j=0; $j< @output; $j++){
			if($output[$j]=~/^ *\S+[\t ]+\S+[\t ]+1[\t ]+(\d+)[\t ]+\S+/){
			   $leng=$1 if ($1 > $leng);
			}
		 }
		 if($leng > 1300){ $scale = 20;
		 }else{
		    $scale=int($leng / (log($leng)*10) );
		 }
		 if($scale < 5){
			$scale=5;
		 }
	  }

	  if($output[0]=~/^$/){ splice(@output, 0, 1); }
	  if($remove=~/r/){ shift(@output); }  ## removing the first line

	  #######======  Drawing the top line ###########
	  $line_size=int($leng/$scale);

	  #######======  SCALE writing =======###########
	  print DOF "\n NAME       LENG  FROM- TO  ";
	  print     "\n NAME       LENG  FROM- TO  "  if ($char_opt=~/v/);
	  my $div=int($leng/$scale);
	  my $Scaled=int($div/$scale);
	  for($x=1; $x< $leng; $x+=$div){
		 $I_len=length($x);
		 $SC=$Scaled-$I_len;
		 print DOF $x."."x$SC;
		 print     $x."."x$SC if ($char_opt=~/v/);
	  }
	  #####~~~~ Processing MSP file lines ~~~~~~~~
	  for ($o=0; $o< @output; $o++){
		 my $each_msp_line=$output[$o];
		 my ($line,$score, $start1, $end1, $query, $start2, $end2,
		     $put_blank_line, $no_num, $target_seq,$first_time,
		     $S2L, $E2L, $L);
		 if($each_msp_line =~/^$/){
			print DOF "\n";
			print "\n"  if ($char_opt=~/v/);
			$first_time=1;
			next;
		 }elsif($each_msp_line =~/^ *(\d+)[ \t]+\S+[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)/){
			$score=$1;
		    #if($score < $thresh){
		    #   print "\n>$4 score too low"; next;
		    #}
			$start1    =$2;
			$end1      =$3;
			$query     =$4;
			$start2    =$5;
			$end2      =$6;
			$target_seq=$7;
			$target_leng=length($fasta{$target_seq});
			if($count{$target_seq} >= 1){
			   $first_time=0;
			   $put_blank_line=0;
			}elsif($count{$target_seq} < 1){
			   $first_time=1;
			}
			$count{$target_seq}++;
		 }
		 my $S=int($start1/$scale);
		 my $E=int($end1/$scale);
		 $L=$E-$S+1;
		 if($char_opt=~/n/i){
			  if($score < $thresh){
				 $line=" "x$S."\~"x$L;
			  }else{
				 $line=" "x$S."\-"x$L;
			  }
		 }else{
			  $S2L=length($start2);
			  $E2L=length($end2);
			  $L=$L-$S2L-$E2L;
			  if($L < 1){ $L=1 }
			  if($score < $thresh){
				 $line=" "x$S.$start2."\~"x$L.$end2;
			  }else{
				 $line=" "x$S.$start2."\-"x$L.$end2;
			  }
		 }

		 ### Actual writing ####
		 if($first_time==1){
			$first_time=0;
				#    Name leng strt-end   |---------------------------------------------------------
			  printf DOF "\n\>%-11s %-4d %-4d\-%4d %-${line_size}s", $target_seq, $target_leng, $start1, $end1, $line;
			  printf  ("\n\>%-11s %-4d %-4d\-%4d %-${line_size}s",
			   $target_seq, $target_leng, $start1, $end1, $line)  if ($char_opt=~/v/);
		 }elsif($first_time !=1){
			if($put_blank_line==1){
			  print DOF "\n";
			  printf DOF "\n %-11s %-4d %-4d\-%4d %-${line_size}s", $target_seq, $target_leng, $start1, $end1,$line;
			  print  "\n"  if ($char_opt=~/v/);
			  printf  ("\n %-11s %-4d %-4d\-%4d %-${line_size}s",
				 $target_seq, $target_leng, $start1, $end1,$line)  if ($char_opt=~/v/);
		   }else{
			  printf DOF "\n %-11s %-4d %-4d\-%4d %-${line_size}s", $target_seq, $target_leng, $start1, $end1,$line;
			  printf ( "\n %-11s %-4d %-4d\-%4d %-${line_size}s",
				$target_seq, $target_leng, $start1, $end1,$line)  if ($char_opt=~/v/);
		   }
		 }

	  }
	  print DOF "\n\n";
	  print "\n"  if ($char_opt=~/v/);
	  print "\n# ~~~~~ lines mean match regions with below threshold ($thresh)" if $thresh > $score;
	  print "\n# $out_dof_file   is created \n";
   }

}



#______________________________________________________________
# Title     : make_filtered_list
# Usage     :
# Function  : this is the core of check_genome_cluster.pl
#             finds good linkage seqlets in msp files
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub make_filtered_list{ #####################################33
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   $|=1;
   my ($result, @msp,%temp_msp, @num_of_all_links,$link_counter,$diff1, $diff2);
   my $num_seq=0;
   my @sizes_of_seqlets;
   open(MSP, "$file[0]");
   FIRST_FOR:for($c=0; $c< @file; $c++){
	 my %temp_msp;
	 #print "\nFirst\(${c}\)th INPUT file processing\n";
	 my @msp1=<MSP>;
	 my @msp, $MSP;
	 for($i=0; $i< @msp1; $i++){
	   if($msp1[$i]=~/^ *(\d+) +\d+\.?[e\-\d]*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)/i){
		  if($1 > $msp_value){
		    $temp_msp{$4} .= $msp1[$i];
		  }
	   }
	 }
	 close(MSP);
	 #print "\n", %temp_msp, "\n";

	 @msp=values %temp_msp;
	 $num_seq=@msp;
	 print "\nTOP Number of seq is: $num_seq \n";

	 #""""""  Real algorithm starts HERE##''''''''''''''''''''''''''''''''''''''''

	 # optimization: I found no need to go through all msp chunk. One is enough by experience
	 #MSP1: for($i=0; $i< @msp; $i++){ # @msp has (mspchunk1, mspchunk2...)
	 if($fast==1){ $msp_chunk_num=1
	 }else{ $msp_chunk_num=@msp }

	 MSP1: for($i=0; $i< $msp_chunk_num; $i++){ # @msp has (mspchunk1, mspchunk2...)
 	    my $pos=$i+1;
	    my @seqlets1=split(/\n+/,  $msp[$i]);
	    print "   MSP1 ${i}th  MSP chunk is handled #######","\n";
	    my $temp=@seqlets1-1;
	    SEQLET1: for($j=1; $j < @seqlets1; $j++){
		   my @OUTPUT=&follow_seqlet_link($seqlets1[$j], @msp);
		   my $depth_of_linking=${$OUTPUT[0]};
		   my $size_of_common_seqlet=${$OUTPUT[1]};
		   if(($depth_of_linking==@msp)&&($size_of_common_seqlet > $threshold)){
		      $result=1;
		      if($fast == 1){
				 last FIRST_FOR;
		         #goto EXIT_1;
		      }
		   }
		   #push(@num_of_all_links, $depth_of_linking);
		   #push(@sizes_of_seqlets, $size_of_common_seqlet);
	    }
	 }
   }
   #print "\n All searched links: \n", "@num_of_all_links", "\n";
   #print "\n Sizes of common seqlet\n", "@sizes_of_seqlets\n";
   EXIT_1:
   return(\$result, \$num_seq);
}


#______________________________________________________________
# Title     : follow_seqlet_link
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub follow_seqlet_link{
   my $seqlet_line=shift;
   my @msp=@_;
   my ($i, $j, $link_counter, @common_range,$seqlet_very_ori, @ranges_very_ori,
	  @new_ranges, $seqlet_ori, $matched_ori, @ranges1, @ranges2);
   if($seqlet_line=~/^ *\d+[ \t]+\d+\.?[e\-\d]*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)/i){
	  $seqlet_ori =$3;
	  $seqlet_very_ori=$3;
	  $matched_ori=$6;
	  @ranges1=($1, $2, $4, $5);
	  @ranges_very_ori=@ranges1;
   }
   my $link_counter;
   my $visited .= "$seqlet_ori ";
   #print "\n\=======$seqlet_ori \@ranges1 is @ranges1 =================================\n";

   MSP: for($i=0; $i<@msp; $i++){
	  my @seqlets1=split(/\n+/,  $msp[$i]);
	  #my @common_range;
	  SEQLET1: for($j=1; $j < @seqlets1; $j++){
	     my $seqlet_num=$j;
	     my @temp= &get_msp_range($seqlets1[$j]) if $seqlets1[$j]=~/\S/;
	     my @ranges2=@{$temp[0]}; # has (1 2 3 4);
	     my $seq2        = $temp[1];
	     my $matched_seq2= $temp[2];
	     #splice(@seqlets1, $j, 1);
	     #$j--;
		 if($seq2 eq $seqlet_ori){
		    next MSP; # to the next MSP chunk
		 }elsif(($seq2 eq $matched_ori)&&($visited !~ /$matched_seq2/)){
			if($verbose=~/v$/i){
	          print "\n    SEQLET1_________________________________________\(${pos}th MSP chunk\)\n";
	          print "    SEQLET1 $temp number of seqlets  for the ${pos}th msp file\n";
			  print "    SEQLET1_________________________________________\(${seqlet_num}th seqlet )\n";
			  print "    QUERY seqlet is $temp[1]: ", "@ranges2[0..1]  MATCH seqlet is $temp[2]: ", "@ranges2[2..3]","\n";
			  print "    Target seqlet is $seq2 : @ranges2\n";
			}
			if(($ranges1[2] >= $ranges2[0])&&    ##     =======
			   ($ranges1[3] <= $ranges2[1]) ){   ##  ==============
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n      MSP2      \@ranges1 is ", "@ranges1" if($verbose=~/v/i);
			   #print "\n                \@ranges2 is ", "@ranges2" if($verbose=~/v/i);
			   $diff1=$ranges1[2] - $ranges2[0];
			   $diff2=$ranges2[1] - $ranges1[3];
			   @new_ranges =($ranges1[2], $ranges1[3], ($ranges2[2]+$diff1), ($ranges2[3]-$diff2) );
			   @ranges1=(@new_ranges); #, $ranges2[2], $ranges2[3]);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   if($verbose=~/v/i){
			      print "\n      FIRST elsif    Finalout @new_ranges \$link_counter=$link_counter", "\n";
			      print "                                         \$num_seq = $num_seq\n";
			   }
			   if( ($link_counter+2) >= @msp){
				  #print  "\n   All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     Common range: ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] <= $ranges2[0])&& ##    --------------
				   ($ranges1[3] >= $ranges2[1]) ){ ##       --------
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   @new_ranges =($ranges2[0], $ranges2[1], $ranges2[2], $ranges2[3],);
			   #print "\n      Second elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   #print "                                         \$num_seq = $num_seq\n";
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";

			   if( ($link_counter+2) >= @msp){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] <= $ranges2[0])&&   #    ======
				   ($ranges1[3] <= $ranges2[1]) &&   ##      =======
				   ($ranges1[3] >= $ranges2[0]) ){
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   #print "                                         \$num_seq = $num_seq\n";
			   $diff2=$ranges2[1] - $ranges1[3];
			   @new_ranges=($ranges2[0], $ranges1[3],  $ranges2[2], ($ranges2[3]-$diff2));
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   #print "\n      Third elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   if( ($link_counter+2) >= @msp){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] >= $ranges2[0])&&  #        ======
				  ($ranges1[3] >= $ranges2[1])&&   ##  =======
				  ($ranges1[2] <= $ranges2[1]) ){
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   $diff1=$ranges1[2] - $ranges2[0];
			   @new_ranges=($ranges1[2], $ranges2[1], ($ranges2[0]+$diff1), $ranges2[1]);
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   #print "\n      Fourth elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   #print "                                         \$num_seq = $num_seq\n";
			   if( ($link_counter+2) >= @msp){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}else{
			   if($verbose=~/v/i){
			      print "\nX X X X X   Link broken ", @new_ranges, " \$link_counter=$link_counter\n";
			   }
			   next SEQLET1;
			}
		 }
	  }
   }
   EXIT:
   $final_num_of_seq_linked=$link_counter+2;
   if($final_num_of_seq_linked==@msp){
	  $seqlet_leng=$new_ranges[$#common_range]-$new_ranges[$#common_range-1]+1;
	  #print "\n Common Seqlet size:  $seqlet_leng \n";
   }else{
	  $seqlet_leng=0;
   }
   #print "\nLINKING seq num for seqlet $seqlet_very_ori \(","@ranges_very_ori","\) is $final_num_of_seq_linked \n";
   return(\$final_num_of_seq_linked, \$seqlet_leng);
}


#______________________________________________________________________________
# Title     : sso_to_msp
# Usage     : &sso_to_msp(@ARGV, $single_out_opt);
# Function  : This takes sso file(s) and produces MSP file. It
#             concatenate sso file contents when more than one
#             sso file is given.
# Example   : &sso_to_msp(@ARGV, 'OUT.msp', $single_out_opt);
# Warning   : This capitalize all the input file names when
#              producing xxxxx.msp. xxxxx.sso -> XXXX.sso
# Keywords  : sso_file_to_msp_file, convert_sso_to_msp,
# Options   : _  for debugging.
#             #  for debugging.
#             v  for showing the MSP result to screen
#             s  for making single MSP file for each sso file
#                    as well as big MSP file which has all sso
#             u= for upper expectation value limit
#             l= for lower expect val limit
#             s= for single file name input eg. "s=xxxxx.msp"
#             n  for new format (msp2 format)
#             r  for adding range
#             r2 for adding ranges in all sequence names
#
# Returns   : the file names created (xxxx.msp, yyyy.msp,,,,)
# Argument  :
# Version   : 2.6
#-----------------------------------------------------------------------------
sub sso_to_msp{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my ($upper_expect_limit, $lower_expect_limit)=(50, 0);
   my (%sso, @sso, @SSO, $big_out_msp1,  @final_out, $big_out_msp2,
	   $create_sso, $single_out_opt, $add_range, $add_range2, $big_out_msp,
	   $Evalue_thresh, $new_format, $Score_thresh, $margin, $single_file_name);
	if($vars{'u'}=~/([\.\d]+)/){ $upper_expect_limit = $vars{'u'} };
	if($vars{'l'}=~/([\.\d]+)/){ $lower_expect_limit = $vars{'l'} };
	if($vars{'t'}=~/(\d+)/){ $Score_thresh  = $vars{'t'} };
	if($vars{'m'}=~/(\d+)/){ $margin  = $vars{'m'} };
	if($vars{'s'}=~/\S/){ $single_file_name  = $vars{'s'} };
	if($char_opt=~/r2/){  $add_range='r'; $add_range2='r2' }
	if($char_opt=~/r/){   $add_range = 'r' }
	if($char_opt=~/c/){   $create_sso = 'c' }
	if($char_opt=~/s/){   $single_out_opt='s' }
	if($char_opt=~/n/){   $new_format='n' }
   print "\n# File given to sso_to_msp is \"@file\", Normally xxx.sso file names\n";

   if($single_file_name=~/\S/){
	   $big_out_msp=$single_file_name;
   }else{
	   for($i=0; $i < @file; $i++){
		   if($file[$i]=~/\.msp$/){ ## when output file name is given
			   $big_out_msp=$file[$i];
			   splice(@file, $i, 1);
			   $i--;
		   }elsif($file[$i]=~/^(\d+\-\d+)([_\d]*)\.[mfs]?sso/){  ## creates xxxx.msp file name from xxxx.sso
			   $big_out_msp1="\U$1"."$2"."\.msp";
			   $big_out_msp2="\U$1".".msp";
		   }elsif($file[$i]=~/^(\S+)\.[mfs]?sso$/){
			   $big_out_msp1="\U$1"."\.msp";
			   $big_out_msp2="\U$1"."_all".".msp";
			   print "\n# sso_to_msp: File matched  xxxx.sso  format \n";
		   }elsif($file[$i]=~/^(\S+)\.out$/){
			   $big_out_msp1="\U$1"."\.msp";
			   $big_out_msp2="\U$1"."_all".".msp";
			   print "\n# sso_to_msp: File matched  xxxx.out  format \n";
		   }elsif($file[$i]=~/^(\S+)\.p[rot\,]*\.ts\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }elsif($file[$i]=~/^(\S+)\.ts\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }elsif($file[$i]=~/^(\S+)\.out\.gz/ or $file[$i]=~/^(\S+)\.[mfs]?sso\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }
	   }
   }
   if(defined($big_out_msp)){
	   $big_out_msp1=$big_out_msp2=$big_out_msp;
	   print "\n# \$big_out_msp is defined as \'$big_out_msp\'\n";
   }else{
	   print "\n# sso_to_msp: You did not define the big MSP file out format, so $big_out_msp1 \n";
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  (1) When File was given to this sub routine
   #__________________________________________
   if(@file == 1){   ## ONE single file input??
	  print "# one file @file is given, OUT will be: $big_out_msp1 \n";
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	          "u=$upper_expect_limit",
			  "l=$lower_expect_limit",
			  "m=$margin",
			  $new_format,
			  "s=$big_out_msp")};
	  push(@final_out, &write_msp_files(@sso, $big_out_msp1,
	        $single_out_opt, $add_range) );

   }elsif(@file > 1){ ## MOre than 1 file input??
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	        "l=$lower_expect_limit",
	        "u=$upper_expect_limit",
	        "m=$margin",
	        $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp2,
			$single_out_opt, $add_range)} ); ## concatenates all the hash ref to one
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  (2) When NO File but ARRAY is given
   #      Here, you can have SSO files created
   #__________________________________________
   elsif(@array >=1){
	  print "\n# In sso_to_msp, \@array is given rather than \@file";
	  @sso=@{&open_sso_files(@array, "u=$upper_expect_limit", $add_range2,
			  "l=$lower_expect_limit", $add_range, $create_sso,
			  "m=$margin", $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp,
						  $single_out_opt, $add_range)} );
   }
   return(\@final_out);
}




#______________________________________________________________
# Title     : get_sub_hash
# Usage     : %sub_hash=%{&get_sub_hash(\%FASTA, \@list)};
# Function  : fetches hash keys and values by giving keys to
#             a hash
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : subhash, sub_hash, get_hash_elements, fetch_sub_hash
#             take_sub_hash, get_hash_by_keys, get_sub_hash_by_keys
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_sub_hash{
  	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (%out_hash, @out_hash_all_ref);
	for($i=0; $i < @hash; $i++){
	   my %hash=%{$hash[$i]};
	   my @keys = keys %hash;
	   for($j=0; $j < @raw_string; $j++){
		  for($l=0; $l < @keys; $l++){
			if($keys[$l] eq $raw_string[$j]){
				$out_hash{$keys[$l]}=$hash{$keys[$l]};
			}
		  }
	   }
	   push(@out_hash_all_ref, \%out_hash);
	}
	if(@out_hash_all_ref > 1){
	   return(@out_hash_all_ref);
	}else{
	   return($out_hash_all_ref[0]);
	}
}



#______________________________________________________________
# Title     : get_smallest_file
# Usage     : $smallest_file_name=${&get_largest_file(@ARGV)};
# Function  : checks the size of files and returns the smallest
#             one's name. If a file is not present in pwd or
#             specified absolute path, it ignores it.
# Example   :
# Keywords  : choose_smallest_file, smallest_file, find_smallest_file
#             get_the_smallest_file, choose_the_smallest_file,
#             fetch_smallest_file, take_smallest_file, get_smaller_file,
# Options   : _  for debugging.
#             #  for debugging.
#             e  for extract the smallest from the input array
#                       leaving it one element less, in this case
#                       there will be two returning refs.
# Version   : 1.3
#--------------------------------------------------------------
sub get_smallest_file{
   my @in;
   if(ref $_[0] eq 'ARRAY'){
	 @in = @{$_[0]};
   }else{
	 @in = @_;
   }
  my $smallest=10000000000;
  my ($smallest_file, $i, $extract_opt);

  for($i=0; $i< @in; $i++){
	if(($in[$i]=~/^\-?e$/i)&&(!(-f $in[$i])) ){
	   $extract_opt=1;
	   splice(@in, $i, 1);
	   $i--;
	}
  }
  for($i=0; $i< @in; $i++){
	my $size=(-s $in[$i]);
	if($size < $smallest){
	   $smallest=$size;
	   if($extract_opt ==1){
		  print "\$extract_opt is $extract_opt \n";
		  push(@in, $smallest_file) if defined($smallest_file);
		  $smallest_file = splice(@in, $i, 1);
		  print "\n $smallest_file \n";
		  $i--;
	   }else{
		  $smallest_file=$in[$i];
	   }
	}
  }
  if($extract_opt==1){
	 return(\$smallest_file, \@in);
  }else{ return(\$smallest_file); }
}

#______________________________________________________________
# Title     : get_largest_file
# Usage     : $largest_file_name=${&get_largest_file(@ARGV)};
# Function  : checks the size of files and returns the largest
#             one's name. If a file is not present in pwd or
#             specified absolute path, it ignores it.
# Example   :
# Keywords  : choose_largest_file, largest_file, find_largest_file
#             get_the_largest_file, choose_the_largest_file,
#             fetch_largest_file, take_largest_file, get_bigger_file, get_larger_file
# Options   : _  for debugging.
#             #  for debugging.
#             e  for extract the largest from the input array
#                       leaving it one element less, in this case
#                       there will be two returning refs.
# Version   : 1.3
#--------------------------------------------------------------
sub get_largest_file{
  my @in;
   if(ref $_[0] eq 'ARRAY'){
	 @in = @{$_[0]};
   }else{
	 @in = @_;
   }

  my ($largest_file, $largest, $i, $extract_opt);
  for($i=0; $i< @in; $i++){
	if(($in[$i]=~/^\-?e$/i)&&(!(-f $in[$i])) ){
	   $extract_opt=1;
	   splice(@in, $i, 1);
	   $i--;
	}
  }
  for($i=0; $i< @in; $i++){
	my $size=(-s $in[$i]);
	if($size > $largest){
	   $largest=$size;
	   if($extract_opt ==1){
		  print "\$extract_opt is $extract_opt \n";
		  push(@in, $largest_file) if defined($largest_file);
		  $largest_file = splice(@in, $i, 1);
		  print "\n $largest_file \n";
		  $i--;
	   }else{
		  $largest_file=$in[$i];
	   }
	}
  }
  if($extract_opt==1){
	 return(\$largest_file, \@in);
  }else{ return(\$largest_file); }
}



#______________________________________________________________
# Title     : get_sequence_complexity
# Usage     : print "\n", ${&get_sequence_complexity(\$seq)};
# Function  : caculates the single sequence's sequence complexity
#             If the seq given is larger than 20, it divides it into
#             frags of 20 aa and gets the average of it.
# Example   :  ${&get_sequence_complexity(\$seq)};
#             while $seq='TTTTTACDEFGHIKLMNPQRSTVWYAAAAACCCADFADFA'
# Warning   :
# Keywords  : sequence_complexity, calc_sequence_complexity,
#             calc_seq_complexity, get_seq_complexity,
# Options   : _  for debugging.
#             #  for debugging.
#             'w=' for window size as the first arg
# Returns   : Ref. for a scalar digit.
# Argument  : ref. of string.
# Version   : 1.3
#--------------------------------------------------------------
sub get_sequence_complexity{
   my ($complexity, @seq,$i, $j, @frag);
   my $win=20;
   if(ref($_[0]) eq 'ARRAY'){
	  @seq=@{$_[0]};
   }else{
	  $seq=${$_[0]} || $_[0];
	  @seq=split(//, $seq);
   }
   if(defined($_[1])){  $win=${$_[1]} || $_[1]; }

   if(@seq <= $win){
	 my (%seq, @keys);
	 for($i=0; $i< @seq; $i++){
		$seq{$seq[$i]}++;
	 }
	 @keys= keys %seq;
	 $complexity=@keys/@seq;
   }else{
	 my @frag=@{&divide_array(\@seq, "s=$win")};
	 my @complexity=();
	 for($i=0; $i < @frag; $i++){
		my (%seq, @keys);
		my @arr=@{$frag[$i]};
		for($j=0; $j< @arr; $j++){
		   $seq{$arr[$j]}++;
		}
		@keys=keys %seq;
		push(@complexity, @keys/$win);
	 }
	 $complexity=${&array_average(\@complexity)};
   }
   return(\$complexity);
}



#______________________________________________________________
# Title     : make_swiss_index
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub make_swiss_index{
   my ($swiss, %index);
   if(-e "$ENV{'SWDIR'}seq.dat" ){
	  $swiss="$ENV{'SWDIR'}seq.dat";
   }elsif( -f "$ENV{'SWISS'}seq.dat" ){
	  $swiss="$ENV{'SWISS'}seq.dat";
   }elsif( -e 'seq.dat'){
	  $swiss="seq.dat";
   }elsif( -f "$ENV{'swiss'}seq.dat"){
	  $swiss="$ENV{'swiss'}seq.dat";
   }else{
	 ASK: print "\n Where is your swissprot seq.dat file?\n";
	  $swiss=<>;
	  chomp($swiss);
	  if(-e "$swiss"){
		 goto OPEN;
	  }else{
		 goto ASK;
	  }
   }
   OPEN: open(DB, "$swiss");
   while(<DB>){

	 if(/^ID[\t ]+(\w+) +/){
		$index{$1}=tell(DB);
		print "\n$1 $index{$1}";
	 }
   }
}

#_____________________________________________________________________________
# Title     : fetch_sequence_from_db
# Usage     : %sequence=%{&fetch_sequence_from_db($input_file, \@string)};
# Function  : accept seq names (with or without ranges like _10-111 )
#              and produces hash ref.
#             As an option, you can write(xxxx.fa) the sequences in pwd
#              with the file names with sequence names.
#             The default database used is FASTA format OWL database.
#              You can change this by S (for Swissprot either fasta
#              or full format), P for PDB fasta format data.
#             If you give the path name of DB, it will look for the
#              DB given.
#
#             This automatically checks sequence family number as
#               in >d1bpi___7.6.1
#               and attaches the number in final %sequence output
#
# Example   : %seq=%{&fetch_sequence_from_db(\@input, seq.fa, seq.fa.idx)};
#              while @input=qw( 11S3_HELAN_11-31 A1AB_CANFA A1AT_PIG )
# Keywords  : fetch_seq_from_db, fetch_sequence_from_database
# Options   : _  or #  for debugging.
#     w       for write fasta file
#     d=p100  for PDB100 fasta database from ENV
#     d=p40   for PDB40  fasta database from ENV
#     d=p     for PDB database (usually p100) from ENV
#     d=s     for Swissprot database from ENV
#     d=o     for OWL database from ENV
#     i=      for index filename. If not specified, this looks for it in the same dir as fast     
#          @
# Returns   : ref of hash
# Argument  : gets names of sequences
#             eg) \@array, \%hash, \$seq, while @array=(seq1, seq2), $seq='seq1 seq1'
#                                               %hash=(seq1, xxxx, seq2, yyyy);
# Version   : 2.9
#------------------------------------------------------------------------------
sub fetch_sequence_from_db{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	my(@DATABASE, @INDEX_FILE, %sequence, %seq_with_index, @input_seq_names,
	   %long_index, @Keys, $R_start, $NAME, $R_leng, $found_seq_count,
	   $seq_found1, $sequence, @keys, $index_file);

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# getting input seq names from all sources
	#________________________________________________________
	for(0..@hash){
	    push(@input_seq_names, keys %{$hash[$_]} );
	}
	for(0..@raw_string){
		push(@input_seq_names, split(/ +/, $raw_string[$_]) );
	}
	print "\n# (1) fetch_sequence_from_db: \@raw_string has: ", scalar(@raw_string), " elements";
	print "\n# (2) fetch_sequence_from_db: No. of seq to fetch is:",scalar(@input_seq_names);
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Choose the DBs and INDEX for fetching sequences. All files input must be DATABAE or INDEXfile
	#___________________________________________
	if(@file > 0){
		for($i=0; $i< @file; $i++){
		   if(-T $file[$i] and $file[$i]=~/\.fa[sta]?$/){      push(@DATABASE, $file[$i]);   next}
		   elsif((-T $file[$i]) and ($file[$i]=~/\.seq$/)){    push(@DATABASE, $file[$i]);   next}
		   elsif((-T $file[$i]) and ($file[$i]=~/\.dat$/)){    push(@DATABASE, $file[$i]);   next}
		   elsif(-T $file[$i] and $file[$i]=~/\.idx$/){        push(@INDEX_FILE, $file[$i]); next }
		   if($file[$i] !~/\.idx/ and -T "$file[$i]\.idx"){    push(@INDEX_FILE, "$file[$i]\.idx"); }
		   else{
			  print "\n#  WARN:  fetch_sequence_from_db:
			  You put a file-name-like which is not a fasta DB. Error. I am removing $file[$i]";
			  splice(@file, $i, 1);
			  $i--;
		   }
		}
	}

	if($vars{'d'}=~/^p[100]*$/){
	   if( -T  $ENV{'PDB_FASTA'} ){             push(@DATABASE,   $ENV{'PDB_FASTA'} );     }
	   elsif(  -T $ENV{'PDB_SEQ_FASTA'} ){      push(@DATABASE,   $ENV{'PDB_SEQ_FASTA'}  ); }
	   elsif(  -T $ENV{'PDB100_FASTA'} ){       push(@DATABASE,   $ENV{'PDB100_FASTA'} ); }
	   if(  -T $ENV{'PDB_FASTA_INDEX'} ){       push(@INDEX_FILE, $ENV{'PDB_FASTA_INDEX'} ); }
	}elsif( $vars{'d'}=~/^p\d+d$/ ){
	   if(  -T $ENV{'PDB100D_FASTA'} ){         push(@DATABASE,   $ENV{'PDB100D_FASTA'});     }
	   elsif(  -T $ENV{'PDBD100_FASTA'}  ){     push(@DATABASE,   $ENV{'PDBD100_FASTA'}); }
	   elsif(  -T $ENV{'PDB100D_SEQ_FASTA'}  ){ push(@DATABASE,   $ENV{'PDB100D_SEQ_FASTA'}); }
	   elsif(  -T $ENV{'PDBD100_SEQ_FASTA'}  ){ push(@DATABASE,   $ENV{'PDBD100_SEQ_FASTA'}); }
	   if(  -T $ENV{'PDB100D_SEQ_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB100D_SEQ_FASTA_INDEX'}); }
	   elsif(  -T $ENV{'PDBD100_SEQ_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDBD100_SEQ_FASTA_INDEX'}); }
	}elsif( $vars{'d'}=~/^p40/ ){
	   if(  -T $ENV{'PDB40_FASTA'} ){          push(@DATABASE,   $ENV{'PDB40_FASTA'});     }
	   elsif(  -T $ENV{'PDB40_SEQ_FASTA'}  ){  push(@DATABASE,   $ENV{'PDB40_SEQ_FASTA'}); }
	   if(  -T $ENV{'PDB40_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB40_FASTA_INDEX'}); }
	}elsif( $vars{'d'}=~/^p90/ ){
	   if(  -T $ENV{'PDB90_FASTA'}  ){         push(@DATABASE,   $ENV{'PDB90_FASTA'}    ); }
	   elsif(  -T $ENV{'PDB90_SEQ_FASTA'} ){   push(@DATABASE,   $ENV{'PDB90_SEQ_FASTA'}); }
	   if(  -T $ENV{'PDB90_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB90_FASTA_INDEX'}); }
	}
	if( $vars{'d'}=~/s/){
	   if(  -T $ENV{'SWISS_FASTA'} ){          push(@DATABASE,   $ENV{'SWISS_FASTA'});     }
	   elsif(  -T $ENV{'SWISS_SEQ_FASTA'} ){   push(@DATABASE,   $ENV{'SWISS_SEQ_FASTA'}); }
	   elsif(  -T $ENV{"SWISS_DIR\/seq.fa"} ){ push(@DATABASE,   $ENV{"SWISS_DIR\/seq.fa"}); }
	   if(  -T $ENV{'SWISS_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'SWISS_FASTA_INDEX'}); }
	   elsif(  -T $ENV{'SWINDEX'} ){           push(@INDEX_FILE, $ENV{'SWINDEX'}); }
	}
	if( $vars{'d'}=~/^o/){
		if(  -T $ENV{'OWL_FASTA'} ){            push(@DATABASE,   $ENV{'OWL_FASTA'});     }
		elsif(  -T $ENV{'OWL_SEQ_FASTA'} ){     push(@DATABASE,   $ENV{'OWL_SEQ_FASTA'}); }
		elsif(  -T $ENV{"OWL_DIR\/owl.fa"} ){   push(@DATABASE,   $ENV{"OWL_DIR\/owl.fa"}); }
		if(  -T $ENV{'OWL_FASTA_INDEX'} ){      push(@INDEX_FILE, $ENV{'OWL_FASTA_INDEX'}); }
		print "\n# Fetching sequences from OWL\n";
	}
	if( $vars{'d'}=~/^\S+\.\S+$/ and -T $vars{'d'} ){ push(@DATABASE, $vars{'d'} );     }
	if( $vars{'i'}=~/\S+\.\S+$/ and -T $vars{'i'} ){ push(@INDEX_FILE, $vars{'i'} );   }
	if(@INDEX_FILE > 0 and @DATABASE > 0){
		if( ${&if_file_older_than_x_days("$DATABASE[0]\.idx", 5)} > 0 ){
			$index_file=${&make_seq_index_file(\@DATABASE)};
	        push(@INDEX_FILE, $index_file);
		}elsif(-s "$DATABASE[0]\.idx"){
			push(@INDEX_FILE, "$DATABASE[0]\.idx");
		}else{
			print "\n# fetch_sequence_from_db: Some weird error in pushing \$index_file to \@INDEX_FILE\n"; exit;
		}
	}

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Final check for ALL the inputs
	#___________________________________________________
	if( @DATABASE  < 1){ print "\n# fetch_sequence_from_db: DATABASE file no found. Error\n"; exit     }
	if( @INDEX_FILE < 1){
		print "\n# fetch_sequence_from_db: \@INDEX_FILE has less than 1 elem. Error\n";
		push(@INDEX_FILE, ${&make_seq_index_file(@DATABASE)});
		print "     fetch_sequence_from_db called make_seq_index_file to make @INDEX_FILE\n";
	}
 	if($debug==1){
		print "\n# DATABASE used     : @DATABASE";
		print "\n# INDEX_FILE used   : @INDEX_FILE";
		print "\n# input_seq_names   : @input_seq_names";
	}

	#--------------------------------------------------------------------------


  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ##  Now I have @DATABASE, @INDEX_FILE, @input_seq_names
  ##_______________________________________________________________

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	#  Reading in index file to get 'seq' 'seek pos' to make %seq_with_index
	#__________________________________________________________________________
	print "\n#  fetch_sequence_from_db: \@INDEX_FILE @INDEX_FILE, \@DATABASE :@DATABASE\n";
	for($i=0; $i< @INDEX_FILE; $i++){
	   open(INDEX, "$INDEX_FILE[$i]");
	   while(<INDEX>){
		  if(/(\S+) +(\S+)/){
			  $long_index{$1}=$2;
		  }
	   }
	   for($j =0; $j < @input_seq_names; $j++){

			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
			#  If DATABASE has sequence names with ranges already index the seq with ranges
			#____________________________________________________________________________________
			if($input_seq_names[$j]=~/(\S+\_\d+\-\d+)/ and $long_index{$1}){

			    $seq_with_index{$1}=$long_index{$1};

			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
			#  If DATABASE has sequence names without ranges index the seq without ranges
			#____________________________________________________________________________________
			}elsif($input_seq_names[$j]=~/(\S+)\_\d+\-\d+/ and $long_index{$1}){

				$seq_with_index{$input_seq_names[$j]}=$long_index{$1}; # !!!! <--- This line is critical

			}elsif($input_seq_names[$j]=~/(\S+)\_\d+\-\d+/ and $long_index{"$1\_"}){ # to handle Tim's new pdb100.fa files

			    $seq_with_index{$input_seq_names[$j]}=$long_index{"$1\_"};
			    print "\n# Warning: $1 (from $input_seq_names[$j]) matched with $1\_ in $INDEX_FILE[$i],
					  I hope this is correct!!\n";
			}
			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
			#  If input_seq_name has SCOP superfamily numbers
			#____________________________________________________________________________________
			elsif($input_seq_names[$j]=~/^(\S+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/ and $long_index{$1}){

				$seq_with_index{"$1\_$2"}=$long_index{$1}; # !!!! <--- This line is critical

			}elsif($input_seq_names[$j]=~/\S/ and $long_index{$input_seq_names[$j]}){
				$seq_with_index{$input_seq_names[$j]}=$long_index{$input_seq_names[$j]}
			}else{
				print "\n#  $input_seq_names[$j](with, without range) have NO corresponding index in $INDEX_FILE[$i], ERR";
			}
	   }
	   close INDEX;
	   if ( scalar(keys %seq_with_index) < 1){
		    print "\n# fetch_sequence_from_db: \%seq_with_index is too small, ERROR?\n";
	   }
	}

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
	#  Fetching sequences from DATABASE
	#_______________________________________________________________
	print "\n# fetch_sequence_from_db: Fetching seqs from @DATABASE with  @INDEX_FILE ";
	my @Keys= keys %seq_with_index;        ## <<< NOTE it is @Keys, not @keys
	print "\n# (3) fetch_sequence_from_db: No. of seq indexed is:", scalar(@Keys);

	for($f=0; $f< @DATABASE; $f++){
	   open(FASTA, $DATABASE[$f]);
	   F0: for($e=0; $e< @Keys; $e++){
		  my ($seq_found1, $super_fam_class, $NAME, $R_leng, $R_start, $sequence);
		  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		  # When seq name has range attachment, it handles
		  #________________________________________________
		  if($Keys[$e]=~/(\S+)_(\d+)\-(\d+)$/){
			  $NAME=$1;
			  $R_start=$2-1;      ## to fit in substr function
			  $R_leng =$3-$2+1; ## to fit in substr
			  print "\n# (4) fetch_sequence_from_db: Sequences have ranges only (not superfamily numb.) \n";
		  }
		  elsif($Keys[$e]=~/(\S+)_(\d+)\-(\d+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/){
			  $NAME=$1;
			  $R_start=$2-1;      ## to fit in substr function
			  $R_leng =$3-$2+1; ## to fit in substr
			  $super_fam_class=$4;
			  print "\n# (4) fetch_sequence_from_db: Sequences have ranges and superfamily numb.\n";
		  }
		  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		  # When superfamily (scop) number is attached
		  #___________________________________________________
		  elsif($Keys[$e]=~/(\S+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/){
			  $NAME=$1;
			  $super_fam_class=$2;
			  print "\n# (4) fetch_sequence_from_db: Sequences have SCOP superfamily numbers only \n";
		  }elsif($Keys[$e]=~/^ *(\S+)[\,]*$/){
			  print "\n# (4) fetch_sequence_from_db: Sequences DON't have ranges or SCOP superfam numb.\n";
			  $NAME=$1;
		  }

		  if($seq_with_index{$NAME}=~/(\d+)/        # It is importnt having $seq_with_index{$Keys[$e]}
			   or $seq_with_index{$Keys[$e]}=~/(\d+)/
			   or $seq_with_index{"$NAME\,"}=~/(\d+)/    # this is for overcoming '>xxxx,'  entry(the comma)
			   or $seq_with_index{"$NAME\_"}=~/(\d+)/    # to handle Tim's  >c1eru_ 3.30.1.1.4
			   ){
			   my $finding_position= $1-300;
			   if( $finding_position >= 0 ){   seek(FASTA, $1-300, 0);  # -300 is necessary
			   }elsif($finding_position < 0){  seek(FASTA, 0, 0); }      ## This is essential !!!
			   while(<FASTA>){
				  if(!$seq_found1){
					  if(/\> *$NAME[\,_]? +\d+\./){
						  $seq_found1=1;
					  }
				  }else{
					  if(/ *(\w+) *$/ ){ $sequence .=$1;  ## you should use $1 to avoid including NEW line
						  unless(eof FASTA){ next   ## This is critically important to prevent error.
						  }else{ goto PUT_SEQ }     ## If the last seq has only one single line seq string, it could be a problem
					  }elsif( (/ *\> *\S+/)  or (eof FASTA) ){
						  #======= When range is defined ==================
						  PUT_SEQ:
						  if($R_start =~/\d+/){
							  $sequence{$Keys[$e]}=substr($sequence, $R_start, $R_leng); next; #
						  }
						  #======= To handle superfamily information given ==========
						  if($super_fam_class){
							  $sequence{$Keys[$e]}=$sequence;
							  $acquired_seq_count++;
						  }
						  #======= When range is NOT defined ==================
						  else{
							  $sequence{$Keys[$e]}=$sequence;
						  }
						  ($R_start, $sequence, $seq_found1)='';  ## reset $R_start, $seq_found1,,
						  next F0;
					  }
				  }
			   }

		  }else{
			   print "\n# Error, the sequence pos for $NAME (from $Keys[$e]) in DB doesnt exist in xxxx.idx file?\n";
		  }
	   }
	   close FASTA;
	}
	#print "\n# (6) fetch_sequence_from_db: counted fetched seqs: $found_seq_count, $acquired_seq_count";
	#print "\n# (7) fetch_sequence_from_db: Fetching seq has finished \n";

	return(\%sequence);
}


#______________________________________________________________
# Title     : fetch_seq
# Usage     : &fetch_seq(@ARGV);
# Function  : fetches swissprot entry or fasta format seq with
#             given seq name(like  SAA_HORSE, SA*HORSE, SAA,..)
#             you can give multi files(SAA*, SAU*) at the same
#             time. This uses ENV setting of 'SWDIR'
# Example   : &fetch_swiss_seq(@ARGV);
# Keywords  : fetch_swissprot_sequence, fetch_sequence,
#             find_swiss_sequence, find_sequence
# Options   : _  for debugging.
#             #  for debugging.
#             -f for fasta format file output
#             -a is for ALL matched seq. (same as using glob=> *YEAST)
#             -c is for Creating seq.idx file
#             -h is for HELP!
#             -g is for GDF file format output
#             -l is for list of match entries(in 1 column)
#             -s is for species option (input name mst be species (YEAST, RAT, HUMAN..)
#             n= is for Number of seq you want to get from swissprot
#             s= is for Size limit. Min seq size in swiss, s=10  -> minimum 11 aa seq.
#             S= is for Size limit. Max seq size in swiss, s=1000 -> get less than 1000
#
# Argument  : swissprot seqname
# Version   : 1.6
#--------------------------------------------------------------
sub fetch_seq{
   my @in=@_;
   my $FASTA_index, $FASTA, $where_index, %index, $question, $i,
	  $s,$t,$fasta,$index_file, $all, $species,$target, $matched, $seq, $gdf, $list, $count, $create;
   my $SEQ_size_max=100000000;

   if(@_ < 1){	  &HELP_fetch_seq;
   }else{
	 F: for($t=0; $t<@in; $t++){ #'''''''''''' PROMPT ARGV processing ''''''''''''''''''
		if($in[$t]=~/^\-c$/i){
		   $create=1; splice(@in, $t, 1); $t--;
		   print "\n You should provide database\(e.g, seq.dat\) file with this opt, I guess you did\n";
		   print "\n If you wanted to make an index with any fasta db, you also have to\n";
		   print "  give the file name. e.g:\n     $0 -c /DB/swiss/seq.dat\n";
		   print "  or $0 -c my_db.fa\n\n";
		   next; }
		if($in[$t]=~/^\-af$/){ $fasta=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-afs$/){ $species=$fasta=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-ag$/){ $gdf=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-g$/){    $gdf=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-f$/i){   $fasta=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-a$/i){   $all=1;   splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-l$/i){   $list=$all=1;   splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-s$/i){  $species=$all=1; splice(@in, $t, 1); $t--; next; }
		if( ($in[$t]=~/seq\.dat/)&&(-f $in[$t])){ ## if the path for swiss prot is given
		   $DB=$in[$t];  splice(@in, $t, 1); $t--; next;        }
		if( ($in[$t]=~/seq\.idx/)&&(-e $in[$t])){ ## if the path for swiss index is given
		   $index_file=$in[$t];	splice(@in, $t, 1); $t--; next;	}

		#''''''' SWiss prompt input file check ''''''''''''''''''
		if( -f $in[$t]){
		   open(TEMP, "$in[$t]");
		   while(<TEMP>){
			 if(/^ID[\t ]+\w+/){$DB=$in[$t]; splice(@in, $t, 1);$t--;next F;}}
		   close TEMP;
		}

		#'''''''' FASTA prmpt input file check '''''''''''''''''''
		if( (-f $in[$t]) && !(defined($FASTA))){
		   open(TEMP, "$in[$t]");
		   while(<TEMP>){
			 if(/^\> {0,4}\S+/){$FASTA=$in[$t]; $fasta=1;
			 if(-s "$FASTA\.idx"){ $FASTA_index="$FASTA\.idx"; }
		     splice(@in, $t, 1);$t--;next F;}}
		   close TEMP;
		}

		#'''''''' INDEX file automatic check ''''''''''''''''''
		if( -f $in[$t]){
		   open(TEMP2, "$in[$t]");
		   my $first_pos, $Count, @splited;
		   while(<TEMP2>){
			 $Count++;
			 if( $Count>3 ){
				if(/^ {0,2}\S+ +(\d+)/){
				   if(defined($first_pos) && ($1-$first_pos ) > 1000 ){
					  $index_file=$in[$t];
					  splice(@in, $t, 1);$t--;next F;
				   }elsif( defined($first_pos) && ($1-$first_pos)<1000 ){
					  $FASTA_index=$in[$t]; $fasta=1;
					  if($FASTA_index=~/^(\S+)\.\w+$/){
					     if(-s $1){ $FASTA= $1; }
					  }
					  splice(@in, $t, 1);$t--;next F;
				   }
				   $first_pos=$1;
				}
			 }
		   }
		   close TEMP2;
		}
		if($in[$t]=~/^\-h$/i){ &HELP_fetch_seq; exit;}
		if($in[$t]=~/^n=(\d+)$/i){ $SEQ_num_to_fetch=$1;
		   splice(@in, $t, 1);$t--;next F;}
		if($in[$t]=~/^s=(\d+)$/){ $SEQ_size_min=$1; $fasta=1;
		   splice(@in, $t, 1);$t--;next F;}
		if($in[$t]=~/^S=(\d+)$/){ $SEQ_size_max=$1; $fasta=1;
		   splice(@in, $t, 1);$t--;next F;}
	 }

	 if(($create==1)&&(defined($DB)) ){ goto CREATE; }
	 elsif(($create==1) && (defined($FASTA)) ){ goto CREATE; }
	 elsif($create==1){
	    print "\n You must give db filename (e.g. seq.dat) with path to make an index";
	    print "\n  I can handle fasta db file to make an index\n";
	    exit;
	 }
   }

   if($SEQ_size_max < $SEQ_size_min){ print "\n Seq size Max is smaller than min\n"; exit; }

   ##""""""""""""""""""""""" DB file if not defined """"""""""""""""""""""""""""""""""""""""""""
   if (!defined($DB)){
	  if((!defined($FASTA))&&($fasta==1)&&(-T "$ENV{'FASTADB'}")){
		 $FASTA=$ENV{'FASTADB'};
	  }elsif(defined($FASTA) && ($fasta==1) &&($create !=1) ){
		 goto SW_INDEX;
	  }elsif(!defined($FASTA) && (defined($FASTA_index))&& !(-e "$ENV{'FASTADB'}") ){
		 print "\n NO fasta db is defined\n";
		 goto ASK;
	  }elsif(-e "$ENV{'SWDIR'}seq.dat" ){
		 $DB="$ENV{'SWDIR'}seq.dat";
	  }elsif(-e "$ENV{'FETCHSWISS'}seq.dat" ){
		 $DB="$ENV{'FETCHSWISS'}seq.dat";
	  }elsif(-e "$ENV{'FETCHSWISS'}" ){
		 $DB="$ENV{'FETCHSWISS'}";
	  }elsif(-e "$ENV{'SWDIR'}\/seq.dat" ){
		 $DB="$ENV{'SWDIR'}\/seq.dat";
	  }elsif( -f "$ENV{'SWISS'}seq.dat" ){
		 $DB="$ENV{'SWISS'}seq.dat";
	  }elsif( -f "$ENV{'SWISS'}\/seq.dat" ){
		 $DB="$ENV{'SWISS'}\/seq.dat";
	  }elsif( -e 'seq.dat'){
		 $DB="seq.dat";
	  }elsif( -f "$ENV{'swiss'}seq.dat"){
		 $DB="$ENV{'swiss'}seq.dat";
	  }elsif(-f "ENV{'HOME'}seq.dat"){
		 $DB="ENV{'HOME'}seq.dat";
	  }elsif(-f "ENV{'SWDIR'}\/seq.dat"){
		 $DB="ENV{'SWDIR'}\/seq.dat";
	  }else{
		ASK: print "\n Where is your swissprot seq.dat(or fasta db) file?\n";
			 print "  I recommand you to set the path for them in ENV vars\n";
			 print "  e.g. export SWDIR=/DB/Swiss/  to where you put seq.dat\n";
			 print "  e.g. export FASTADB=/DB/Swiss/my_swiss.fa  for fasta database\n";
		 $swiss=<STDIN>;
		 chomp($swiss);
		 if( -f $swiss){
			open(TEMP, "$swiss");
			while(<TEMP>){
			   if(/^ID[\t ]+\w+/){ $DB=$swiss; goto SW_INDEX; }
			   elsif(/^\> {0,3}\S+/){ $FASTA=$swiss; goto SW_INDEX;}
			}
			close TEMP;
		 }else{
			goto ASK;
		 }
	  }
   }
   ##""""""""""""""""""""""""""""" INDEX file """"""""""""""""""""""""""""""""""""""""
   if( !defined($index_file)){
	  SW_INDEX:
	  if((!defined($FASTA_index))&&($fasta==1)&&(-T "$ENV{'FASTAINDEX'}")){
		 $FASTA_index=$ENV{'FASTAINDEX'};
	  }elsif(!defined($FASTA_index)&&(-T $FASTA)){
		 goto W;
	  }elsif(defined($FASTA_index)&&(-T $FASTA)){
		 goto MAIN_SEARCH;
	  }elsif(-e "$ENV{'FETCHSWISSINDEX'}seq.idx" ){
		 $index_file="$ENV{'FETCHSWISSINDEX'}seq.idx";
	  }elsif(-e "$ENV{'FETCHSWISSINDEX'}\/seq.idx" ){
		 $index_file="$ENV{'FETCHSWISSINDEX'}\/seq.idx";
	  }elsif(-e "$ENV{'SWDIR'}seq.idx" ){
		 $index_file="$ENV{'SWDIR'}seq.idx";
	  }elsif( -f "$ENV{'SWISS'}seq.idx" ){
		 $index_file="$ENV{'SWISS'}seq.idx";
	  }elsif( -f "$ENV{'SWISS'}\/seq.idx" ){
		 $index_file="$ENV{'SWISS'}\/seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}" ){
		 $index_file="$ENV{'SWINDEX'}";
	  }elsif( -e 'seq.idx'){
		 $index_file="seq.idx";
	  }elsif( -f "$ENV{'swiss'}seq.idx"){
		 $index_file= "$ENV{'swiss'}seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}seq.idx"){
		 $index_file= "$ENV{'SWINDEX'}seq.idx";
	  }elsif( -f "$ENV{'HOME'}seq.idx"){
		 $index_file= "$ENV{'HOME'}seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}seq.idx"){
		 $index_file="$ENV{'SWINDEX'}\/seq.idx";
	  }elsif( -f "$ENV{'swindex'}seq.idx"){
		 $index_file="$ENV{'swindex'}seq.idx";
	  }elsif(defined($DB)|| defined($FASTA) ){
		 print "\n Your swissprot is in $DB, but no seq.idx file for it.\n";
		 W: print "\n Where is seq.idx(or fasta idx file eg. $FASTA\.idx), type path and filename?\n";
		    print "  I recommand you to set the path for them in ENV vars later\n";
			print "  e.g. export SWINDEX=/DB/Swiss/  to where you put seq.dat index\n";
			print "  e.g. export FASTAINDEX=/DB/Swiss/my.fa.idx  for fasta db index\n";
			print "  Asking where 3 times. After, will ask creation of seq.idx or $FASTA.idx\n";
		 $question++;
		 $where_index=<STDIN>;
		 chomp($where_index);
		 if(-f $where_index){
			open(TMP, "$where_index");
		    while(<TMP>){
				if($_=~/^ {0,2}\S+ +\d+/){
				   $index_file=$where_index;
				   print "\n Your index file seems to be right \($index_file\) \n";
				   goto MAIN_SEARCH;
				}elsif($count > 4){ # read at least 4 lines and see if they are index
				   print "\n $where_index doesn't seem to be index file\n";
				   print "\n Terminate(t) or go on (g) trying\n";
				   $try=getc;
				   if($try=~/t/i){  exit; }
				   else{ goto W; }
				}else{
				   $count++;
				}
			}
			close TMP;
		 }else{
			if($question > 2){
			   print "\n I can create the index in pwd for you run $0 and \n";
			   print "\n you can copy seq.idx(or $FASTA\.idx) into your swissprot dir later\n";
			   goto CREATE;
			}
			goto W;
		 }

		 #""""""""""""""" CREATION of INDEX file """""""""""""""""""""""""""""""""""""""""""""
		 CREATE:
		 if(defined($DB)){ print "\n Can I create seq.idx in pwd? (y+return or return)\n" }
		 if(defined($FASTA)){ print "\n Can I create $FASTA\.idx in pwd? (y+return or return)\n" }
		 $yes_no=getc;
		 if($yes_no=~/y/i){
			if(defined($DB)){
			   print "\n seq.idx being created...\(1 min in my Linux\)\n";
			   open(DB, "$DB");
			   open(IDX, ">seq.idx");
			   print IDX "# swiss_index\n";
			   while(<DB>){
				 if(/^ID[\t ]+(\w+) +/){
					$index{$1}=tell(DB);
					print IDX "\n$1 $index{$1}";
				 }
			   }
			   close(DB, IDX);
			   if(-s "seq.idx"){
				   print "\nGood. seq\.idx is created.";
				   print "\n Copy seq.idx to SWISSPROT dir or you can set\n";
				   print "absolute path ENV var \'SWINDEX\' to your seq.idx path\n";
				   print "e.g. #bash\> export SWINDEX=\/DB\/Swiss\/seq.idx\n\n";
				   if($create==1){ exit;  }
			   }else{
				   print "\n Creation of seq.dat seems to have gone wrong";
			   }

			}elsif(defined($FASTA)){
			   $F_idx="$FASTA\.idx";
			   print "\n $F_idx being created...\n";
			   open(FASTADB, "$FASTA");
			   open(FASTAIDX, ">$F_idx");
			   print FASTAIDX "# fasta_index\n";
			   while(<FASTADB>){
				 if(/^\> {0,4}(\S+) */){
					$index{$1}=tell(FASTADB);
					print FASTAIDX "\n$1 $index{$1}";
				 }
			   }
			   close(FASTADB, FASTAIDX);
			   if(-s $F_idx){
				   print "\nGood! Copy $F_idx to your DB dir and set two ENV vars\n";
				   print "absolute path ENV var \'FASTADB\' to your fastadb path\n";
				   print "absolute path ENV var \'FASTAINDEX\' to your $F_idx path\n";
				   print "e.g. #bash\> export FASTADB   =\/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #bash\> export FASTAINDEX=\/DB\/mySwiss\/$F_idx\n";
				   print "e.g. #tcsh\> setenv FASTADB    \/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #tcsh\> setenv FASTAINDEX \/DB\/mySwiss\/$F_idx\n";
				   print "Unless, you can specify the database each time at prompt\n\n";
				   if($create==1){ exit;  }
			   }else{
				   print "\n Creation of seq.dat or $F_idx seems to have gone wrong";
			   }
			}
		 }else{
			exit;
		 }
	  }
   }

   #""""""""""""""""""""""""""" MAIN SERACH """""""""""""""""""""""""""""""""""""""""""""""
   MAIN_SEARCH:
   for($i=0; $i<@in; $i++){
	  my (@possible, @pos, %possible); my $target=$in[$i];
	  if($target=~/\*/){
		 $target=~s/\*/\\\w\{0,6\}/; # to handle glob input
		 $all=1;
	  }
	  if(defined($index_file)){
		 open(INDEX, "$index_file");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto SWISS;
	  }elsif(($fasta==1) && (defined($FASTA_index)) ){
		 open(INDEX, "$FASTA_index");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto FASTA;
	  }

	  SWISS:
	  @poss = sort keys %possible;

	  if( (@poss >1)&&($all !=1)){
		 print "\n @poss","\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i]";
		 print "\n be more specific! OR use -a option for all matched\n\n";
		 exit;
	  }elsif($all !=1){
		 print "\n";
		 open (DB, "$DB");
		 if(defined($SEQ_num_to_fetch)){
			print "\n# You defined the number of sequence to fetch: $SEQ_num_to_fetch\n";
			$num_sequence=$SEQ_num_to_fetch;
		 }else{ $num_sequence=@poss; }

		 A:for($p=0; $p < $num_sequence; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};   # %possible has the name and index num
			 seek(DB, ($matched-52), 0);
			 while(<DB>){
			   if($gdf==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         printf ("%-24s %-3d %-7d %-14s %4s\n", "$poss[$p]\/1\-$1", 1, $1, $poss[$p], '0.0');
					 next A;
			      }
			   }
			   elsif(/^ {0,2}\/\// and  $fasta !=1){  # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  print "\/\/\n";
				  next A;
			   }elsif(/^ {0,2}\/\//  and  $fasta==1){ # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  $seq=~s/ //g;
				  if( ($SEQ_size_min < length($seq))&&(length($seq) < $SEQ_size_max) ){
					 print "\>$poss[$p]\n$seq\n"; $seq=''; next A;
				  }else{  $seq=''; $num_sequence++;  next A; }
			   }elsif( $fasta==1 and /^[\t ]+\w+/){
				  $seq.=$_;
				  next ;
			   }elsif($list==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         print "$poss[$p]\n";
					 next A;
			      }
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 close(DB);
	  }elsif($all==1){
		 print "\n";
		 open (DB, "$DB");
		 if(defined($SEQ_num_to_fetch)){ $num_sequence=$SEQ_num_to_fetch;
		 }else{ $num_sequence=@poss; }
		 A:for($p=0; $p < $num_sequence; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(DB, ($matched-51), 0);
			 while(<DB>){
			   if($gdf==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         printf ("%-24s %-3d %-7d %-14s %4s\n", "$poss[$p]\/1\-$1", 1, $1, $poss[$p], '0.0');
					 next A;
			      }
			   }elsif(/^ {0,2}\/\// and $fasta==1){ # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  $seq=~s/ //g;
				  if( ($SEQ_size_min < length($seq))&&(length($seq) < $SEQ_size_max) ){
					 print "\>$poss[$p]\n$seq\n"; $seq='';  next A;
				  }else{  $seq=''; $num_sequence++; next A; }
			   }elsif(/^ {0,2}\/\// and $fasta !=1){  # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  print "\/\/\n";
				  next A;
			   }elsif(($fasta==1)&&(/^[\t ]+\w+/)){
				  $seq.=$_;
				  next ;
			   }elsif($list==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         printf "$poss[$p]\n";
					 next A;
			      }
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 close(DB);
	  }

	  FASTA:
	  @poss = sort keys %possible;
	  if( (@poss >1)&&($all !=1)){
		 print "\n @poss","\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i]";
		 print "\n be more specific! OR use -a option for all matched\n\n";
		 exit;
	  }elsif($all !=1){
		 print "\n";
		 open (FAS, "$FASTA");
		 B:for($p=0; $p < @poss; $p++){
		 if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(FAS, ($matched-350), 0);
			 my $seq_found;
			 while(<FAS>){
			if((/^> {0,4}(\S+)/)&&($seq_found==1)){
				   next B;
	 			}elsif(/^> {0,4}($poss[$p])/){
				   print;
				   $seq_found=1;
				}elsif($seq_found==1){
				   print;
				}
			 }
		   }
		 }
		 close(FAS);
	  }elsif($all==1){
		 print "\n";
		 open (FAS, "$FASTA");
		 B2:for($p=0; $p < @poss; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(FAS, ($matched-350), 0);
			 my $seq_found;
			 while(<FAS>){
				if((/^> {0,4}(\S+)/)&&($seq_found==1)){
				   next B2;
				}elsif(/^> *($poss[$p])/){
				   print;
				   $seq_found=1;
				}elsif($seq_found==1){
				   print;
				}
			 }
		   }
		 }
		 close(FAS);
	  }
   }
}



#______________________________________________________________
# Title     : fetch_swiss_seq
# Usage     :
# Function  : fetches swissprot entry or fasta format seq with
#             given seq name(like  SAA_HORSE, SA*HORSE, SAA,..)
#             you can give multi files(SAA*, SAU*) at the same
#             time. This uses ENV setting of 'SWDIR'
# Example   : &fetch_swiss_seq(@ARGV);
# Warning   :
# Keywords  : fetch_swissprot_sequence, fetch_sequence,
#             find_swiss_sequence, find_sequence, fetch
# Options   : _  for debugging.
#             #  for debugging.
#             -f for fasta format file output
# Returns   :
# Argument  : swissprot seqname
# Version   : 1.0
#--------------------------------------------------------------
sub fetch_swiss_seq{
   my @in=@_;
   my ($i, $index_file, $target, $matched, $seq);
   if(@_ < 1){
	 print "\n Usage: $0 [-f] <any swissprot name entry>\n";
	 print "   -f is for fasta output format only\n";
	 print "\n You have to set ENV var, SWDIR to seq.dat path\n";
	 print chr(7);
   }
   for($i=0; $i<@in; $i++){
	  if($in[$i]=~/\-f$/i){
		 $fasta=1;
		 splice(@in, $i, 1);
		 next;
	  }
   }

   if(-e "$ENV{'SWDIR'}seq.dat" ){
	  open(DB, "$ENV{'SWDIR'}seq.dat");
   }elsif( -f "$ENV{'SWISS'}seq.dat" ){
	  open(DB, "$ENV{'SWISS'}seq.dat");
   }elsif( -e 'seq.dat'){
	  open(DB, "seq.dat");
   }elsif( -f "$ENV{'swiss'}seq.dat"){
	  open(DB, "$ENV{'swiss'}seq.dat");
   }

   if(-e "$ENV{'SWDIR'}seq.idx" ){
	  $index_file="$ENV{'SWDIR'}seq.idx";
   }elsif( -f "$ENV{'SWISS'}seq.idx" ){
	  $index_file="$ENV{'SWISS'}seq.idx";
   }elsif( -e 'seq.idx'){
	  $index_file="seq.idx";
   }elsif( -f "$ENV{'swiss'}seq.idx"){
	  $index_file= "$ENV{'swiss'}seq.idx";
   }
   for($i=0; $i<@in; $i++){
	  my @possible;
	  my $target=$in[$i];
	  $target=~s/\*/\\\w\{0,4\}/; # to handle glob input
	  open(INDEX, "$index_file");
	  while(<INDEX>){
		if( /(\w*$target\w*)/ ){
		   push(@possible, $1);
		}
	  }
	  close INDEX;
	  open(INDEX,  "$index_file");
	  if(@possible >1){
		 print "\n@possible", "\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i], \n be more specific!\n\n";
	  }else{
		 print "\n";
		 A:while(<INDEX>){
		   if(/(\w*$target\w*)[\t ]+(\d+)/){
			 $matched=$1;
			 seek(DB, ($2-51), 0);
			 while(<DB>){
			   if((/^\/\/$/)&&($fasta==1)){
				  $seq=~s/ //g;
				  print "\>$matched\n$seq\n";
				  $seq='';
				  next A;
			   }elsif((/^\/\/$/) && ($fasta !=1)){
				  print "\n";
				  next A;
			   }elsif(($fasta==1)&&(/^[\t ]+\w+/)){
				  $seq.=$_;
				  next ;
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 print "========= Search for $ARGV[$i] was a success\n" if @in > 1;
	  }
   }
}


#______________________________________________________________
# Title     : get_sequence_number
# Usage     :
# Function  : reads database and tells how many sequences are there
#             fasta format db is only accepted for now.
# Example   :
# Warning   :
# Keywords  : count_number_of_sequence, get_number_of_sequence
#             get_sequence_number_in_fasta
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.2
#--------------------------------------------------------------
sub get_sequence_number{
	my @file=@{$_[0]};
	my %out;
	for($i=0; $i< @file; $i++){
	   my $seq_number_in_db;
	   open(DB, "$file[$i]");
	   while(<DB>){
		  if(/^\> {0,5}\w+/){
			 $seq_number_in_db++;
		  }
	   }
	   close DB;
	   $out{$file[$i]}=$seq_number_in_db;
	}
	return(\%out);
}



#______________________________________________________________
# Title     : write_msp_files
# Usage     : &write_msp_files(\%in1, \%in2, ['s'], [$filename],,)
# Function  : Writes input which is already in msp file format to
#              files either the name is given or generated
#              If more than one ref of hash is given, this will
#              concatenate all the hashes to one big one to
#              make one file.
#             When NO output xxx.msp file name is given, it creates
#              with the query sequence name.
# Example   :  &write_msp_files(@sso, 's', $out_file);
# Warning   : When NO output xxx.msp file name is given, it creates
#              with the query sequence name.
# Keywords  : write_msp,
# Options   : _  for debugging.
#             #  for debugging.
#             s  for each single file output for each hash input
#      filename  for putting output to the specified filename, should be xxx.msp
#
# Returns   : if 's' option is set, it will make say,
#               HI001.msp HI002.msp HI003.msp  rather than
#
#               HI001HI002HI003.msp
#  eg of one output(single file case)
#
#   1027     0.0     1     154   HI0004     1     154   HI0004
#   40       0.0     84    132   HI0004     63    108   HI0001
#   31       0.0     79    84    HI0004     98    103   HI0003
#
# Version   : 2.3
#--------------------------------------------------------------
sub write_msp_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	my ($out_msp_file, $add_range, @final_out, $msp_file_out,
	     @keys, $N, $temp_1, %hash, $query_seq_name, $single_out_opt);

	if($char_opt=~/r/){ $add_range      ='r' };
	if($char_opt=~/s/){ $single_out_opt ='s' };
	if(@file == 1){ $out_msp_file=$file[0]; $single_out_opt='' } # s is for single file output

	if($single_out_opt eq 's'){ #~~~~~~~~~~~` single files output option WITHOUT a given outfilename
		$msp_file_out='default_single_out.msp';
		for($i=0; $i< @hash; $i++){
			my %hash=%{$hash[$i]};
			my @keys =sort keys %hash;
			for($j=0; $j< @keys; $j++){
				#------------------ Writing the first line ---------------------------
				if($keys[$j]=~/(\S+)_\d+\-\d+/){ $N = $1 }else{ $N = $keys[$j] }
				if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
				   open(MSP, ">$msp_file_out") ||
					   die "# write_msp_files: I can not create $msp_file_out, check permission\n";
				   chomp( $hash{$keys[$j]} );
				   print MSP $hash{$keys[$j]}, "\n";
				   splice(@keys, $j, 1);
				   $j--; last;
				}
			}
			for($j=0; $j< @keys; $j++){
				chomp( $hash{$keys[$j]} );
				print MSP $hash{$keys[$j]}, "\n";
			}
			print MSP "\n";
		}
		if(-s $msp_file_out){
			print "\n# write_msp_files: $msp_file_out is written \n";
		}else{
			print "\n# Error, write_msp_files\n"; exit
		}
		push(@final_out, $msp_file_out);
		close(MSP);
		return(\@final_out);
	}else{
	   #~~~~~~~~~~~~~ DEfault ~~~~~~~~~~~~~~~~~~
	   #  When output file name was given!
	   #________________________________________
	   if(@file==1){
		   my($temp_1);
		   open(MSP, ">$out_msp_file") ||
			  die "# write_msp_files: I can not create $out_msp_file, check permission\n";
	       for($i=0; $i< @hash; $i++){
			  my %hash=%{$hash[$i]};
			  my @keys =sort keys %hash;
			  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			  # for Final output
			  #_____________________________
			  push(@final_out, $out_msp_file);
			  for($j=0; $j< @keys; $j++){
				  #~~~~~~~ Writing the first line only ~~~~~~~~~~~~~~~~~~
				  if($keys[$j]=~/(\S+)_\d+\-\d+$/){ $N = $1 }else{ $N = $keys[$j] }
				  if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
					  $temp_1=$keys[0]; $keys[0]=$keys[$j]; $keys[$j]=$temp_1;
				  }
			  }
			  for($j=0; $j< @keys; $j++){
				  chomp($hash{$keys[$j]});
				  print MSP $hash{$keys[$j]}, "\n";
			  }
			  print MSP "\n";
		   }
		   print MSP "\n";
		   close(MSP);
		   if(-s $out_msp_file and $out_msp_file !~/^ *\.msp$/){
			   print "\n# write_msp_files: $out_msp_file is written\n" if(-s $out_msp_file);
		   }else{
			   print "\n# write_msp_files: ERROR. Either $out_msp_file is empty or \".msp\" is written\n";
		   }
	   }else{
	      for($i=0; $i< @hash; $i++){
			  my %hash=%{$hash[$i]};
			  my @keys =sort keys %hash;
			  ($query_seq_name)=$hash{$keys[0]}=~/\S+ +\d+ +\d+ +(\S+) +\d+ +\d+ +\S+/;
			  $msp_file_out="$query_seq_name\.msp";
			  open(MSP, ">$msp_file_out") or die "\n# write_msp_files: Failed to open $msp_file_out\n";

			  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			  # for Final output
			  #_____________________________
			  push(@final_out, $msp_file_out);
			  for($j=0; $j< @keys; $j++){
				 #~~~~~~~ Writing the first line only ~~~~~~~~~~~~~~~~~~
				 if($keys[$j]=~/(\S+)_\d+\-\d+$/){ $N = $1 }else{ $N = $keys[$j] }
				 if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
					$keys[0]=$temp_1; $keys[0]=$keys[$j]; $keys[$j]=$temp_1;
				 }
			  }
			  for($j=0; $j< @keys; $j++){
			     chomp($hash{$keys[$j]});
				 print MSP $hash{$keys[$j]}, "\n";
			  }
			  print MSP "\n";
		   }
		   print MSP "\n";
		   if(-s $out_msp_file and $out_msp_file !~/^ *\.msp$/){
			   print "\n# write_msp_files: $out_msp_file is written\n" if(-s $out_msp_file);
		   }else{
			   print "\n# write_msp_files: ERROR. Either $out_msp_file is empty or only \".msp\" is written\n";
		   }
		   close MSP;
	   }
   }
   if(@final_out > 1){
	   return(\@final_out);
   }else{
	   return(\$final_out[0]);
   }
}





#______________________________________________________________________
# Title    : write_msf
# Function : writes multiple seqs. in msf format (takes one or more than one seq.!!)
# Usage    : two argments:  $seq_hash_reference  and $output_file_name
# Usage    : takes a hash which has got names keys and sequences values.
# Usage    : uses Perl5 pointers(references).
# Example  : &write_msf(\%hash, \$out_file_name);
#    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#    PileUp
#
#
#
#       MSF: 1205  Type: P    Check:  9937   ..
#
#     Name: PYC1_YEAST      oo  Len: 1205  Check:  7954  Weight:  1.00
#     Name: PYC2_YEAST      oo  Len: 1205  Check:  5807  Weight:  1.00
#     Name: PYC_MOUSE       oo  Len: 1205  Check:  6176  Weight:  1.00
#
#    //
#
#
#
#    PYC1_YEAST      MSQ.RKFAGL RDNFNLLGEK N......... .......... .KILVANRGE
#    PYC2_YEAST      MSSSKKLAGL RDNFSLLGEK N......... .......... .KILVANRGE
#    PYC_MOUSE       ...MLKFQTV RGGLRLLGVR RSSSAPVASP NVRRLEYKPI KKVMVANRGE
#
#
#    PYC1_YEAST      IPIRIFRTAH ELSMQTVAIY SHEDRLSTHK QKADEAYVIG EVGQYTPVGA
#    PYC2_YEAST      IPIRIFRSAH ELSMRTIAIY SHEDRLSMHR LKADEAYVIG EEGQYTPVGA
#    PYC_MOUSE       IAIRVFRACT ELGIRTVAVY SEQDTGQMHR QKADEAYLIG R..GLAPVQA
#
#
#    PYC1_YEAST      YLAIDEIISI AQKHQVDFIH PGYGFLSENS EFADKVVKAG ITWIGPPAEV
#    PYC2_YEAST      YLAMDEIIEI AKKHKVDFIH PGYGFLSENS EFADKVVKAG ITWIGPPAEV
#    PYC_MOUSE       YLHIPDIIKV AKENGVDAVH PGYGFLSERA DFAQACQDAG VRFIGPSPEV
#
# Keywords : write_msf_files, save_msf_files
# Version  : 1.2
# Warning  :
#------------------------------------------------------------
sub write_msf{
   $| =1;
   my(%input)=%{$_[0]};
   my($output_file)=${$_[1]};
   my($string, $name);
   open (msf_WRITE,">$output_file");		# $string is the seq string.

   print msf_WRITE 'PileUp', "\n\n";
   print msf_WRITE '   MSF: 1205  Type: P    Check:  9937   .. '; ## This is dummy
   print msf_WRITE "\n\n";

   my(@names) = keys %input;
   my($larg)  = length($input{$names[0]});

   for $name (keys %input){
	 $len = length($input{$name});
	 printf msf_WRITE (" Name: %-15s oo  Len: %-5s Check:  9999  Weight:  1.00\n", $name, $len);
   }
   print msf_WRITE "\n";
   print msf_WRITE "\/\/\n\n\n\n";

#""""""""""""""""""""""""""""""""""""""""""""""""""
#             MSF file form
#==================================================
format msf_WRITE =
@<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$names,         $seq
.
  for ($k=0; $k < $larg; $k+=50){    # 50 residues interval
	for($i=0; $i < @names; $i++){    # number of sequences
	  $names = $names[$i];
	  $input{$names[$i]}=~ s/\n//g;
	  $seq = substr($input{$names[$i]}, $k, 50);
	  $seq = &put_gaps_every_x_position_in_string($seq, 10, ' ');
	  select (msf_WRITE); ## to print out to a FILE
	  #$~='msf_WRITE';
	  write msf_WRITE;
	}
	print "\n";                       # next block starts.
  }
  close(msf_WRITE);
  select STDOUT;  # <- this is necessary to normalize output for other sub
}

#______________________________________________________________
# Title     : get_seqblock
# Usage     :
# Function  :
# Example   : @blocks_in_hash=@{&get_seqblock(\%msf, 30)};
# Warning   :
# Keywords  : find_sequence_block, get_sequence_block,
#             make_seq_block, make_seqblock, find_seqblock
# Options   : _  for debugging.
#             #  for debugging.
#             m=  for margin length of the seqblock
#             t=  for threshold
#             l=  for min seqlet length
#
# Returns   :
# Argument  :
# Version   : 1.3
#--------------------------------------------------------------
sub get_seqblock{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my ($connect_gap, @seq_frag, %digitized, $verbose, %hash, $best_block_opt);
	my $margin=3;
	my $threshold=0.8;
	my $min_seqlet_size=25;
	my $connect_gap=5;
	my @vars=keys %vars;
	for($i=0; $i< @vars; $i++){
	   if($vars[$i] eq 'm'){
		  $margin=$margin_ori=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 't'){
		  $threshold=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'l'){
		  $min_seqlet_size=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'c'){
		  $connect_gap=$vars{$vars[$i]} if( defined($vars{$vars[$i]}) );
	   }
	}
	if($char_opt=~/b/){ $best_block_opt='b' }
	if($char_opt=~/r/){ $range_in_name='r' }
	if($char_opt=~/c/){ $connect_opt ='c' }
	if($char_opt=~/v/){ $verbose='v' }
	for($o=0; $o<@hash; $o++){
	  %hash=%{$hash[$o]};
	  %digitized=%{&convert_char_to_0_or_1_hash($hash[$o])};
	}

	%added=%{&add_columns(\%digitized)}; # 11111 + 1010101 => 2121211

	&show_hash(\%added) if ($debug==1);

	%blocks=%{&get_high_score_blocks(\%added,
			   "m=$margin", "t=$threshold", "l=$min_seqlet_size", $verbose,
			   "c=$connect_gap", $connect_opt, $best_block_opt, $range_in_name)};

	my @keys=keys %blocks;
	for($e=0; $e< @keys; $e++){

	   my $range="$keys[$e]\-$blocks{$keys[$e]}";
	   my $seq_let_leng=$blocks{$keys[$e]} - $keys[$e] + 1;
	   if($seq_let_leng < $min_seqlet_size){
		  next;
	   }else{
	      push(@RANGE, $range);
	   }
	}
	@seq_frag=&get_seq_fragments(\%hash, @RANGE,
	    "l=$min_seqlet_size", "$range_in_name");
	return(\@seq_frag);
}


#______________________________________________________________
# Title     : add_columns
# Usage     :
# Function  :
# Example   :
# Warning   : if the attached name is too long(over 12 char),
#             it changes to 'Added_upX' while X is a numb.
# Keywords  : add_seq_columns, add_sequence_columns,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.2
#--------------------------------------------------------------
sub add_columns{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my $gap_char=' ';
	for($i=0; $i<@hash; $i++){
	  my %hash=%{$hash[$i]};
	  my @names=keys %hash;
	  my %final_hash_out;
	  my @final_added;
	  my $out_seq_name='Add';
	  for($j=0; $j<@names; $j++){
	     $out_seq_name.= "_$names[$j]";
		 my $string =$hash{$names[$j]};
		 my @ar_string;
		 if($string=~/\d{1,5}[ \,]\d{1,5}[ \,]\d{1,5}/){
			@ar_string =split(/$gap_char|\,/, $string );
		 }elsif($string=~/^\d{5,}$/){
			@ar_string =split(//, $string );
		 }
		 for($s=0; $s < @ar_string; $s++){
			$final_added[$s]=$ar_string[$s]+$final_added[$s];
		 }
	  }
	  if(length($out_seq_name) > 12){ $out_seq_name="Added_up${i}"; }
	  $final_hash_out{$out_seq_name}=join("$gap_char", @final_added);
	  push(@OUT_HASH, \%final_hash_out);
	}
	wantarray ? return(@OUT_HASH) : return($OUT_HASH[0]);
}


#____________________________________________________________________
# Title     : get_high_score_blocks
# Usage     : get_high_score_blocks(<ref. of hash for number string>)
# Function  : gets hash of key and number string and filters out the
#              number string region which is below certain threshold
#              determined inside this sub and returns a selected high
#              number regions
# Example   : %block_start_end=%{&get_high_score_blocks(\%input_numb_block)};
#             %out=%{&get_high_score_blocks(\%inp_numbs, 'v', 'b')};
# Warning   : This assumes that the inputs are multiply aligned seq
# Keywords  : high_scoring_regions
#             get_high_scoring_blocks, find_blocks, get_blocks
# Options   : _  for debugging.
#             #  for debugging.
#             b  for best_block_opt, returns best block only
#             v  for showing the final range hash output
#             c  for connect close blocks
#             c= for connect close blocks with specific closing gap size
#             m=  for margin length of the seqblock
#             t=  for threshold
#             l=  for min seqlet length
#
# Returns   :
# Argument  : accepts one single ref. of hash
# Version   : 1.4
#--------------------------------------------------------------------
sub get_high_score_blocks{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	my $min_seqblock_leng=25;
	my $threshold=0.8;
	my (%block_start_end, @possible_block, %hash, $range_in_name,$connect_gap);
	my $margin=$margin_ori=2;  # $margin is m in .....mmm111111111111mmm.....
	my $gap_char=' ';
	my @vars=keys %vars;
	my $connection_gap=5;
	my $connect_opt=1;
	for($i=0; $i< @vars; $i++){
	   if($vars[$i] eq 'm'){
		  $margin=$margin_ori=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 't'){
		  $threshold=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'l'){
		  $min_seqblock_leng=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'c'){
		  $connect_opt='c';
		  $connection_gap=$vars{$vars[$i]};
		  #print "\n get_high_score_blocks: \$connection_gap is $connection_gap\n";
	   }
	}
	if($char_opt=~/c/){ $connect_opt='c' }
	if($char_opt=~/r/){ $range_in_name='r' }
	if($char_opt=~/v/){
		print "\n  \$threshold         is $threshold    ";
		print "\n  \$margin            is $margin       ";
		print "\n  \$min_seqblock_leng is $min_seqblock_leng \n\n";
		print "\n  \$connection_gap   is $connection_gap \n\n";
		print "\n  \$connect_opt      is $connect_opt \n\n";
	}

	for($i=0; $i<@hash; $i++){
	   my @range;
	   my %hash_ori=%{$hash[$i]};
	   my @names=keys %hash_ori;
	   if(@names>1){ # If the hash has multi entry, make one added up hash
	      %hash=%{&add_columns(\%hash_ori)};
	      @names=keys %hash;
	   }else{  %hash=%hash_ori;    }
	   for($j=0; $j< @names; $j++){
		  my $string=$hash{$names[$j]};
		  if($string=~/\d{1,5}[ \,]\d{1,5}[ \,]\d{1,5}/){
			 @ar_string =split(/$gap_char|\,/, $string );
		  }elsif($string=~/^\d{4,}$/){ ## the string should be minimum 4 length
			 @ar_string =split(//, $string );
		  }
		  my $largest = ${&get_largest_element(\@ar_string)};
		  my $cut_line=$largest*$threshold;
		  #print "\n \$cutline in get_high_score_blocks is $cut_line \n" if $debug==1;
		  #~~~~~~~~~~~~  Cutting the tops The core algorythm #######
		  for($s=0; $s< @ar_string; $s++){
			 if($ar_string[$s] > $cut_line){ # possible_block is the increasing seqlet
				if(@possible_block == $min_seqblock_leng){
				   while( $ar_string[$s] > $cut_line){
					  $ar_string[$s]=1;
					  while($s+1+$margin > @ar_string){ $margin-- }
					  push(@possible_block, ($s+1+$margin));
					  $margin=$margin_ori;
					  $s++;
				   }
				   $ar_string[$s]=0;  #<--- Should be 0 than 1
				   $block_start_end{$possible_block[0]}=$possible_block[$#possible_block];
				   @possible_block=();
				}else{
				   $ar_string[$s]=1;
				   while(($s+1-$margin) < 0){ $margin-- };
				   push(@possible_block, ($s+1-$margin) );
				   $margin=$margin_ori;
				}
			 }elsif($ar_string[$s] <= $cut_line){
				$ar_string[$s]=0;
				@possible_block=();
			 }
		  }
		  #print "\n", @ar_string,"\n" if $debug==1;
		  #~~~~~~~~~~~~  Cutting the tops The core algorythm #######
	   }
	}
	#print "\n@ar_string\n";
	#&show_hash(\%block_start_end);
	&show_hash(\%block_start_end) if($char_opt=~/v/);

	#~~~~~~~~~~~~  Connecting blocks ~~~~~~~~~~~~~~~~~~~~~~
	if($connect_opt=~/c/){
	   my @keys=sort numerically keys %block_start_end;
	   sub numerically{  $a <=> $b;  }

	   ### sorting the %block_start_end
	   for($i=0; $i< @keys; $i++){
		  push(@block_s_e, $keys[$i], $block_start_end{$keys[$i]});
	   }

	   for($i=1; $i< $#block_s_e; $i++){  ## must be $#block_s_e to stop
		  $first_end   =$block_s_e[$i];   ## before it removes everything
		  $second_start=$block_s_e[$i+1];

		  #""""""" if gap is smaller than connection_gap given """"""""
		  if($connection_gap > ($second_start-$first_end) ){
			 splice(@block_s_e, $i, 2);
			 $i--;
		  }else{
		     $i++; # to skip to the next start correctly
		  }
	   }
	   %block_start_end=@block_s_e;
	}
	print "\n# Blocks start and end after connection(gap was $connection_gap)\n"  if($char_opt=~/v/);

	&show_hash(\%block_start_end)  if($char_opt=~/v/);

	#~~~~~~~~~~~~ Getting the largest ~~~~~~~~~~~~~~~~~~~~~~~
	if($char_opt=~/b/){
	   print "\n# Getting the largest block only to get all the blocks use a opt\n";
	   my @keys=keys %block_start_end;
	   my ($largest, %largest,$range_size, $largest_key);
	   for($i=0; $i< @keys; $i++){
		  $range_size = $block_start_end{$keys[$i]}-$keys[$i];
		  if($range_size > $largest){
			 $largest=$range_size;
			 $largest_key=$keys[$i];
			 #print "\n $largest_key \n";
		  }
	   }
	   $largest{$largest_key}=$block_start_end{$largest_key};
	   %block_start_end=%largest;
	   print "\n# The best block chosen (from to) \n"  if($char_opt=~/v/);
	   &show_hash(\%block_start_end) if($char_opt=~/v/);
	}
	return(\%block_start_end);
}






#______________________________________________________________
# Title     : delbut
# Usage     : delbut *.zip  (delete files except xxxx.zip)
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.2
#--------------------------------------------------------------
sub delbut{
  my $i;
  @save_files=@{$_[0]} || @_;
  for(@save_files){
	unless(-e $_){
	  print "\n\n \"$_\" does not exist, so nothing is deleted\n\n";
	  print chr(7);
	  exit;
	}
  }
  my @files=@{&read_file_names_only('.')};
  my @del_files=@{&subtract_array(\@files, \@save_files)};

  for($i=0; $i< @del_files; $i++){
	 system("rm -f $del_files[$i]");
  }
  print "\n\n Subdirs are never deleted \n\n";
}

#______________________________________________________________
# Title     : get_msp_range
# Usage     : @range=@{&get_msp_range($seqlet)};
#             @temp=&get_msp_range($seqlet);
#
# Function  :
# Example   :
# Warning   :
# Keywords  : get_msp_file_ranges
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.5
#--------------------------------------------------------------
sub get_msp_range{
   my $lines1=${$_[0]} || $_[0];
   my ($SEQ, $num_seq, $matched_SEQ, @Ranges);
   if($lines1 =~/^ *\d+ +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
	  $SEQ        =$3;
	  $matched_SEQ=$6;
	  if($SEQ eq $matched_SEQ){ ## skipping self match
		  $num_seq++;
	  }else{
		  @Ranges=($1, $2, $4, $5);  ## <-- example. (10-20, 30-45)
	  }
   }
   return wantarray ? (\@Ranges, \$SEQ, \$matched_SEQ): \@Ranges;
}
#______________________________________________________________
# Title     : get_msp_enquiry_sequence
# Usage     :
# Function  : gets the name of sequence used as enquiry(target)
# Example   :
# Warning   :
# Keywords  : get_msp_target_sequence, get_msp_enquiry_sequence_name
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_msp_enquiry_sequence{
   my $lines1=${$_[0]} || $_[0];
   my ($SEQ, $matched_SEQ);
   if($lines1 =~/^ *\d+ +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
	  $SEQ        =$3;
	  $matched_SEQ=$6;
   }
   return \$SEQ;
}

#______________________________________________________________
# Title     : get_msp_matched_sequence
# Usage     :
# Function  : gets the name of sequence used as enquiry(target)
# Example   :
# Warning   :
# Keywords  : get_msp_matched_sequence_name
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_msp_matched_sequence{
   my $lines1=${$_[0]} || $_[0];
   my ($SEQ, $matched_SEQ);
   if($lines1 =~/^ *\d+ +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
	  $SEQ        =$3;
	  $matched_SEQ=$6;
   }
   return \$matched_SEQ;
}


#______________________________________________________________
# Title     : get_linked_sequence
# Function  : opens msp file and links the sequences according
#             to the matches.
# Usage     :
# Example   : seq1 ------------------------------
#                            |||||||||||
#             seq2        --------------------------------
#             OUT  000000000011111111111000000000000000000
#
# Warning   :
# Keywords  : link_sequence_from_msp_file, linked_sequenced_length
#             get_clustered_sequence_length, get_annexed_sequence_length
#             connect_sequences, merge_sequences, combine_sequences
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : A ref. of an array
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_linked_sequence{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my ($final_leng, $start_diff,@MSP, %seq_sizes, $final_leng);
   my ($max_head_overhang, $head_diff, $tail_diff, $start_diff,
		 $off_set, $max_tail_overhang, @LINKED, $LINKED);
   my $Threshold=40;
   for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];
	  if($debug eq 1){ print "\n inputfile is $input_file\n" };
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_msp_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
	  }
	  my %seq_sizes=%{&open_msp_files(\$input_file, '-s')};
	  my @NAmes=keys %seq_sizes;
	  for($s=0; $s< @NAmes; $s++){ # making '000000000000000.....';
		 my $len=$seq_sizes{$NAmes[$s]};
		 for($t=0; $t< $len; $t++){
			${"$NAmes[$s]"}[$t]=0;
		 }
	  }
	  open(FILE_1,"$input_file");
	  my @MSP=<FILE_1>;
	  close(FILE_1);
	  for($j=0; $j<@MSP; $j++){
		 if($MSP[$j]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\w+) +(\d+) +(\d+) +(\w+) +(.+)/i){
			if(($1 >= $Threshold)&& ($4 eq $7)){
			   push(@matched_members, $4);
			}elsif(($1 >= $Threshold)&& ($4 ne $7)){
			   $matched_segment_count++;
			   if($match_name ne $7){  push(@matched_members, $7);  }
			   $query_start=$2-1;  $query_end  =$3-1;
			   $query_seq  =$4;    $match_start=$5-1;
			   $match_end  =$6-1;  $desc       =$8;
			   $match_name =$7;
			   for($x=$query_start; $x<= $query_end; $x++){
				  ${"$query_name"}[$x]++;
			   }
			   for($y=$match_start; $y<= $match_end; $y++){
				  ${"$match_name"}[$y]++;
			   }
			}
		 }
	  }
	  for($j=0; $j<@MSP; $j++){
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 #                 $1                $2     $3    $4      $5     $6    $7     $8
		 #                 171     41.18      6      73  HI1690    9      76  HI0736 sodium...
		 #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
		 if($MSP[$j]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\w+) +(\d+) +(\d+) +(\w+) +(.+)/i){
			if(($1 >= $Threshold)&& ($4 eq $7)){
			   $query_name=$4;   $query_leng=$3;
			   push(@matched_members, $4);
			}elsif(($1 >= $Threshold)&& ($4 ne $7)){
			   $matched_segment_count++;
			   if($match_name ne $7){  push(@matched_members, $7);  }
			   $query_start=$2-1;  $query_end  =$3-1;
			   $query_seq  =$4;    $match_start=$5-1;
			   $match_end  =$6-1;  $desc       =$8;
			   $match_name =$7;
			}
			@matched_seq_array= @{"$match_name"};
			$matched_seq_array=join('', @matched_seq_array);
			my $start_diff= $query_start - $match_start;
			if($start_diff >= 0){
			   my $tail_diff= $start_diff + $seq_sizes{$match_name} - $seq_sizes{$query_name};
			   if($tail_diff > 0){
				  $max_tail_overhang = $tail_diff if $tail_diff > $max_tail_overhang;
				  for($x=0; $x< $tail_diff; $x++){
					 $tail_start=$seq_sizes{$match_name}-$tail_diff + $x;
					 @matched_seq_array=split(//,$matched_seq_array);
					 $tail_array[$x] +=$matched_seq_array[$tail_start];
				  }
			   }
			}elsif($start_diff < 0){
			   $head_diff = abs($start_diff);
			   $max_head_overhang=$head_diff if $head_diff > $max_head_overhang;
			   for($z=0; $z< $head_diff; $z++){
				  $head_array[$z] += ${"$match_name"}[$z];
			   }
			}
		 }
	  }
   }
   @LINKED=( @{"$match_name"}[0..($max_head_overhang-1)], @{"$query_name"}, @tail_array);
   $LINKED=join('', @LINKED);
   if($debug eq 1){
	 print __LINE__, " In open_msp_files \%sequence is", %sequence ,"\n";
   }
   $final_offset=$extened_number_line - $query_leng;
   return(\@LINKED);
}


#______________________________________________________________
# Title     : get_average_sequence_size
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : get_av_sequence_size, get_average_seq_size
#             get_av_seq_size, average_seq_size, av_seq_size
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_average_sequence_size{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my( @OUT_AV, $size, @size, @names, %hash, $sum, $av);
   for($i=0; $i<@hash; $i++){
	 %hash=%{$hash[$i]};
	 @names=keys %hash;
	 for($j=0; $j<@names; $j++){
		$size=length($hash{$names[$j]});
		push(@size, $size);
	 }
	 for($j=0; $j<@size; $j++){
		$sum+=$size[$j];
	 }
	 $av=int($sum/@names);
	 push(@OUT_AV, $av);
   }
   wantarray ? \@OUT_AV : \$OUT_AV[0];
}


#______________________________________________________________
# Title     : get_linux_kernel_version
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : get_kernel_version, kernel_version,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_linux_kernel_version {
  my($image)  = ${$_[0]} || $_[0];
  unless(defined($image)){
	 if(-e '/vmlinuz'){
		$image='/vmlinuz';
	 }elsif(-e '/boot/vmlinuz'){
		$image='/boot/vmlinuz';
	 }
  }
  my($str)             = "phlogiston";
  my($version_start)   = 1164;
  my($version_length)  = 10;

  open(DATA, $image) or return(undef);
  seek(DATA, $version_start, 0);
  read(DATA, $str, $version_length);
  ######  Do careful matching in case we got some random string.
  my($version) = $str =~ /^(\d+\.\d+\.\d+)\s/;
  return(\$version)
}


#______________________________________________________________
# Title     : load_mount_info
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub load_mount_info {
  undef %mounted;
  undef %fs_type;

  open(MTAB, "</etc/mtab") or die "Can't read /etc/mtab: $!\n";
  while (<MTAB>) {
	my($dev, $mp, $type) = split;
	next if $dev eq 'none';
	$mounted{$dev} = $mp;
	$mounted{$mp}  = $dev;
	$fs_type{$dev} = $type;
  }
  close(MTAB);
}



#______________________________________________________________
# Title     : plot_vertically
# Usage     : &plot_vertically(\@query);
# Function  : This is a sub used for plot_domains.pl for
#             genome_analysis
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub plot_vertically{
  @numbers=@{$_[0]};
  print "\n    |===================================================\>\n";
  for($i=0; $i<@numbers;$i++){
	 printf ("%-4d\|", $i);
	 print "\*"x$numbers[$i], "\n";
  }
  print "    |===================================================\>\n";
}

#______________________________________________________________
# Title     : plot_histogram_horizontally
# Usage     : &plot_horizontally(\@query);
# Function  :
# Example   :
#  Input: $input= '00001111111113333333333444444444111111111111111';
#
#  Output:
#   00001111111113333333333444444444111111111111111
#   1-------------------------------------------47
#  |
#  |
#  |                       *********
#  |             *******************
#  |             *******************
#  |    *******************************************
#  |-----------------------------------------------
#
# Warning   :
# Keywords  : plot_horizontally, plot_numbers_horizontally, plot,
#             plot_numbers,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.2
#--------------------------------------------------------------
sub plot_histogram_horizontally{
  my @numbers=@{$_[0]};
  my $leng= @numbers;
  my ($largest, @inversed, $m, $i);
  for($i=0; $i< @numbers; $i++){
	$largest=$numbers[$i] if $largest < $numbers[$i];
  }
  for($i=0; $i< @numbers; $i++){ # this inverse the digits
	$inversed[$i]=abs($numbers[$i]-$largest);
  }
  print "\n ", @numbers;
  print "\n 1", "\-"x($leng-4),$leng;
  print "\n\|";
  print "\n\|";

  for($m=0; $m< $largest; $m++){
	print "\n\|";
	for($i=0; $i<@inversed;$i++){
	   if($inversed[$i] > 0){
		 print " ";
		 $inversed[$i]--;
	   }else{
		 print "\*";
	   }
	}
  }
  print "\n\|", "\-"x@numbers;
  print "\n";
}




#______________________________________________________________
# Title     : condense_number_string
# Usage     :
# Function  : condenses the numbers by making an average with
#             given factor. If the factor is 2 on number seq
#              1334284425 , result will be 23543
#              133428442  ,                23541 <-- preserved end
#             Factor 3 =>
#              133428442  , (1+3+3)/3 = 2
#                           (4+2+8)/3 = 4,,,
# Example   : @output=@{&condense_number_string(\@input, $factor)};
#             with @input=qw(1 2 4 10 10 22 2 3 44 2 3); and $factor=3
# Warning   :
# Keywords  : compact_number_string, compact_digits, condense
#             condense_string
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub condense_number_string{
	my @ARRAY=@{$_[0]};
	my $factor = ${$_[1]} || $_[1];
	my ( $i, $j, @out )
	unless(defined($factor)){ $factor=1 }
	for($i=0; $i< @ARRAY; $i+=$factor){
	  my $temp_sum;
	  for($j=$i; $j < ($factor+$i); $j++){
		$temp_sum+=$ARRAY[$j]
	  }
	  push(@out, int($temp_sum/$factor) );
	}
	return(\@out);
}


#___________________________________________________________
# Title     : get_seq_fragments
# Usage     : @seq_frag=&get_seq_fragments(\%msf, @RANGE);
# Function  : gets sequence(string) segments with defined
#             ranges.
# Example   :
#  %test=('seq1', '1234AAAAAAAAAAAaaaaa', 'seq2', '1234BBBBBBB');
#  @range = ('1-4', '5-8');
#
#  %out = %{&get_seq_fragments(\%test, \@range)};
#  %out => (seq1_5-8   AAAAA
#           seq2_5-8   BBBBB
#           seq1_1-4    1234
#           seq2_1-4    1234 )
#
# Warning   :
# Keywords  : get_sequence_fragments,
# Options   : _  for debugging.
#             #  for debugging.
#             l=  for min seqlet length
#             r  for adding ranges in the seq names
#
# Returns   :
# Argument  :
# Version   : 1.8
#-------------------------------------------------------
sub get_seq_fragments{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my $min_seqlet_size=10;
   my @vars=keys %vars;
   my $no_range_in_name=1;
   for($i=0; $i< @vars; $i++){
	   if($vars[$i] eq 'l'){
		  $min_seqlet_size=$vars{$vars[$i]};
	   }
   }
   if($char_opt=~/v/){ print "\n \$char_opt is $char_opt  @char_opt\n"; }
   if($char_opt=~/n/){ $no_range_in_name = 1 }
   if($char_opt=~/r/){ $no_range_in_name = 0 }

   print "\nget_seq_fragments \$no_range_in_name is $no_range_in_name \n";
   for($i=0; $i< @hash; $i++){
	 my (%out_frag, $frag_name, $range_start, $range_end, @out_hash);
	 my %seqs = %{$hash[$i]};
	 my @names = keys %seqs;
	 if(@names==1){
	    for($j=0; $j < @names; $j++){
		   my $seq_name = $names[$j];
		   my $seq = $seqs{$seq_name};
		   for($k=0; $k< @range; $k++){
			  my $range = $range[$k];
			  if($no_range_in_name==1){
				 $frag_name = "$seq_name";
			  }else{
			     $frag_name = "$seq_name\_$range";
			  }
			  #if(length($frag_name)>14 ){
			  #	 $frag_name ='x'."${j}_${range}";
		      #}
			  ($range_start, $range_end)=$range=~/(\d+\.?\d*)\-(\d+\.?\d*)/;
			  my $frag_len = $range_end-$range_start+1;
			  if($frag_len < $min_seqlet_size){
			     next;
			  }
			  my $fragment = substr($seq, $range_start-1, $frag_len);
			  $out_frag{$frag_name}=$fragment;
		   }
		}
		push(@out_hash,  \%out_frag);
	 }elsif(@names > 1){
	    for($k=0; $k< @range; $k++){
		  my %out_frag=();
	      my $range=$range[$k];
		  ($range_start, $range_end)=$range=~/(\d+\.?\d*)\-(\d+\.?\d*)/;
	      my $frag_len = $range_end-$range_start+1;
		  if($frag_len < $min_seqlet_size){
		     next;
		  }
	      for($j=0; $j < @names; $j++){
	         my $seq_name=$names[$j];
			 my $seq = $seqs{$seq_name};
		     if($no_range_in_name==1){
				 $frag_name = "$seq_name";
			 }else{
			     $frag_name = "$seq_name\_$range";
			 }
			 #if(length($frag_name)>15 ){
			 #	$frag_name ='x'."${j}_${range}";
		     #}
			 if($range_start==0){ $range_start++; } ## This is a bugfix
			 my $fragment = substr($seq, $range_start-1, $frag_len);
			 $out_frag{$frag_name}=$fragment;
		  }
		  push(@out_hash, \%out_frag);
		}
	 }
   }
   if(@out_hash > 1){ return(@out_hash)
   }elsif(@out_hash==1){ return($out_hash[0]) }
}




#________________________________________________________________________
# Title     : make_standalone_subroutines
# Usage     :
# Example   :
# Function  : Creates each subroutine derived xxx.pl file from B.pl or any
#             given library file.
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub make_standalone_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($each_sub, %out_subs, %left_out, @lib, $ver, $real_sub_entry_found);
  $|=1;
  for($i=0; $i < @file; $i++){
		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		my @lib =<LIB_FILE>;
		FOR: for($j=0; $j < @lib; $j++){
				  my (%out_subs, $each_sub);
				  my $title_found;
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#+[_\-\*]{10,120} *$/)
					 &&($lib[$j+1]=~/^(# *title *: *([\w\-]+))[^\.pl]/i) ){
					  $each_sub=$2;
					  $title_found =1;
					  if( (-s "$each_sub\.pl") > 2400 ){
						  print (-s "$each_sub\.pl"), "   ";
						  print "  $each_sub", " exists \n";
						  next FOR;
					  }elsif((-s "$each_sub\.pl") < 2400){
					     my $temp;
					     open (TEMP, "<$each_sub\.pl");
					     while(<TEMP>){
					        if(/^#[_\-\*]{10,120} *$/){ $temp++ }
					        elsif(/^# *title *: *[\w\-]+[^\.pl]/i ){
					           $temp++;
					        }elsif(/^# *\w+/){
					           $temp=$temp+0.5;
					        }
					     }
					     if($temp >2){
					        next FOR;
					     }
					  }

					  $out_subs{"$each_sub"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^sub *\w+ *\{/)||($lib[$j]=~/^#---+ *$/) ||
							 ($lib[$j]=~/^#_____+ *$/) || ($lib[$j]=~/^#\*\*+ *$/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 $out_subs{"$each_sub"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;    ## essential to remove #------------- line
				  }

				  #"""""""" Reading sub {  } """""""
				  if( ($title_found==1)&&($lib[$j]=~/^sub +([\w\-]+) *\{/) ){
				     $each_sub=$1;
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  if($lib[$j]=~/^sub +([\w\-]+) *\{.+\}/){
						  goto WRITE;
					  }
					  $j++;
					  until($lib[$j]=~/^\}/){
					     $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'
					  $j++;

					  WRITE:
				     open (EACH_FILE, ">$each_sub\.pl");
				     print EACH_FILE  "#\!\/perl\n";
				     print EACH_FILE  "# Made by $0 at: ", `date`, "\n";
				     print EACH_FILE $out_subs{$each_sub};
		           close EACH_FILE;
		           %out_subs=();
		           #chmod

				  }
			  }
	}#""""""""""""" end of for (@file)
	close LIB_FILE;
}



#___________________________________________________________
# Title     : is_html
# Usage     :
# Function  : Checks if it is an html file.
# Example   : $html=&is_html(\@test);
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#-------------------------------------------------------
sub is_html{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my $html=0;
  if( @string >0 ){
	 for($i=0; $i<@string; $i++){
		if($string[$i]=~/^[.\n]{0, 100}\< *HTML *\>/i){
			$html_head=1;
		}if($string[$i]=~/[.\n]+\< *\/HTML *\>[\n.]{0,100}$/i){
			$html_end=1;
		}
	 }
	 if( ($html_head eq $html_end)&&($html_end=1)){
		$html=1;
	 }
	 if($debug==1){ print "\n \@string is @string\n"; }
  }elsif(@file>0){
	 for($i=0; $i< @file; $i++){
		my $all_lines;
		open(F, "$file[$i]");
		while(<F>){
		  $all_lines.=$_;
		}
		print "\n All the lines of $file[$i] is $all_lines\n" if $debug==1;
		if($all_lines =~/\< *HTML *\>/i){
		  if($all_lines=~/\< *\/ *HTML *\>/i){
			  $html=1;
			  print "\n html matched $html\n" if $debug ==1;
		  }
		}
	 }
  }elsif( @array>0 ){
	 @arr = @{$array[$i]};
	 for($i=0; $i< @arr; $i++){
		 if($arr[$i]=~/^[.\n]{0, 100}\< *HTML *\>/i){
			$html_head=1;
		 }if($arr[$i]=~/[.\n]+\< *\/HTML *\>[\n.]{0,100}$/i){
			$html_end=1;
		 }
	 }
	 if( ($html_head eq $html_end)&&($html_end=1) ){
		$html=1;
	 }
  }elsif(@hash>0){
	 for($i=0; $i< @hash; $i++){
		@hash=%{$hash[$i]};
		for($i=0; $i< @hash; $i++){
			if($hash[$i]=~/^[.\n]{0, 100}\< *HTML *\>/i){
			  $html_head=1;
			}if($hash[$i]=~/[.\n]+\< *\/HTML *\>[\n.]{0,100}$/i){
			  $html_end=1;
			}
		}

	 }
  }
  return($html);
}

#___________________________________________________________________
# Title     : get_column
# Usage     : &get_column(\@ar, 1,2 ,3);
#             &get_column(\%ha, 1,2 ,3);
#             &get_column(@ARGV);
#             # where prompt is like: column.pl temp.txt 1 2 3 4
# Function  : Prints any specified columns, can change order of them,
#             can filter values of columns to filter (max or min value)
#
# Example   : For getting only necessary columns
#             Input: %Hash=(1, 'col1 col2 col3',
#                           2, 'col1 col2 col3',
#                           3, 'col1 col2 col3');
#             input format: &get_column(\%Hash, 3,2,1, 'k'); # k is opt
#             Ouput format: STDOUT as
#
#                1     col3 col2 col1
#                2     col3 col2 col1
#                3     col3 col2 col1
#
# Warning   : Skipps blank line.
# Keywords  : columns, column.pl, column, get_columns, take_columns,
# Options   : #  for debugging.
#             _  for debugging.
#             k  for Key print when hash input is given.
#             n  for no first line display(Handy when you have title line
#                                          and wanna remove it)
#             ?max?=xxx for filtering column numbers by maximum of xxx
#             ?min?=yyy for filtering column numbers by minimum of yyy
#                      (eg, min4=100000 means 4th column minimum is 100000)
#                      (eg, 1min4=10, 2min3=10, means get 4th column values
#                           below 10 as the first output column. Get 3rd
#                           column values below 10 as the second out column.
#
# $combine = 1 by -c c   # c is for combining columns in different files
# $ignore  = 1 by -i i   # i is for ignoring leng diff in columns over 1 input
#
# Returns   : Ref of
# Argument  : Ref of Hash, Array or just filename, and wanted column numbers.
# Version   : 1.4
#---------------------------------------------------------------
sub get_column{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (%whole, $previous, @out, @columns);
	my $len =4;
	my @v_keys= sort keys %vars; ## To be able to exchange order of column.
	if($char_opt=~/i/i){
	   $ignore =1;
	}elsif($char_opt=~/c/i){
	   $combine=1;
	}

	if(@v_keys > 0){
	   for($i=0; $i< @v_keys; $i++){
	      if($v_keys[$i]=~/\d*(m..)(\d+)/i){
			 $M=$1.$2;
			 ${"$1$2"}= $vars{$&};
	         push(@columns, $2);
		  }
	   }
	}else{
	   @columns=@num_opt;
	}
	my $troubled_column ;

	if((@num_opt==0)&&(@file>1)){ $combine=1 }; # when no column num. is given assume $combine

	#""""""""""" When combine option is set """"""""""""""""""""""
	if(($combine==1)&&(@file > 1)){
	  for ($f=0; $f<@file; $f++){
		open(IN, "$file[$f]"); # real showing is now.
		my @all_lines=<IN>;
		if((@all_lines != $previous)&&($ignore !=1)){
		   print "\n The column lengths do not match in the inputs\n";
		   print "\n you can use -i option \n";
		   exit;
		}
		$previous=@all_lines;
		for($w=0; $w< @all_lines; $w++){
		   if($all_lines[$w]=~/^[\t ]*$/){ next }  # skipping blank line
		   chomp($all_lines[$w]);
		   $out[$w].="$all_lines[$w] ";
		}
		close IN;
	  }
	  push(@array, \@out);
	}

	###### File is given as input #######""""""""""""""""""""""""""""""""""""""
	if((@file >=1)&&(@array < 1)){
	  my $file;
	  for $file(@file){
		 my $line_num, $line_read;
		 my $change=0.1;
		 open(IN, "$file");
		 my @all_lines=<IN>;
		 for($q=0; $q < @all_lines; $q++){ # This open is only for getting largest column width size
		    $line_read++;
		    @splited=split(/ +/, $all_lines[$q]);
			$l=${&get_longest_str_size(\@splited)};
			if($l>$len){ $len=$l; $change++ }
			if( ($line_read/$change) > 50 ){ last } # this is to check the column
		 }                                          # consistency and stops after some
		 if($debug==1){
		   print "\n$line_read lines read to get right column size\n";
		 }
		 close(IN);
		 my $line_counter;
		 for($x=0; $x < @all_lines; $x++){
		    if($all_lines[$x]=~/^[\t ]*$/){ next }  # skipping blank line
		    $line_counter++;
		    if(($char_opt=~/n/i)&&($line_counter==1)){ next } ## NO title #
		    if(@M=$all_lines[$x]=~/(\S+)/g){
			  if(@columns < 1){
			    for($n=0; $n< @M; $n++){
			      $columns[$n]=$n+1;
			    }
			  }

	          for($r=0; $r< @columns; $r++){ # columns is from num_opt which is given at prompt (like 3 2 1)
				  $col =$columns[$r]-1;  #
				  if( defined( ${"max$col"} ) &&    # when max or min is defined
						defined( ${"min$col"} ) ){
						if( ( ${"max$col"}  > $M[$col])&&
	                   ( ${"min$col"}  < $M[$col]) ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"max$col"}) ){ #--- When max and min are not defined.----#
						if(  ${"max$col"} > $M[$col] ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"min$col"}) ){
						if(  ${"min$col"} < $M[$col] ){
							printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							printf "%-${len}s ";
						}
				  }else{
						printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
				  }
	          }
	          print "\n";
			}
		 } #for

	  }
	}    ###### Array is given as input #######""""""""""""""""""""""""""""""""""
	elsif(@array>0){  # if input is ('x y xx y','k t yy zz',,,,)
	  for($t=0; $t<@array; $t++){
		 my @arr=@{$array[$t]};
	     print "\n Array input\n" if $debug ==1;
		 for($i=0; $i<@arr;$i++){
		   @splited=split(/ +/,$arr[$i]);
		   $l=${&get_longest_str_size(\@splited)};
  		   $len=$l if $l>$len;
	    }
	    for($i=0; $i< @arr; $i++){
			if($arr[$i]=~/^[\t ]*$/){ splice(@arr, $i, 1); $i--; next }
			if(($char_opt=~/n/i)&&($i==0)){ next } # skipping the first line
			if(@M=$arr[$i]=~/(\S+)/g){
			   if(@columns < 1){
				  for($n=0; $n< @M; $n++){
					 $columns[$n]=$n+1;
				  }
			   }
	           for($j=0; $j< @columns; $j++){
				  $col =$columns[$j]-1;  #

				  if( defined( ${"max$col"} ) &&    # when max or min is defined
						defined( ${"min$col"} ) ){
						if( ( ${"max$col"}  > $M["$col"] )&&
	                   ( ${"min$col"}  < $M[$col]) ){
							 printf "%-${len}s ",$M["$col"]  unless($M["$col"]  eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"max$col"}) ){ #--- When max and min are not defined.----#
						if(  ${"max$col"} > $M["$col"]  ){
							 printf "%-${len}s ",$M["$col"]  unless($M["$col"]  eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"min$col"}) ){
						if(  ${"min$col"} < $M["$col"]  ){
							printf "%-${len}s ",$M["$col"]  unless($M["$col"] eq '');
						}else{
							printf "%-${len}s ";
						}
				  }else{
						printf "%-${len}s ",$M["$col"] unless($M["$col"] eq '');
				  }

	           }
	           print "\n";
			}
	    }
	  }
	}  ##### Hash is given as input #######""""""""""""""""""""""""""""""""""
	elsif(@hash>0){
	  my @arr;
	  for($h=0; $h<@hash; $h++){
		  my @array=values %{$hash[$h]};
		  my @keys =keys %{$hash[$h]};
		  for($i=0; $i< @array; $i++){ # getting the longest str size
			 @arr=split(/ +/,$array[$i]);
			 $l=${&get_longest_str_size(\@arr)};
			 $len=$l if $l>$len;
		  }
		  for($i=0; $i< @array; $i++){
		    if($array[$i]=~/^[\t ]*$/){ splice(@array, $i, 1); $i--; next }
		    if(($char_opt=~/n/i)&&($i==0)){ next } #  skipping the first line
		    printf "%-10s", $keys[$i] if($char_opt=~/k/i); ## Option for key printing
		 	 if(@M=$array[$i]=~/(\S+)/g){
			   if(@columns < 1){
				  for($n=0; $n< @M; $n++){
					 $columns[$n]=$n+1;
				  }
			   }
	           for($j=0; $j< @columns; $j++){

	              $col =$columns[$j]-1;

				  if( defined( ${"max$col"} ) &&    # when max or min is defined
						defined( ${"min$col"} ) ){
						if( ( ${"max$col"}  > $M[$col])&&
	                   ( ${"min$col"}  < $M[$col]) ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"max$col"}) ){ #--- When max and min are not defined.----#
						if(  ${"max$col"} > $M[$col] ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"min$col"}) ){
						if(  ${"min$col"} < $M[$col] ){
							printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							printf "%-${len}s ";
						}
				  }else{
						printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
				  }

	           }
	           print "\n";
			}
		  }
	  }
	}
}




#________________________________________________________________________
# Title     : set_debug_option
# Usage     : &set_debug_option;
# Function  : If you put '#' or  '##' at the prompt of any program which uses
#             this sub you will get verbose printouts for the program if the program
#             has a lot of comments.
# Example   : set_debug_option #    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of verbose printouts
#             ##  for even more verbose printouts
# $debug  becomes 1 by '#'  or '_'
# $debug2 becomes 1 by '##'  or '__'
#
# Returns   :  $debug
# Argument  :
# Version   : 1.8
#--------------------------------------------------------------------
sub set_debug_option{
  my($j, $i, $level);
  unless( defined($debug) ){
	 for($j=0; $j < @ARGV; $j ++){
		 if( $ARGV[$j] =~/^(_+)$|^(#+)$/){ # in bash, '#' is a special var, so use '_'
			 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
			 $debug=1;
				  print chr(7);
			 print __LINE__," \$debug  is set to ", $debug, "\n";
			 splice(@ARGV,$j,1); $j-- ;
			 $level = length($1)+1;
			 for($i=0; $i < $level; $i++){
				 ${"debug$i"}=1;
				 print __LINE__," \$debug${i} is set to ", ${"debug$i"}, "\n";
			 }
		 }
	 }
  }
}


#____________________________________________________________________
# Title    : write_sdb_file
# Function : gets a hash ref. and writes the SDB file with 'sprintf'
# Usage    : @out=@{&write_sdb_file(\%seq)};
# Example  : @out=@{&write_sdb_file(\%seq, 'v')};  ## for STDOUT as well
#    ___________________________________________________________________________
#    Title      : EST_YEAST.sdb
#    Full Name  : Telomerase_yeast_699aa
#    Nicknames  :
#    EMBL       :
#    PDB        :
#    Swissprot  :
#
# Argument : \%ref_of_seq
# Returns  :
# Options  : v  for verbose representation. This will print boxes on STDOUT
#            n  for no '#' leader.
#            e  for Endline( '-----------------------------..' )
# Version  : 1.0
# Keywords : write_sdb
# Warning  : if version no. is null, it automatically puts '1.0'
#---------------------------------------------------------------
sub write_sdb_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($Commont_Symbol, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $s, $t, $x,  $z,
	  $delimiter, $Enclosed_came, $end_found, $end_line, $entry,
	  $length, $line,  $name, $name_found, $name_found, $num,
	  $original_dir, $output, $out_string, $pre, $pwd, $start_line, $string, $string1,
	  $temp, $title_found, $type_DSSP, @arg_output, @Final_out, @k, @keys, @names, @out, @out_hash,
	  @out_hash_final, @output_box, @outref, @read_files, @str1, @str2,  @string1,
	  %correct_head_box_entry, %Final_out, %hash, %input, %out_hash_final
	 );
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($Entry_length) =9 ;
  my($VL) =60; ## key length( like in  # Title )  ## value length
  $num    =80;
  if($char_opt =~ /n/i){
	  $Commont_Symbol=' '; ## Comment symbol. For help display, you can change into ' '
  }else{
	  $Commont_Symbol='';   #  Comment symbol. Default head_box display.
  }
  for($x=0; $x < @hash; $x++){
		my(%input) = %{$hash[$x]};  my(@keys)= sort (keys %input); my(@out);

		#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
		##  PUTTING an order in the printout entries. To make 'Title' come first
		#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
		for($i=0; $i < @keys; $i++){
		  if($keys[$i]=~/^Title/i){
			  $temp=$keys[0];	   $keys[0]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Enclosed?/i){
			  $temp=$keys[$#keys];	   $keys[$#keys]=$keys[$i];   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Usage$/i){
			  $temp=$keys[1];	   $keys[1]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Function/i){
			  $temp=$keys[2];	   $keys[2]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Example/i){
			  $temp=$keys[3];	   $keys[3]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Version/i){
			  $temp=$keys[$#keys-2];  $keys[$#keys-2]=$keys[$i];   $keys[$i]=$temp;
			  #### To make null version value to '1.0'
			  if($input{$keys[$#keys-2]}=~/^ *$/){ $input{$keys[$#keys-2]}='1.0'; }
		  }elsif($keys[$i]=~/^Warning/i){
			  $temp2=$keys[$#keys-1]; $keys[$#keys-1]=$keys[$i];   $keys[$i]=$temp2;
		  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		##       Writing starting line
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my($start_line) = "$Commont_Symbol".'_'x"$num"."\n";
		if( $char_opt =~ /v/i){
			print $start_line;      } # Prints to STDOUT,

		my($Enclosed_came);  ## <<-- This should be HERE !
		$Entry_length=${&get_longest_str_size(\@keys)};
		for( $i =0; $i < @keys; $i++){  #### @keys has been sorted before.
		  my($Len) = length($input{$keys[$i]});
		  my $delimiter = ':';
		  my($entry) = $keys[$i];
		  $entry =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e; ## capitalizing word
		  if($entry=~/^Enclosed?$/i){ $Enclosed_came = 1; }
		  my(@input) = split(/\n+/, $input{$keys[$i]});
		  if(@input > 0){
			  for($j =0; $j < @input; $j++){
				 ## If NO entry name(blank) is given    ##
				 if($j > 0){  ## If the value is a multi line.
					  $entry = '';   $delimiter=' ';    }
				 if( $char_opt =~ /v/i){
					  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
					  ###  This is to reduce the entry length of Enclosed content lines   ##
					  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
					  if( ($Enclosed_came==1)&&($entry eq '') ){
						  $Entry_length=2; $VL=80; }
					  printf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry , $input[$j]);
				 }
				 if(($Enclosed_came==1)&&($entry eq '')){ $Entry_length=2; $VL=80; }
				 $out[$k++]=sprintf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry,$input[$j]);
				 if($entry=~/^Enclosed?/){ $Enclosed_came = 1; }   }}
		  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		  ##   If the entries have null descriptions, just print entries  ######
		  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		  elsif(@input ==0){
				 if( $char_opt =~ /v/i){
					 printf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry, ' ');   }
				 $out[$k++]=sprintf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry, ' ');
		  }
		}
		############################################################
		##       Writing  Ending  line                            ##
		############################################################
		$end_line = "$Commont_Symbol".'-'x"$num"."\n";
		if( ($char_opt =~ /v/i)&&($char_opt =~ /e/i) ){  print $end_line;  }
		if( $char_opt =~ /e/i){  push(@out, $end_line)   }
		unshift(@out, $start_line);
		push(@Final_out, \@out);
  }
  if(@Final_out > 1){ @Final_out; }
  elsif( @Final_out==1){ $Final_out[0] }
} #<--- END of write_sdb_file
#________________________________________________________________________
# Title     : push_if_not_already
# Usage     : @out=@{&push_if_not_already(\@mother_array, \@adding_array )};
#             @out=@{&push_if_not_already(\@mother_array, $adding_scalar)};
# Function  : returns ref. of an array for a list of non-repetitive entry.
# Example   :
# Warning   :
# Keywords  : add_if_not_already, add_element_if_not_already, if_not_already
#             add_element_if_not_already, push_element_if_not_already,
#             if_no_already_push, put_element_if_not_already, add_new_element
#             add_new_items_only, push_new_items_only, push_new_elements_only
#             put_if_not_already,
# Options   :
# Returns   : a ref. of an array.
# Argument  : two references. The first should be an array ref. The 2nd can be either
#             scalar or array reference.
# Version   : 1.3
#--------------------------------------------------------------------
sub push_if_not_already{
	my($already_in, $already, $i, @push_items_given);
	my(@out_array)=@{$_[0]};
	if(ref($_[0]) ne 'ARRAY'){ print "\n push_if_not_array need ref\n"; exit; }
	push(@push_items_given, ${$_[1]}) if(ref($_[1]) eq 'SCALAR');
	@push_items_given=@{$_[1]} if(ref($_[1]) eq 'ARRAY');
	for $already (@out_array){  ## This for is to remove repetitive
	  for ($i=0; $i< @push_items_given; $i++){
		 if($already eq $push_items_given[$i]){ splice(@push_items_given,$i, 1); $i--; }
	  }
	}
	push(@out_array, @push_items_given);
	return(\@out_array);
}

#_______________________________________________________________
# Title     : compare_sec_template_with_db
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : sec structure mapping, map sec str, map_sec_structure
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#-----------------------------------------------------------
sub compare_sec_template_with_db{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  $ref_target_hash = shift @hash;
  %target = %{$ref_target_hash};
  @names = keys %target;
  $name_target = $names[0];
  if($name_target =~/\w+(\d+)/){ $ori_target_seq_len = length($1); }
  @target_frag = split(/ +/, $target{$name_target} );

  for($i =0; $i< @hash; $i ++){
	 %db=%{$hash[$i]};
	 @db_name = keys %db;
	 for($j=0; $j < @db_name; $j ++){
		$name = $db_name[$j];
		@db_frag = split( / +/, $db{$name} );
		for($k=0; $k < @target_frag; $k ++){
		  if( ($target_frag[$k]=~/H(\d+)/i)&&($db_frag[$k]=~/H(\d+)/i) ){
			 $simple_match_output{$name}++;
			 $leng_diff = abs($1 - $2)/15;
			 $simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }elsif( ($target_frag[$k]=~/E(\d+)/i)&&($db_frag[$k]=~/H(\d+)/i) ){
			 $simple_match_output{$name}--;
			 #$leng_diff = abs($1 - $2)/10;
			 #$simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }elsif( ($target_frag[$k]=~/H(\d+)/i)&&($db_frag[$k]=~/E(\d+)/i) ){
			 $simple_match_output{$name}--;
			 #$leng_diff = abs($1 - $2)/10;
			 #$simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }elsif( ($target_frag[$k]=~/E(\d+)/i)&&($db_frag[$k]=~/E(\d+)/i) ){
			 $simple_match_output{$name}++;
			 $leng_diff = abs($1 - $2)/15;
			 $simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }
		}
	 }
  }
  return(\%simple_match_output);
}



#___________________________________________________________________
# Title     : get_peptide_occurance
# Usage     :
# Function  : gets the number of occurances of peptide(with given size) for
#             any number of sequences given.
# Example   : %stat=%{&get_peptide_occurance(\%pro_sequence, $size)};
#              while %pro_sequence has one or more sequences like
#              seq1 AAAAAAAAAAAA, seq2 BBBBBBBBBBBBBB, ...
#              $size is number. For dipeptide=2, tripeptide=3, tetrapep=4...
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  : eg=> (\%ref_hash, 4)
# Version   : 1.2
#---------------------------------------------------------------
sub get_peptide_occurance{
	 my($k, $i, $s, $peptide, $pep_entry_num, @name, %stat);
	 %all=%{$_[0]};
	 $size=$_[1];
	 @name=keys %all;
	 for($k=0; $k<@name; $k++){
		if($all{$name[$k]}=~/[BZX]/i){ next;
		}else{
			$seq_leng += length($all{$name[$k]});
	      my @seq=split(//, $all{$name[$k]});
			my $all_occur_pep;
	      for($i=0; $i< (@seq-($size-1)); $i ++){
	        my $peptide;
	        for($s=0; $s < $size; $s++){
		       $peptide .= $seq[$i+$s];
	        }
	        $stat{$peptide}++;
			  $all_occur_pep ++;
	        print "\n$peptide  $stat{$peptide}" if $debug==1;
	        $pep_entry_num=keys %stat;
			  if( ($debug==1)&&($pep_entry_num%100 == 0 ) ){
				  print "\n Present peptide entries are:  $pep_entry_num  out of $all_occur_pep residues \n";
			  }
	      }
		 }
	 }
	 \%stat;
}


#___________________________________________________________________
# Title     : open_lottery_file
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#---------------------------------------------------------------
sub open_lottery_file{
  my ($i, @lines, @bin1, @bin2, @bin3, @bin4, @bin5, @bin6, @bonus, @allbins);
  open(F, "$_[0]");
  if($_[1] =~/\-a/i){ $single_array_return=1 };
  @lines = <F>;
  for($i=0; $i< @lines; $i++){
		if($lines[$i]=~/ {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,2}(\d\d)/){
	push(@bin1, $1);
	#print $1, "\n";
	push(@bin2, $2);
	push(@bin3, $3);
	push(@bin4, $4);
	push(@bin5, $5);
	push(@bin6, $6);
		  push(@bonus, $7);
		}
  }
  if($single_array_return == 1){ @allbins=( @bin1, @bin2, @bin3, @bin4, @bin5, @bin6, @bonus); }
  else{  return(\@bin1, \@bin2, \@bin3, \@bin4, \@bin5, \@bin6, \@bonus); }
}


#_________________________________________________________________
# Title     : get_probable_half
# Usage     :
# Function  : This produces a hash ref. which is supposed to be most probable
#             according to the given array. It divides array into halves
#             gets the more probable half until it gets one single number.
# Example   :
# Warning   :
# Keywords  : get_frequent_halves,
# Options   :
# Returns   :
# Argument  : \@array
# Version   : 1.0
#-----------------------------------------------------------------
sub get_probable_half{
	my (%hash, $i, @keys, @values);
	%hash=%{$_[0]};
	@keys=  sort {$a<=$b} keys   %hash;
	@values= values %hash;
	print "\n Hash is ",%hash,"\n";
	if(@keys == 1){
		 return(\%hash); last;
	}elsif(@keys >1){
		if((@keys % 2) != 0){ # make the number even
			 $keys[@keys]=$keys[$#keys];
		}
		@first_half=@keys[0..(@keys/2-1)];
		@second_half=@keys[(@keys/2)..$#keys];
		print "@first_half", "\n";
		print "@second_half", "\n";
		my($sum1, $sum2, %hash1, %hash2);
		for($i=0; $i<(@keys/2); $i++){
			 $sum1 +=$hash{$first_half[$i]};
			 $sum2 +=$hash{$second_half[$i]};
			 $hash1{$first_half[$i]} =$hash{$first_half[$i]};
			 $hash2{$second_half[$i]}=$hash{$second_half[$i]};
		}
		if($sum1 > $sum2){ &get_probable_half(\%hash1);}
		else{  &get_probable_half(\%hash2);}
	}
}

#_______________________________________________________________
# Title     : divide_array
# Usage     : &show_array(&divide_array(\@input, 6));
# Function  : divides any array to the denominator given.
#             If you give array of 100 elem, with 5, you will
#             get 5 arrays with 20 elem each.
# Example   :
# Warning   :
# Keywords  : split_array_into_pieces, split_array, chop_array,
#             fragment_array,
# Options   : s=  for dividing the array with sub array size
#                 eg) to get 20 elem length sub arrays from
#                     a big array
#                     @ar_ref=@{&divide_array(\@array, 's=20')};
# Returns   :
# Argument  :
# Version   : 1.4
#-----------------------------------------------------------
sub divide_array{
	 my ($size,$remaining, $size_div, $s);
	 my @array = @{$_[0]};
	 my  @final_array_ref=();
	 if(ref($_[1])){
		if(${$_[1]}=~/^\d+$/){
		   $denominator = ${$_[1]};
		}elsif(${$_[1]}=~/s=(\d+)$/i){
	       $size=$1;
		   $size_div=1;
	    }
	 }elsif($_[1]=~/^\d+$/){
	    $denominator = $_[1];
	 }elsif($_[1]=~/s=(\d+)$/i){
		$size=$1;
		$size_div=1;
	 }

	if((@_ ==1)&&($denominator == 0)){
	   print "\n Denominator is 0, error, setting to 1\n";
	   $denominator = 1;
	}

	if($size_div==1){
	   while(@array){
		  push(@final_array_ref, [splice(@array, 0, $size)]);
	   }
	}else{
	   my $frag_ar_size = int(@array/$denominator);
	   if($debug eq 1){ print "\n Frag arr size is :  $frag_ar_size \n" }
	   $remaining = @array % $denominator;
	   if($debug eq 1){ print "\n Remnant elem size is : $remaining \n" }
	   for($i=0; $i < $denominator; $i++){
		  if($remaining > 0){
			  push(@final_array_ref, [splice(@array, 0, ($frag_ar_size+1),)] );
		      $remaining --;
		  }elsif(($remaining == 0)&&(@array>0)){
			  push(@final_array_ref, [splice(@array, 0, ($frag_ar_size),)] );
		  }
	   }
	}
	return(\@final_array_ref);
}



#__________________________________________________________________________
# Title     : split_fasta_files
# Usage     : @names_of_single_files=@{&split_fasta_files(\@files)};
# Function  :
# Example   :
# Keywords  : divide_fasta_files, split_fasta_db_files, divide_fasta_db_files
#             make_single_fasta_files, write_single_fasta, write_single_fasta_files
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub split_fasta_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@each_single_seq_names, $out_single_fasta_file_name );
	for($i=0; $i< @file; $i++){
		open(FASTA, "$file[$i]");
		while(<FASTA>){
		   if(/\> *(\S+)/){
			   $out_single_fasta_file_name="$1\.fa";
			   open(OUT_SINGLE, ">$out_single_fasta_file_name");
			   print OUT_SINGLE $_;
			   push(@each_single_seq_names, $1);
		   }elsif(/\w+/){
			   print OUT_SINGLE $_;
		   }

		}
		close OUT_SINGLE;
	}
	return(\@each_single_seq_names);
}



#_______________________________________________________________
# Title     : divide_string
# Usage     : &show_array(&divide_string(\$input, 6));
# Function  : divides any string to the denominator given.
# Example   : &show_array( &divide_string(\$input, 3) );
#              while  $input is '12345789ABCDEFHIJKLMN'
# Warning   :
# Keywords  : divide_string, split_string, chop_string, divide_sequence
# Options   :
# Returns   :
# Argument  :
# Version   : 1.2
#-----------------------------------------------------------
sub divide_string{
	my (@array, $i, $j, $denominator, @temp, @string, $frag_str_size,
	   $remaining, $frag);

	for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'ARRAY'){
		  push(@array, @{$_[$i]});
	   }elsif(ref($_[$i]) eq 'SCALAR'){
		  if(${$_[$i]} =~/^(\d+)$/){
			 $denominator = $1;
		  }else{
			 push(@array, ${$_[$i]});
		  }
	   }elsif($_[$i]=~/^(\d+\.?\d*)$/){ # this can handle fraction number!!
		  $denominator = int($1);            # like  9.5
	   }else{
		  push(@array, $_[$i]);
	   }
	}
	if($denominator == 0){
	   print "\n Denominator is 0, error, setting to 1\n";
	   $denominator = 1;
	}
	for($i=0; $i< @array; $i++){
	   @temp;
	   @string = split(//, $array[$i]);
	   $frag_str_size = int(@string/$denominator);
	   if($debug eq 1){ print "\n Frag str size is :  $frag_str_size \n" }
	   $remaining = @string % $denominator;
	   for($j=0; $j < $denominator; $j++){
	 	    if($remaining > 0){
			 $frag=join('', splice(@string, 0, ($frag_str_size+1) ) );
	          push(@temp, $frag);
			    $remaining --;
		    }elsif(($remaining == 0)&&(@string>0)){
			 $frag=join('', splice(@string, 0, $frag_str_size,) );
	          push(@temp, $frag);
		    }
	   }
	   push(@final_array_ref, \@temp);
	}
	wantarray? \@final_array_ref : $final_array_ref[0];
}


#_______________________________________________________________
# Title     : write_html_headbox
# Usage     : &write_html_headbox($outfilename, \%entries);
# Function  : write html format headbox explanation with given hashes of headbox
#             content.
# Example   :
# Warning   : This does not print which has empty description
# Keywords  : write_headbox_html, write headbox in html
# Options   : 'd' for date inclusion at the top of the page
# Returns   :
# Argument  :
# Version   : 1.3
#-----------------------------------------------------------
sub write_html_headbox{

	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	$|=1;
	my(%in, $name, $name1);
	my $URL='ftp://sonja.acad.cai.cam.ac.uk/pub/Perl/';
	for ($f=0; $f<@file; $f++){
		 $output_file=$file[$f];
		 open(FILE, ">$output_file");
	    if($char_opt =~/d/i){
		    print FILE "\<html\>\n";
		    print FILE `date`, "\<br\>\n";
	    }

	    for($i=0; $i< @hash; $i++){
		    my %in =  %{$hash[$i]};
		    my @keys = sort keys %in;
		    $name= $in{'Title'};
		    if($name=~/(\S+)\.pl/){ ## to prevent making 'xxxx.pl.pl'
		       $name1=$1;
		    }else{
		       $name1=$name;
		    }
		    print FILE "\<H2\>\<A href=\"${URL}$name1\.pl\"\>$name\<\/A\>\<\/H2\>\n";
		    print FILE "\<pre\>\n";
		    for($j = 0; $j < @keys; $j ++){
			    if($keys[$j]=~/(title)/i){
			    }elsif( ($keys[$j]=~/\w+/i)&&( $in{$keys[$j]}=~/\w+/) ){
				   chomp( $in{$keys[$j]} );
	            printf FILE ("\<b\>%-10s\<\/b\> %s\n", $keys[$j], $in{$keys[$j]});
			    }
		    }
		    print FILE "\<\/pre\>";
		    print FILE "\<hr\>\n\n";
	   }
	   print FILE "\<\/html\>\n";
	   close FILE;
	}
}



#________________________________________________________________________
# Title     : open_sdb_files
# Usage     : %entries = %{&open_sdb_files(\$file_to_read )};
# Function  :
# Example   : Output is something like
#             ('Title', 'read_head_box', 'Tips', 'Use to parse doc', ...)
# Warning   :
# Keywords  : read_sdb_files,read_sdb,
# Options   : 'b' for remove blank lines. This will remove all the entries
#             with no descriptions
# Returns   : A hash ref.
# Argument  : One or None. If you give an argu. it should be a ref. of an ARRAY
#              or a filename, or ref. of a filename.
#             If no arg is given, it reads SELF, ie. the program itself.
# Version   : 1.1
#--------------------------------------------------------------------
sub open_sdb_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($title_found, %Final_out, $variable_string, $TITLE, $title, @keys,
	  $end_found, $line, $entry, $entry_match, $End_line_num, $remove_blank,
	  $title_entry_null, $end_found, $Enclosed_entry, $Enclosed_var,
	  $blank_counter, $title_entry_exist, $entry_value, $temp_W, $Warning_part
	);

	for($r=0; $r<@file; $r++){
		open(SEQ_IN, "$file[$r]");
		my @whole_file =<SEQ_IN>;

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		for($i=0; $i<@whole_file; $i++){
	$whole_file[$i] =~ tr/\t/ {7}/;  ## This is quite important to some parsing!!!
	#"""""""""""""""""""""""""""""""""""""""""""
	##  The first and second line of box 1 ##
	#"""""""""""""""""""""""""""""""""""""""""""
	if( ($whole_file[$i]=~/^[_\*\-\/]{55,}$/)&&    ##  '______' is discarded
	   ($whole_file[$i+1]=~/^ {0,4}([TitlNam]+e) {0,8}:? {0,20}(\S[\-\w\.:]*) *(Copyright.*)/i) ){
	   $TITLE = $1;      $title = "$2\n";   $Final_out{'Warning'}.="$3\n";
	   $entry_match=$TITLE; ## The very first $entry_match is set to 'Title' to prevent null entry
	   if($TITLE =~ /^Title|Name$/i){   #
	       if( ($title=~/^\s+$/)||( $title eq "\n") ){
		  $title_entry_null =1;  $title = '';  }    }
	   $Final_out{$TITLE}=$title;
	   $title_found ++ ;   $i++;  ## << this is essential to prevent reading the same line again.
	}

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	## The first and second line of box 2, #__________ or #**************
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($whole_file[$i]=~/^[_\*]{20,}$/)&&
	   ($whole_file[$i+1]=~/^ *(\w{1,6}\s{0,2}\w+) {0,7}: {1,5}(.*) */i) ){
	   $title_found ++ ;        $i++;
	   $entry_match=$1;       $entry_value=$2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;  ## Capitalize words
	   $Final_out{$entry_match}.= "$entry_value\n";
	   last if $title_found > 1;  next;   }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	##  With PROPER entry 2 : descriptins like. 'Ussage : ssssssxxjkk  kj'
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($title_found==1)&&
	   ($whole_file[$i]=~ /^ {0,3}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)] {0,9}(\S.*) */i)){
	   $entry_match = $1;
			  $entry_value = $2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	   $Final_out{$entry_match}.= "$entry_value\n"; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	#  With proper entry 3 : descriptins like. 'Ussage :', But blank description ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($title_found==1)&&
	   ($whole_file[$i]=~ /^ {0,3}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)]( {0,})$/i)){
	   $entry_match = $1;
			  $entry_value = $2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	   $Final_out{$entry_match}.= " $entry_value\n"; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  all space line matching                 ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&  ##<----- If blank line is matched. Take the line
	   ($title_found==1)&&($whole_file[$i]=~/^ {0, 110}$/) ){
	   $blank_counter++;
	   if($blank_counter > 2){ $blank_counter--; }
	   else{ $Final_out{$entry_match}.= " \n";  }     }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  Anything after 3 space to 14 positions eg: '#           HHHHHHHHH'
	###  To match 'examples' etc. INC. ':'
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&
	   ($title_found==1)&&($whole_file[$i]=~/^( {0,50})(\S.+)/) ){
	   $Final_out{$entry_match}.= "$2\n"; $blank_counter=0; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  Anything after 1 space to 11 positions  ##
	###  To match 'examples' etc. EXC. ':'       ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&
	   ($title_found==1)&&($whole_file[$i]=~/^ {0,16}([^:.]+)/) ){
	   $Final_out{$entry_match}.= "$1\n"; $blank_counter=0;}

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###-------End of the read_box reading--------##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($title_found==1)&&
	   ($whole_file[$i]=~ /^[\*\-_]{15,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
	   $End_line_num = $i;       $end_found++;
	   last;      }
		} ## < End of for loop


		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### If title is not there at all     ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		@keys=keys %Final_out;
		for(@keys){
	 if(/^Title$/i){    ## No Entry of Title at all??
	    $TITLE =$&;
	    $title_entry_exist = 1;
	    if($Final_out{$_}=~/^ *$/){   ## if Title => Null or just space
	       $title_entry_null = 1;    }  }  }

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### When title entry is not there    ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		if( $title_entry_exist != 1){
	  for($s=$End_line_num+1; $s < $End_line_num+20; $s++){
	     if( $whole_file[$s] =~ /^sub {1,5}([\w\.]+) {0,6}\{/){
		$Final_out{'Title'} = "$1\n";   last;       }
	     elsif( $whole_file[$s] =~/^________________________________+/){
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{'Title'} = "$0";     last;
	     }else{
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{'Title'} = "$0";
	     }
	  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### When title is blank              ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		elsif($title_entry_null ==1){  ## It looks for 'sub xxxxx{ ' line to get title
	  ### $End_line_num is the last line read.
	  for($s = $End_line_num+1; $s < $End_line_num+20; $s++){
	     if( $whole_file[$s] =~ /^sub {1,5}(\w+\.*\w*) {0,7}{/){
		$Final_out{$TITLE} = "$1\n";    last;     }
	     elsif( $whole_file[$s] =~/^#________________________+/){
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{$TITLE} = "$0";     last;
	     }else{
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{$TITLE} = "$0";
	     }
	  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		## Error handling, if no head box is found   ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		if($title_found < 1){ print "\nFatal: No headbox found by read_head_box2 sub.\n";  }
		push(@ref_of_seq_entry, \%Final_out);
	}
	if(@ref_of_seq_entry > 1){
		@ref_of_seq_entry;
	}else{ return (\%Final_out) }
}

#_______________________________________________________________
# Title     :  open_stride_files
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#-----------------------------------------------------------
sub open_stride_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($file, $name, %seq);
  if($debug==1){ print __LINE__, " INPUT file were @file \n"; }
  for($i=0; $i< @file; $i++){
	 $file = $file[$i];
	 open(IN, "$file");
	 while(<IN>){
		if(/NM (\S+)/){
			$name=$1;
		}elsif( defined($name)&&(/SS (\w+)/) ){
		  $seq{$name}=$1; undef $name;
		}
	 }
  }
  return(\%seq);
}

#_______________________________________________________________
# Title     : get_pdb_file_start_number
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : start_number_of_pdb, startnumber, start number of PDB,
#             get_start_number_of_pdb_file,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#-----------------------------------------------------------
sub get_pdb_file_start_number{
  my($start_number, $pdb_file);
  if( ref($_[0]) ){
	 $pdb_file = ${$_[0]};
  }else{
	 $pdb_file = $_[0];
  }
  open (PDB_FILE, "$pdb_file");
  while(<PDB_FILE>){
	 if(/^ATOM      1 +\w+ +\w+ +[\w]* +(\d+)  +/){
		 $start_number = $1; last;
	 }
  }
  \$start_number;
}
#_______________________________________________________________
# Title     : write_modeller_top_file
# Usage     : &write_modeller_top_file(\%hash, [v]);
# Function  : Writes Modeller command file format.
# Example   :
#     $modelname = 'gfct';
#     $template = '1ovt';
#     %hash=($modelname, $template);
#     &write_modeller_top_file(\%hash);
# Warning   :
# Keywords  :
# Options   : v  for verbose. You will get STDOUT of the result as well as file
# Returns   : a file of xxxx.top form.
# Argument  : 1 hash ref which has model name and template name -> (\%hash)
#             while %hash is (modelname, tempalatename)
# Version   : 1.0
#-----------------------------------------------------------
sub write_modeller_top_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
format MODELLER_TOP_FORMAT =
INCLUDE
SET ALNFILE = @<<<<<<<<<<<<<<<<
$ali_file
SET KNOWNS = @<<<<<<<<<<<<<<<<
$pdb_file
SET SEQUENCE = @<<<<<<<<<<<<<<<<
$model
SET ATOM_FILES_DIRECTORY = './:../atom_files'
SET STARTING_MODEL = 1
SET ENDING_MODEL = 1
CALL ROUTINE = 'model'
.
format STDOUT =
INCLUDE
SET ALNFILE = @<<<<<<<<<<<<<<<<
$ali_file
SET KNOWNS = @<<<<<<<<<<<<<<<<
$pdb_file
SET SEQUENCE = @<<<<<<<<<<<<<<<<
$model
SET ATOM_FILES_DIRECTORY = './:../atom_files'
SET STARTING_MODEL = 1
SET ENDING_MODEL = 1
CALL ROUTINE = 'model'
.
	########## Program starts ####################
	for($i=0; $i<@hash;$i++){
		($model, $pdb_file) = each %{$hash[$i]};
		$out_file = "$model.top";
		$ali_file = "$model.ali";
		open (MODELLER_TOP_FORMAT, ">$out_file");
		$pdb1 = "$ENV{'PDB'}\/$pdb_file.brk";
		$pdb2 = "$ENV{'PDB'}\/$pdb_file.pdb";
		if( !(-e $pdb1 ) && !( -e $pdb2 ) ){
			print "\n Error the file $pdb1  or  $pdb2\n";
		}
		$model ="\'$model\'";
		$pdb_file ="\'$pdb_file\'";
		$ali_file ="\'$ali_file\'";
		write MODELLER_TOP_FORMAT;
		if( $char_opt=~/v/i){ write STDOUT; }
	}
}
#_______________________________________________________________
# Title     : write_modeller_ali_file
# Usage     : &write_modeller_ali_file(\%model, \%template, [\$outfilename], [v]);
# Function  : Writes Modeller alignment format.
# Example   :
#             $out = 'test.ali';
#             %model =    qw(model AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAccccccccccc);
#             %template = qw(templ CCAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCCCCCCC 3 42);
#             &write_modeller_ali_file(\%model, \%template, \$out);
# Warning   :
# Keywords  :
# Options   : You can put 2 numbers for the second set of key and element for
#             the second hash input as the starting and ending points of
#             template(i.e. pdb file seq). Unless I calculate the size of seq.
#             By default, it reads PDB file defined by ENV setting of 'PDB' and
#             gets the starting number of pdb. If starting number is defined
#             explicitly at input hash, the given starting number is used instead
#             of PDB's.
#             v  for verbose. You will get STDOUT of the result as well as file
# Returns   : a file of xxxx.ali form.
# Argument  : 2 ref. of hash for seq. and optional output.name and option(s).
#             If second input hash (for template) has 3rd and 4th element which are
#             numbers they are regarded as the starting and ending number of the
#             template(i.e. pdb file seq)
# Version   : 1.0
#-----------------------------------------------------------
sub write_modeller_ali_file{
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.2 >""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	if($debug==1){ print "   \@hash has \"@hash\"\n   \@raw_string has \"@raw_string\"
	\@array has \"@array\"\n   \@char_opt has \"@char_opt\"\n   \@file has \"@file\"\n"; }
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
format MODELLER_ALI_FORMAT =
>P1;@<<<<<<<<<<<<<<<
$name
structureX: @<<<<<<<<<<<<<<: @<<<: : @<<<<<: : @<<<<<<<<<<<<<: :
$name $start_seq $seq_leng $name
@*
$seq
*
.
format STDOUT =
>P1;@<<<<<<<<<<<<<<<
$name
structureX: @<<<<<<<<<<<<<<: @<<<: : @<<<<<: : @<<<<<<<<<<<<<: :
$name $start_seq $seq_leng $name
@*
$seq
*
.
	########## Program starts ####################
	if($file[0]){
		 open (MODELLER_ALI_FORMAT, ">$file[0]");
	}else{
		@seq  = %{$hash[0]};
		$name = $seq[0]; print "\n\$name is $name" if $debug ==1;
		open (MODELLER_ALI_FORMAT, ">$name.ali");
	}
	for($i=0; $i<@hash;$i++){
		@seq  = %{$hash[$i]};
		$name = $seq[0]; print "\n\$name is $name" if $debug ==1;
		$seq  = $seq[1]; print "\n\$seq  is $seq" if $debug ==1;
		$seq_leng = length($seq);
		## checking PDB entry of the template ##
		$pdb1 = "$ENV{'PDB'}\/$name.brk";
		$pdb2 = "$ENV{'PDB'}\/$name.pdb";
		if( (-e $pdb1 ) || ( -e $pdb2 ) ){
			$pdb_file = $pdb1;
			$start_seq = ${&get_pdb_file_start_number( $pdb_file )};
		}
		## Handling the starting and ending seq points of template
		if( defined($seq[2])&&($seq[2]=~/^\d+/) ){
		  $start_seq=$seq[2]; $seq_leng+=$start_seq; }
		elsif( defined($start_seq) ){
		  $seq_leng+=$start_seq; }
		else{ $start_seq =1; }
		if( (defined($seq[2])) &&($end_seq != $seq_leng ) ){
		  print "\n Your template seq length does not match with actual seq size
					\n I will put the calculated value \"$seq_leng\" as the template length\n\n";
		}
		print "\n\$seq_leng is $seq_leng\n" if $debug ==1;
		write MODELLER_ALI_FORMAT;
		if( $char_opt=~/v/i){ write STDOUT; }
	}
}

#_______________________________________________________________
# Title     : make_template_from_sec_str
# Usage     : %target   = %{&make_template_from_sec_str(\%seq)};
# Function  : makes template of sec. str. like: 'H5 E4 E2' out of '__HHHHH__EEEE__EE__'
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#-----------------------------------------------------------
sub make_template_from_sec_str{
  my(%out, @name, @fragments, %in, $name, $leng, $frag_seq, $name2);
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  for($j = 0; $j < @hash ; $j ++){
	 %in = %{$hash[$j]};

	 @name = keys  %in;
	 print "\@name is @name\n", if($debug eq 1);

	 for($t=0; $t < @name; $t++){
		$name = $name[$t];
			print "\$name is $name\n", if($debug eq 1);
		$leng=length($in{$name});
			print "$leng\n", if($debug eq 1);
		$name2 ="$name"."$leng"; # to attach sequence length
		@fragments = split(/_+/, $in{$name});
			print "\@fragments is @fragments\n", if($debug eq 1);
		for($i = 0; $i < @fragments; $i++){
		  if($fragments[$i] =~/(\w)\w+/){
			  $fraglength = length($fragments[$i]);
			  $frag_seq .= "$1"."$fraglength "; # space is delimiter  'H5 E3 E5 E4'
			  print "\$frag_seq is $frag_seq\n", if($debug eq 1);
		  }
		  $out{$name2}=$frag_seq;
		}
	 }
  }
  return(\%out);
}



#_______________________________________________________________
# Title     : calculate_protein_volume
# Usage     : %volumes=%{&calculate_protein_volume(\%seq)}
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#-----------------------------------------------------------
 sub calculate_protein_volume{
	my %final_volume;
	my %volume=("A","88.6" ,"C","108.5","D","111.1","E","138.4",
					"F","189.9","G","60.1" ,"H","153.2","I","166.7",
					"K","168.6","L","166.7","M","162.9","N","117.7",
					"P","122.7","Q","143.9","R","173.4","S","89.0",
					"T","116.1","V","140.0","W","227.8","Y","193.6",
					"a","88.6" ,"c","108.5","d","111.1","e","138.4",
					"f","189.9","g","60.1" ,"h","153.2","i","166.7",
					"k","168.6","l","166.7","m","162.9","n","117.7",
					"p","122.7","q","143.9","r","173.4","s","89.0",
					"t","116.1","v","140.0","w","227.8","y","193.6");
  #""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >""""""""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my %seq_hash =  %{$hash[0]};
  my @names = keys %seq_hash;
  #if($debug == 1){
	  print "\n ",__LINE__, " Seq names are @names\n";
  # }

  for( $i=0 ; $i < @names; $i++){
	  my @seq=split(//, $seq_hash{$names[$i]});
	  for( $j=0; $j < @seq; $j ++){
		  $final_volume{$names[$i]} += $volume{$seq[$j]};
	  }
  }
  \%final_volume;
}




#_______________________________________________________________
# Title     : extract_words
# Usage     : @words = @{&extract_words(\$string)};
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#-----------------------------------------------------------
sub extract_words{
  #""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >"""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]}; my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]}; my(@file)   =@{$A[4]}; my(@dir   )  =@{$A[3]}; my(@array)=@{$A[1]};
	my(@string)=@{$A[2]}; my(@num_opt)=@{$A[5]}; my(@char_opt)=@{$A[6]}; my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if($debug==1){
	  print __LINE__, " Args to extract_words are: \"@raw_string\"\n";
  }
  for($i=0; $i < @raw_string; $i ++){
	  if(ref($raw_string[$i]) eq 'SCALAR'){
		 $line = ${$raw_string[$i]};
	  }else{
		 $line = $raw_string[$i];
	  }
	  push( @words, split(/[\W\-\_]+/, $line) );
  }
  if($debug==1){
	  $num = @words;
	  print __LINE__, " Num of words are : \"$num\"\n";
  }
  \@words;
}

#________________________________________________________________________
# Title     : replace_subroutines
# Usage     :
# Function  : replaces subroutines of given file(s) with supplied subs.
#             Doesn't care version
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub replace_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 1.6 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my($i,$j,$c,$d,$e,$f,$g,$h,$k,
	$l,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($each_sub, %out_subs, $VER, %left_out, @lib, $ver, $sub_name,
	  $real_sub_entry_found);
  my %hash=%{&merge_hash(@hash)};
  my @array= keys %hash;
  my @values= values %hash;

  for($i=0; $i < @file; $i++){
		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		@lib =<LIB_FILE>;
		for($j=0; $j < @lib; $j++){
			  for($s=0; $s < @array; $s++){
				  if($array[$s] =~/^([_a-zA-Z\-]+)(\d*\.*\d*)$/){
					  $sub_name=$1;
					  $VER =$2;
				  }
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#[_\-\*]{10,130} *$/)&&($lib[$j+1]=~/^(# *title *: *$sub_name)[^\.pl]/i) ){
					  $real_sub_entry_found=0;
					  $out_subs{"$sub_name"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^sub *\w+ *\{/)||($lib[$j]=~/^#[\-_\*]{10,130} *$/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 #"""""""""""""""""""""""""""""""""""
							 #  Taking version no.
							 #"""""""""""""""""""""""""""""""""""
							 if( ($char_opt !~ /nv/i) && ($lib[$j]=~/^# *version *: *([\d+\.\d+]*) */i) ){
								  if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
								  elsif( $1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
								  elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
							 }
							 $out_subs{"$sub_name"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$sub_name"}.="$lib[$j]";
					  $j++;  ## essential to remove #------------- line
				  }

				  #"""""""" Reading sub {  } """""""
				  if($lib[$j]=~/^sub +$sub_name *\{/){
					  $out_subs{"$sub_name"}.="$lib[$j]";
					  $j++;
					  until($lib[$j]=~/^\}/){
						  $out_subs{"$sub_name"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$sub_name"}.="$lib[$j]";  ## to fetch '}'

					  $j++;

					  splice(@array, $s, 1); ## removing the subnames found
					  $s--;
					  unless(defined($ver)){ $ver = '1.0' }
					  unless($char_opt=~/nv/i){ ## if No version attachment option is set
						  $out_subs{"$sub_name$ver"}=$out_subs{$sub_name};
						  delete $out_subs{$sub_name};
					  }
				  }
			  }
			  $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
			                                  ## just in case you want the left out ones.
		}
		close LIB_FILE;
		open (LEFT_FILE, ">$file[$i]");
		for($h= 0; $h < @values; $h++){### appending the new subs.
			 $left_out{$file[$i]} .= $values[$h];
		}
		print LEFT_FILE $left_out{$file[$i]};
		close LEFT_FILE;

	}#""""""""""""" end of for (@file)

	@no_of_subs_fetched = keys %out_subs;
	if(@array>0){
		print chr(7);
		print "\n# Following subs are not found in \"", "@file","\"\n  ", "@array", "\n\n";
	}
	return( \%left_out ); # this has all the sub routines and other lines.
}
#________________________________________________________________________
# Title     : read_subroutines
# Usage     :
# Function  : retunrns ALL subroutines with the keys as subroutine names
#             with version like ('show_array2.2' => 'subroutine in one string')
#             It reports the subroutines not found in searched file(s)
# Example   :
# Warning   :
# Keywords  :
# Options   : 'nv' for no version attachment in the keys of returning hash of subroutines
#             'r'  for getting remnant file content rather than the sub routines
#             't'  for leaving the original file without the sub routines taken.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub read_subroutines{
	my($each_sub, %out_subs, %left_out, @lib, $ver, $real_sub_entry_found);

	#"""""""""""""""""< handle_arguments{ head Ver 1.6 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my($i,$j,$c,$d,$e,$f,$g,$h,$k,
	$l,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my  %final_out_subs, %out_subs;

  for($i=0; $i < @file; $i++){

		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		@lib =<LIB_FILE>;
		for($j=0; $j < @lib; $j++){

				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#+[_\-\*]{10,120} *$/)
				  &&($lib[$j+1]=~/^(#+ *title *: *([\w\-]+))[^\.pl]/i) ){
					  $sub_name=$2;
					  $out_subs{"$sub_name"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^#+[\-\*_]{10,140} *$/)||($lib[$j]=~/^sub *\w+ *\{/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 #"""""""""""""""""""""""""""""""""""
							 #  Taking version no.
							 #"""""""""""""""""""""""""""""""""""
							 if( ($char_opt !~ /nv/i) && ($lib[$j]=~/^# *version *: *([\d+\.\d+]*) */i) ){
								  if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
								  elsif( $1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
								  elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
							 }
							 $out_subs{"$sub_name"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$sub_name"}.="$lib[$j]";
					  $j++;    ## essential to remove #------------- line
				  }
				  #"""""""" Reading sub {  } """""""
				  if($lib[$j]=~/^sub {1,9}([\w\-]+) *\{/){
					  $sub_name=$1;
					  $out_subs{"$sub_name"}.="$lib[$j]";
					  $j++;
					  until($lib[$j]=~/^\}/){
						  $out_subs{"$sub_name"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$sub_name"}.="$lib[$j]";  ## to fetch '}'
					  $j++;
					  unless(defined($ver)){ $ver = '1.0' }
					  unless($char_opt=~/nv/i){ ## if No version attachment option is set
						  $final_out_subs{"$sub_name$ver"}=$out_subs{$sub_name};
					  }
				  }
			  }
			  if($char_opt =~/[rt]/i){
			     $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
			  }
		     close LIB_FILE;
		     if($char_opt =~/t/i){
		       open (LIB_FILE, ">$file[$i]");
		       print LIB_FILE $left_out{$file[$i]};
			    close LIB_FILE;
			  }
print $final_out_subs;

	}#""""""""""""" end of for (@file)

 #s	@no_of_subs_fetched = keys %final_out_subs;
	if($char_opt =~ /r/i){
	   return( \%left_out ); # to get the files sans the subroutines.
	}else{
	   return( \%final_out_subs );
	}
}

#________________________________________________________________________
# Title     : fetch_subroutines
# Usage     :
# Function  : retunrns subroutines with the keys as subroutine names with version
#             like in the form( 'show_array2.2' => 'subroutine in one string')
#             It reports the subroutines not found in searched file(s). This
#             requires the names of sub you want while read_subroutines will
#             read any subroutines with their headbox to a hash.
# Example   :
# Warning   :
# Keywords  :
# Options   : 'nv' for no version attachment in the keys of returning hash of subroutines
#             'r'  for getting remnant file content rather than the sub routines
#             't'  for leaving the original file without the sub routines taken.
#             'h'  for headbox only output.
# Returns   :
# Argument  :
# Version   : 2.4
#--------------------------------------------------------------------
sub fetch_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($each_sub, %hash2, %out_subs, %left_out, @lib, $ver);

  @array=@{&remove_dup_in_array(\@string)};

  for($i=0; $i < @file; $i++){

		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		@lib =<LIB_FILE>;
		for($j=0; $j < @lib; $j++){
		     my $title_found;

			  for($s=0; $s < @array; $s++){
				  $each_sub = $array[$s];
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#+[_\-\*]{8,140} *$/)
				  &&($lib[$j+1]=~/^(#+ *Title[ \t]*\:[ \t]*$each_sub\b).*/i) ){

					  $out_subs{"$each_sub"}.="$lib[$j]$1\n";
					  $j+=2; $title_found=1;
					  until( ($lib[$j]=~/^#+[\-_\*]{8,150} *$/)||
					  ($lib[$j]=~/^sub *$each_sub *\{/)  ){

							 $lib[$j]=~s/ *$//;  #<-- removing ending space

							 #"""""""""""""""""""""""""""""""""""
							 #  Taking version no.
							 #"""""""""""""""""""""""""""""""""""
							 if( ($char_opt !~ /nv/i)
							 && ($lib[$j]=~/^# *Version *\: *([\d+\.\d+]*) */i) ){
								  if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
								  elsif( $1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
								  elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
							 }
							 $out_subs{"$each_sub"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;    ## essential to remove #------------- line

				  }
				  if($char_opt =~ /h/i){
					  goto SPLICE2;
				  }
				  #"""""""" Reading sub {  } """""""
				  if(($title_found==1)&&($lib[$j]=~/^sub +$each_sub\b *\{/) ){
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++; $title_found='';
					  until($lib[$j]=~/^\}/){
						  $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'
					  $j++;

					  SPLICE2:
					  splice(@array, $s, 1); ## removing the subnames found
					  $s--;
					  unless(defined($ver)){ $ver = '1.0' }
					  unless($char_opt=~/nv/i){ ## if No version attachment option is set
						  $hash2{"$each_sub${ver}"}=$out_subs{$each_sub};
						  %out_subs=();
					  }else{
					     $hash2{"$each_sub"}=$out_subs{$each_sub};
					     %out_subs=();
					  }
				  }
			  }
			  if($char_opt =~/[rt]/i){
			     $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
			  }
		}
		close LIB_FILE;
		if($char_opt =~/t/i){
		   open (LIB_FILE, ">$file[$i]");
		   print LIB_FILE $left_out{$file[$i]};
			close LIB_FILE;
		}

	}#""""""""""""" end of for (@file)

	$no_of_subs_fetched = keys %out_subs;
	if(@array>0){
		print chr(7);
		print "\n# Following subs are not found in \"", "@file","\"\n  ", "@array", "\n\n";
	}

	if($char_opt =~ /r/i){
	   return( \%left_out ); # to get the files sans the subroutines.
	}else{
	   return( \%hash2 );
	}
}


#________________________________________________________________________
# Title     : update_subroutines
# Usage     : &update_subroutines(\@file, \%fetched);
# Function  : replaces subroutines of given file(s) with supplied subs.
#             If the given subroutine versions are not higher than the
#             ones in the program, no upgrade would happen.
# Example   : &update_subroutines($file, \%fetched);
# Warning   :
# Keywords  : upgrade_subroutines,
# Options   :
# Returns   :
# Argument  :
# Version   : 2.6
#--------------------------------------------------------------------
sub update_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my %hash=%{&merge_hash(@hash)};
  my @SUBS = keys %hash;            # subroutine names
  for($i=0; $i < @file; $i++){
		open(TARGET_FILE, "<$file[$i]") or die  "\n $file[$i]  <- $! \n";
		my @lines =<TARGET_FILE>;
		close TARGET_FILE;
		my (%temp, %final_out, %hash2, $VER, $sub_name,$ver, $first_line, @updated, $sub_name2);

		#~~~~~ READING lines ~~~~~~~~~~~~~~~~~~~
 		for($j=0; $j < @lines; $j++){
 		      my ($loop_count, $title_found, %temp, $sub_name, $sub_name, $title_found, $sub_found, $VER, $ver);
			  if( $lines[$j]=~/^(#\!\/\w+)/){ ## first line match
			      $final_out{$file[$i]}.=$lines[$j];
			      $j++;

				  ##~~~~ replacing the update info ~~~~~~~~
				  if($lines[$j]=~/^# *last *update */i){ $final_out{$file[$i]}.="# Last Update by $0: ".`date`;  $j++;
				  }elsif($lines[$j+1]=~/^# *last *update */i){ $final_out{$file[$i]}.="$lines[$j]# Last Update by $0: ".`date`;  $j=$j+2;
				  }else{ $final_out{$file[$i]}.="# Last Update by $0: ".`date`.$lines[$j]; $j++; }  }
			  if($lines[$j]=~/^__END__/){ last }


			  until( (($lines[$j]=~/^#+[_\-\*\~]{8,160} *$/)  #~~~~~ look for delimiters ~~~~~~~~
			     &&($lines[$j+1]=~/^(#+ *Title *[:]* *\S+\b)[^\.pl] */i) ) ## NO 'xxxx.pl'
			     || ($lines[$j]=~/^sub +[\w\-\.]+ *\{/) ||(EOF) ){          ## when htere is no headbox
				 $final_out{$file[$i]}.=$lines[$j];
				 $j++;
			  }

			  for($s=0; $s < @SUBS; $s++){
				    if($SUBS[$s] =~/^([_a-zA-Z\-\d]+)(\d+\.*\d*) *$/){ $sub_name=$1; $VER =$2;  }
				    if( ($lines[$j]=~/^#+[_\-\*\~]{8,160} *$/) &&
				        ($lines[$j+1]=~/^(#+ *Title *[:]* *$sub_name\b)[^\.] */i) ){
					    $temp{"$sub_name"}.="$lines[$j]$1\n";
					    $j+=2; $title_found=1;
					    until( $lines[$j]=~/^#+[\-_\*]{10,140} *$/ ){ $lines[$j]=~s/ *$//;
							 if($lines[$j] =~ /^# *Version *\: *([\d*\.*\d*]*) */i){
								  if( $1 =~/^[ ]*$/){ $ver = '1.0';  }     ##  make null to 1.0
								  elsif($1 =~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
								  elsif($1 =~/^(\d+\.\d+)$/){ $ver = $1; }  }
							 $temp{"$sub_name"}.="$lines[$j]";	 $j++;   }
					    $temp{"$sub_name"}.="$lines[$j]";   $j++;	  }
				    if($lines[$j]=~/^sub +$sub_name *\{/){
					    $sub_found =1; $temp{"$sub_name"}.="$lines[$j]"; $j++;
					    until( $lines[$j] =~/^\}/){ $temp{"$sub_name"}.="$lines[$j]"; $j++;
						   if( ($lines[$j] =~/^#[_\-\*]{8,150} *$/)
						   &&($lines[$j+1]=~/^(# *Title *[:]* *$sub_name)[^\.pl]/i) ){
								$temp{"$sub_name"}.="\n\}\n"; $j++; # fixes missing '}' in the read sub hash.
								goto SPLICE;  }
						   elsif($lines[$j+1]=~/^sub +[\w\-]+ *\{/){
								$temp{"$sub_name"}.="\n\}\n";
								goto SPLICE;  }
						   $loop_count++;
						   if($loop_count > 10000){
								$final_out{$file[$i]} .="\n\}\n";    # fixes missing '}' in the input file
								$temp{"$sub_name"}.="\n\}\n";   # fixes missing '}' in the read sub hash.
						  	   goto SPLICE;  }  }
					    if($lines[$j]=~/^\}/){ $temp{"$sub_name"}.=$lines[$j];  }
					    SPLICE:
					    push(@updated, splice(@SUBS, $s, 1) );
					    $s--;
					    unless(defined($ver)){ $ver = '1.0' }
					    $temp{"$sub_name$ver"}=$temp{$sub_name};
					    undef( $temp{$sub_name} );
					    last;
				    }# end of  if ($lines[$j]=~/sub xxxxxx{)

			    }## END of for(@SUBS)
				if( ($title_found==1)&&($sub_found==1)&&( $VER < $ver) ){
					  print "\nTitle Yes SUB Yes for $sub_name VER $sub_name${VER} \< ver $ver";
				      $hash2{ "$sub_name$ver" }= $temp{ "$sub_name$ver" };
					  undef($temp{"$sub_name$ver"} );
					  undef($hash{"$sub_name$VER"} );
					  undef($VER, $ver, $title_found, $sub_found);
			    }elsif( ($title_found==1) && ($sub_found==1) && ($VER >=$ver) ){
					  print "\nTitle Yes  SUB Yes for $sub_name  VER \>= ver ";
					  $hash2{ "$sub_name$VER" }= $hash{"$sub_name$VER"};
					  delete($temp{"$sub_name$ver"});
					  delete($hash{"$sub_name$VER"});
					  undef($VER, $ver,$title_found, $sub_found);
				}elsif( ($title_found != 1)&&($sub_found==1) ){
					  print "\nTitle NO SUB Yes for $sub_name";
					  $hash2{ "$sub_name$VER" }= $hash{"$sub_name$VER"};
					  delete($hash{"$sub_name$VER"});
					  delete($temp{"$sub_name$ver"});
					  undef($VER, $ver,$title_found, $sub_found);
				}elsif( ($title_found ==1)&&($sub_found !=1 ) ){
					  print "\nTitle Yes SUB No for $sub_name";
				      delete( $temp{$sub_name} );
				      undef( $VER, $ver, $title_found, $sub_found);
				      next;
				}elsif(  ($title_found !=1)&&($sub_found !=1) ){
				      $final_out{$file[$i]}.=$lines[$j];
				}
			    ## $title_found and $sub_found are relevant only to names in @SUBS !!!!!!!!!
			    #""""""""""""""" Two things to consider """""""""
			    # both title and sub found or none of them found
			    #""""""""""""""""""""""""""""""""""""""""""""""""
			    for($t=0; $t< @updated; $t++){
			        if($updated[$t] =~/^([\w\-]+)(\d+\.\d*)$/){
			  		    $sub_name2=$1;   }
					    if( ($lines[$j]=~/^#+[_\-\*]{8,150} *$/)
				  	    &&($lines[$j+1]=~/^(#+ *Title *[:]* *${sub_name2})\b/i) ){  $j++;
			  		    until($lines[$j]=~/^#[\-_\*]{10,140} *$/ ){  $j++;   }   $j++;     }
			        if($lines[$j]=~/^sub +${sub_name2}\b *\{/){
			            until($lines[$j]=~/^\}/){   $j++; }
			            $j++;
				    }
			    }
		} # for (@lines)

		@values= (values %hash2, values %hash);
		print "@values";
		open (LEFT_FILE, ">$file[$i]");
		for($h= 0; $h < @values; $h++){ ### appending the new subs.
			  $final_out{$file[$i]} .= $values[$h];		}
		print LEFT_FILE $final_out{$file[$i]};
		close LEFT_FILE;	}#""""""""""""" end of for (@file)
	return( \%final_out ); # this has all the sub routines and other lines.
}


#________________________________________________________________________
# Title     : takeout_subroutines
# Usage     :
# Function  : retunrns subroutines with the keys as subroutine names with version
#             like in the form( 'show_array2.2' => 'subroutine in one string')
#             It reports the subroutines not found in searched file(s)
#             fetch_subroutines  also has this feature.
# Example   :
# Warning   : If there is no headbox and version no. It thinks the version
#             is 1.0
# Keywords  : take_out_subroutines, take_subroutines, cut_subroutines,
#             cutout_subroutines, remove_subroutines
# Options   : 'nv' for no version attachment in the keys of returning hash of subroutines
#             'r'  for getting remnant file content rather than the sub routines
# Returns   :
# Argument  :
# Version   : 1.5
#--------------------------------------------------------------------
sub takeout_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($each_sub, %duplicate, %out_subs, %left_out, @lib, $ver, $real_sub_entry_found);
  @array = @ori_array = grep { ! $duplicate{$_}++ } @raw_string;  ## removing duplicates

  for($i=0; $i < @file; $i++){
		@array=@ori_array;
		%left_out;
		open(LIB_FILE, "$file[$i]") or die  "\n $file[$i]  <- $! \n";
		@lib =<LIB_FILE>;
		for($j=0; $j < @lib; $j++){
			  for($s=0; $s < @array; $s++){
				  $each_sub = $array[$s];
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#[_\-\*\~]{20,140} *$/)&&
				      ($lib[$j+1]=~/^(# *title *: *$each_sub)[^\.pl]/i) ){
					  $real_sub_entry_found=0;
					  $out_subs{"$each_sub"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^sub *\w+ *\{/)||
					         ($lib[$j]=~/^#[\-_\*\~]{20,140} *$/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 #"""""""""""""""""""""""""""""""""""
							 #  Taking version no.
							 #"""""""""""""""""""""""""""""""""""
							 if( ($char_opt !~ /nv/i) && ($lib[$j]=~/^# *version *: *([\d+\.\d+]*) */i) ){
								  if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
								  elsif( $1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
								  elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
							 }
							 $out_subs{"$each_sub"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;  ##<< essential to remove #------------- line
				  }

				  #"""""""" Reading sub {  } """""""
				  if($lib[$j]=~/^sub +$each_sub *\{/){
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;
					  until($lib[$j]=~/^\}/){
						  $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'

					  $j++;

					  splice(@array, $s, 1); ## removing the subnames found
					  $s--;
					  unless(defined($ver)){ $ver = '1.0' }
					  unless($char_opt=~/nv/i){ ## if No version attachment option is set
						  $out_subs{"$each_sub$ver"}=$out_subs{$each_sub};
						  delete $out_subs{$each_sub};
					  }
				  }
			  }
			  $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
			                                  ## just in case you want the left out ones.
		}
		close LIB_FILE;
		open (LEFT_FILE, ">$file[$i]");
		print LEFT_FILE $left_out{$file[$i]};
		close LEFT_FILE;
	}#""""""""""""" end of for (@file)

	@no_of_subs_fetched = keys %out_subs;
	if(@array>0){
		print chr(7);
		print "\n# Following subs are not found in \"", "@file","\"\n  ", "@array", "\n\n";
	}
	if($char_opt =~ /r/i){
	   return( \%left_out ); # to get the files without the subroutines.
	}else{
	   return( \%out_subs );
	}
}

#________________________________________________________________________
# Title     : get_subroutine_calls
# Usage     : @sub_name_array= @{&get_subroutine_calls(\@AR))};
# Function  : gets all the subroutine calls( like &show_hash ) in the given
#             file name or array of lines which is the content of a file,
#             text etc. If there is no input arg, it reads the running
#             program as default input
# Example   :
# Keywords  : get_sub_names,get_subroutine_names, get_sub_calls,
#             get_subroutine_calls, find_sub_calls, find_subroutine_calls
# Options   :
# Version   : 2.0
#--------------------------------------------------------------------
sub get_subroutine_calls{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my(@keywords)=qw(shift system warn undef abs cmp close fork caller
				   eval time chdir connect disconnect wait main);
  my($keywords)=join(' ', @keywords);
  my(@arr, @sub_names, @nondup, %duplicate );
  if(@_== 0){  open(FILE, "$0"); @arr = <FILE>;  } # open self
  elsif( @file > 0){
	 for($i=0; $i < @file; $i++){
		open(FILE, "$file[$i]");
		push(@arr, <FILE>);
	 }
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
  # When subroutine names are specified as input
  #_______________________________________________
  if(@array>0){
	  for($i=0; $i<@array; $i++){
		 push(@arr, @{$array[$i]} ) if ref($array[$i]) eq 'ARRAY';
	  }
  }
  if( @raw_string>0){ push(@arr, @raw_string) }

  for($i=0; $i< @arr; $i ++){
	  if($arr[$i]=~/^#+/){ next }
	  $arr[$i]=~s/^(.+)(# ..+)$/$1/;  # I have to remove all COMMENTS as comments can have &xxx

	  if($arr[$i]=~/\$\~ = *\S/){
		  next;
	  }elsif($arr[$i]=~/^format *=/){
		  until($arr[$i]=~/^\./){  $i++ }
	  }elsif($arr[$i]=~/\&([^\&][\w\-\.\:]+) {0,3}.*[\;\,]/){    # for   =&xxxx(\$xxx, $yyy);  and =&xxxx(\xx,
		  push(@sub_names, $1);                              #                                        \yy);
	  }elsif($arr[$i]=~/= *([a-zA-Z][\w\-]+) {0,3}\;/){      # for   =xxxx;
		  push(@sub_names, $1) unless($keywords=~/\b$1\b/);
	  }elsif($arr[$i]=~/= *[\@\%\$] *\{ *([a-zA-Z_\-]+[\d]*).+\} *\;/){ # for =${ xxxx }; or =${&xxxxx};
		  push(@sub_names, $1);
	  }
  }
  @nondup = grep { ! $duplicate{$_}++ } @sub_names;  ## removing duplicates
  for($i=0; $i< @keywords; $i++){
	  for($j=0; $j < @nondup; $j ++){
		  if($keywords[$i] eq $nondup[$j]){
			 splice(@nondup, $j, 1); $j--;
		  }
	  }
  }
  return(\@nondup);
}



#________________________________________________________________________
# Title     : set_special_options   (derived from set_debug_option)
# Usage     : &set_special_options;
# Function  : If you put special chars like '#' or  '##', '###..' at the
#             prompt of any program which uses
#             this sub you will get verbose printouts for the program if
#             the program has a lot of comments.
# Example   : &set_special_options.pl  ##    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of debugging printouts
#             ##  for even more debugging printouts
#             +   for more outputs(more calculations are shown, like statistics)
#             ++  even more outputs.(
#    $DEBUG    becomes 1 by '#'
#    $DEBUG2   becomes 1 by '##'
#    $VERBOSE  becomes 1 by '+'
#    $VERBOSE2 becomes 1 by '++'
#
# Returns   :  $debug, $verbose
# Argument  : Nothing in a program.
# Version   : 1.0
#             generalized debug var is added for more verbose printouts.
#--------------------------------------------------------------------
sub set_special_options{
  my($j, $i, $level, $key, %special_chars);
  %special_chars=('DEBUG'=>'#', 'VERBOSE'=>'+');

  for $key (keys %special_chars){
	 for($j=0; $j < @ARGV; $j ++){
		 if( $ARGV[$j] =~/([$special_chars{$key}]+)/){
			 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
			 ${"$key"}=1; print chr(7);
			 print __LINE__," \$$key  is set to ", ${"$key"}, "\n";
			 splice(@ARGV,$j,1); $j-- ;
			 $level = length($1)+1;
			 for($i=0; $i < $level; $i++){
				 ${"$key$i"}=1;
				 print __LINE__," \$${key}${i} is set to ", ${"$key$i"}, "\n";
			 }
		 }
	 }
  }
}

#________________________________________________________________________
# Title     : set_debug
# Usage     : &set_debug;
# Function  : If you put '#' or  '##' at the prompt of any program which uses
#             this sub you will get verbose printouts for the program if the program
#             has a lot of comments.
# Example   : set_debug #    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of verbose printouts
#             ##  for even more verbose printouts
# $debug  becomes 1 by '#'  or '_'
# $debug2 becomes 1 by '##'  or '__'
#
# Returns   :  $debug
# Argument  :
# Version   : 1.8
#             generalized debug var is added for more verbose printouts.
#--------------------------------------------------------------------
sub set_debug{
  my($j, $i, $level);
  unless( defined($debug) ){
	 for($j=0; $j < @ARGV; $j ++){
		 if( $ARGV[$j] =~/^(_+)$|^(#+)$/){ # in bash, '#' is a special var, so use '_'
			 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
			 $debug=1;
				  print chr(7);
			 print __LINE__," \$debug  is set to ", $debug, "\n";
			 splice(@ARGV,$j,1); $j-- ;
			 $level = length($1)+1;
			 for($i=0; $i < $level; $i++){
				 ${"debug$i"}=1;
				 print __LINE__," \$debug${i} is set to ", ${"debug$i"}, "\n";
			 }
		 }
	 }
  }
}

#________________________________________________________________________
# Title     : open_self
# Usage     : @lines =  &open_self;
# Function  :
# Example   :
# Warning   :
# Keywords  : read self, read_self, open self, open itself
# Options   :
# Returns   : one array
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub open_self{
  open(SELF, "$0");
  my(@Line)=<SELF>;
  return( \@Line );
}



#________________________________________________________________________
# Title     : tell_seq_length
# Usage     : %hash_out = %{&tell_seq_length(\%hash_in)};
# Function  : tells the sequence sizes of given sequences
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub tell_seq_length{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my(@out_hash, %hash);
  for($i=0; $i < @hash; $i++){
	 %hash = %{$hash[$i]};
	 @keys = keys %hash;
	 for ($j=0; $j < @keys; $j ++){
		if($hash{$keys[$j]}=~/\,\S+\,/){ @string= split(/\,/, $hash{$keys[$j]});
		}else{ @string= split(//, $hash{$keys[$j]}); }
		$h -> {$keys[$j]} = @string;  ## $h is the ref. of the anonymous hash
	 }                               ## This is equivalent to "$h{$keys[$j]}= $length;"
	 push(@out_hash , $h ) ;
  }
  if(@out_hash == 1){ $out_hash[0]; }
  elsif(@out_hash < 1){ die "\nSomething is wrong at tell_seq_length\n"; }
  elsif(@out_hash > 1){ return(@out_hash); }
}

#________________________________________________________________________
# Title     : do_window_scan
# Usage     : @out_array = @{&do_window_scan(\@input_array, $win_size)};
#             Often, bioters(Bio Computer Scientists) need to scan a long sequences
#             of DNA or Protein like(ABADFAFASDFASFASDFDFA or 109384717817947) to
#             caculate something out of them.
#             This routine is providing such scanning
#             function in perl.
# Function  : This is the core part of any window (of sequences)
#             scanning function.
# Example   :
# Warning   :
# Keywords  : scan_sequence, scan_window
# Options   :
# Returns   :
# Argument  :
# Version   : 1.3
#--------------------------------------------------------------------
sub do_window_scan{
  my(@string1) = @{$_[0]};
  my($win_size) = ${$_[1]} || $_[1];
  my($w, $start, $end, $average1, @window_1, $w_abs, @out_string),

  $start = -($win_size - ($win_size%2))/2; ## If the window size is odd numbers (eg 5),
									 ## the starting average position is 0  from (-2,-1,0,1,2)
									 ## If it is even numbers (eg 4)
									 ## the starting position is 0 from (-2,-1,0,1)
  $end = @string1 - ($win_size - ($win_size%2))/2;
								 ## End point is also dependent on oddity of the numbers
								 ## for window size.
	 for ($w= $start; $w < $end ; $w ++){
		$moving_window = $w + $win_size - 1;
		@window_1= @string1[$w .. $moving_window ];
		if($w < 0){          #### This if is to prevent the circularization of the array
			$w_abs = abs($w); #### (like  6 7 1 2 3 4 5 , for  . . 1 2 3 4 5 from 1234567 )
			splice(@window_1, 0, $w_abs);  # $w_abs is the absolute value of $w
		}

		################ PUT YOUR calc HERE #####

		 $average1= ${average_of_array(\@window_1, 'int')};

		################ PUT YOUR calc HERE #####

		push(@out_string, $average1);
		print "\nWinSize:$win_size halfwin: $half_win_size str size: $string_size \(from offset: $offset to  mov_wind: $moving_window \) AV: $average1 of  win1:", @window_1, "\n";
	 }
	 \@out_string;
}

#________________________________________________________________________
# Title     : scan_window_and_calc_something
# Usage     :
# Function  : scans any given length window of sequence and computes something.
# Example   :
# Warning   :
# Keywords  :
# Options   : average for getting average of given window size.
#             sum for getting sum of given window size.
# Returns   :
# Argument  :
# Version   : 1
#--------------------------------------------------------------------
sub scan_window_and_calc_something{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  my $win_size = $num_opt[0];
  print "\n $win_size\n";
  for($o = 0; $o < @char_opt; $o ++){
	  if($char_opt[$o] eq ""){
		  splice(@char_opt, $o, 1); $o --;
	  }
	  elsif($char_opt[$o] eq "a"){
		  if(@hash ==1){
			  print "\n Now I am doing  scan_window_and_calc_average with \n\t\t
							@hash, and $win_size ";
			  %out_hash_final = %{scan_window_and_calc_average(@hash, \$win_size)};

		  }elsif(@hash > 1){
			  @out_hash_final = @{scan_window_and_calc_average(@hash, \$win_size)};
		  }
	  }
	  elsif($char_opt[$o] eq "s"){
		  if(@hash ==1){
			  %out_hash_final = %{scan_window_and_calc_sum(\@hash, \$win_size)};
		  }elsif(@hash > 1){
			  @out_hash_final = @{scan_window_and_calc_sum(\@hash, \$win_size)};
		  }
	  }
  }

  #_______________________________________________
  # Options  : 1, 2
  #######################################################
  sub scan_window_and_calc_sum{
		my(@hash)=@{$_[0]};
		my($win_size)  =${$_[1]};
		my(%out_hash, $i, $j, $w, $string1, @string, @window_1, $moving_window,
			$sum, $out_string, @keys, %input, $half_win, $actual_win_size);

		for($i = 0; $i < @hash; $i++){
			%input = %{$hash[$i]};
			@keys = sort keys %input;
			for ($j=0; $j < @keys; $j ++){
				$string1 = $input{$keys[$j]};
				@string1 = split(//, $string1);

				##### This small for loop is important for AVERAGE calc. To return the original residue value
				##### as there is no point in calculating segment smaller than window size.
				if ($return_type_option eq ''){
					&main_calc_from_half_win_size_pos;
				}elsif($return_type_option == 1){
					&return_the_av_for_small_win_size;
					&main_calc_from_half_win_size_pos;
				}

				#############################################
				sub return_the_resicue_value{
					$half_win = int($win_size/2);
					for($pre = 0; $pre < $half_win; $pre ++){
						$out_string .= "$string1[$pre]\,";

						print "\nWindow size $win_size \(from 0 to  $pre \)  of  ", @window_1, "\n";
					}
				}

				###################################################################
				#### The actual Window scanning and summing part.           ####
				###################################################################
				sub main_calc_from_half_win_size_pos{
				  for ($w=0; $w < @string1; $w++){

					 my($offset) = $w - int($win_size/2);  # $offset starts from -5 when window_size is 10.
					 my($half_win_size)= int($win_size/2);
					 $offset = 0 if ($offset < 0);

					 $moving_window = $w + $half_win_size - 1;

					 $actual_win_size = $moving_window - $offset + 1;

					 @window_1= @string1[$offset..($moving_window)]; ### This is the segment.

					 $sum= ${sum_of_array(\@window_1, 'int')};
					 $out_string .="$sum\,";
					 print "\nWindow size $actual_win_size \(from $offset to  $moving_window \) $sum of  ", @window_1, "\n";

				  }
				  $out_hash{$keys[$j]} = $out_string;
				  $out_string='';
				}
				#############################################
			}
			push(@out_hash, \%out_hash);
		}
		if(@out_hash ==1){ \%out_hash; }elsif(@out_hash >1){ \@out_hash; }
  }
  ############### Sub end ########################################
  if(@hash ==1){ \%out_hash_final; }elsif(@hash >1){ \@out_hash_final; }

}

#________________________________________________________________________
# Title     : scan_window_and_calc_average
# Usage     : %out_hash_final = %{scan_window_and_calc_average(\@hash, \$win_size)};
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub scan_window_and_calc_average{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my (%input, @string1, $string1, $start, $end, $moving_window, $average1, $out_string);
  my ($win_size)            =$num_opt[0];

  for($i = 0; $i < @hash; $i++){
	  %input = %{$hash[$i]};
	  @keys = sort keys %input;
	  for ($j=0; $j < @keys; $j ++){
		  $string1 = $input{$keys[$j]};
		  @string1 = split(//, $string1);
		  &do_window_scan_for_average;

		  sub do_window_scan_for_average{
			 ##### Following $start and $end are critically important for correct calc.
			 $start = -($win_size - ($win_size%2))/2; ## If the window size is odd numbers (eg 5),
																	## the starting average position is 0  from (-2,-1,0,1,2)
																	## If it is even numbers (eg 4)
																	## the starting position is 0 from (-2,-1,0,1)
			 $end = @string1 - ($win_size - ($win_size%2))/2;
																	## End point is also dependent on oddity of the numbers
																	## for window size.
			 #################################################################
			 for ($w= $start; $w < $end ; $w ++){
				$moving_window = $w + $win_size - 1;
				@window_1= @string1[$w .. $moving_window ];
				if($w < 0){          #### This if is to prevent the circularization of the array
					$w_abs = abs($w); #### (like  6 7 1 2 3 4 5 , for  . . 1 2 3 4 5 from 1234567 )
					splice(@window_1, 0, $w_abs);  # $w_abs is the absolute value of $w
				}
				$average1= ${average_of_array(\@window_1, 'int')};
				$out_string .="$average1\,";
				print "WinSize:$win_size halfwin: $half_win_size str size: $string_size \(from offset: $offset to  mov_wind: $moving_window \) AV: $average1 of  win1:", @window_1, "\n";
			 }
			 $out_hash{$keys[$j]} = $out_string;
			 $out_string='';
			 print "\n";
		  }
		  #############################################
	  }
	  push(@out_hash, \%out_hash);
  }
  if(@out_hash ==1){ \%out_hash; }elsif(@out_hash >1){ \@out_hash; }
}


#________________________________________________________________________
# Title     : read_blast_hits
# Usage     : @array_of_names = @{&read_blast_hits(\$file_name, \$threshold)};
# Function  : This reads the output of blastp program(xxxx.bla or whatever file extension
#             you attatched). And produces the names of found sequences which are
#             above(smaller in probability) a certain threshold in the blast result.
#             For example, it will produce a reference of an array (@hits, in the code)
#             which contains (1mbs, 1pmb, 1ymb) from the example in this header box(down the
#             lines) with the given (you give!) threshold of, say, 0.0001.
# Example   :
# Warning   :
# Keywords  : bla2fasta, take_blast_hits
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#      - - - - -  EXample of blastp file  - - - - - - - - - - - - - - - - - - - - - - - - -
#      BLASTP 1.4.8 [19-Dec-94] [Build 16:06:14 Jul 26 1995]
#      Reference:  Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers,
#      and David J. Lipman (1990).  Basic local alignment search tool.  J. Mol. Biol.
#      215:403-10.
#      Query=  1mbs
#      (153 letters)
#      Database:  /nfs/ind4/ccpe1/people/A Biomatic /jpo/align/all_in_fasta.fas
#      406 sequences; 77,134 total letters.
#      Searching..................................................done
#      WARNING:  -hspmax 100 was exceeded with 13 of the database sequences, with as
#      many as 173 HSPs being found at one time.
#      Smallest
#      Sum
#      High  Probability
#      Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
#      1mbs                                                          804  2.0e-109  1
#      1pmb                                                          718  1.4e-97   1
#      1ymb                                                          707  4.7e-96   1
#      2xxx                                                           31  0.55      1
#--------------------------------------------------------------------
sub read_blast_hits{
  my ($threshold);
  my(@hits);
  my($file)= ${$_[0]};
  $threshold = ${$_[1]} ;

  open(BLA_FILE, "$file");
  while(<BLA_FILE>){
	  if(/^ *(\w+)[\t ]+\d+ +(\d+[\.]*[\d]+[\w\-\d]*) +\d+/){
		 $probability = $2;
		 if($probability <= $threshold){
			push(@hits, $1);
		 }elsif( $probability > 0.6 ){ ### In Blast, p value over 0.3 or 0.4 is too high.
			last;                       ### So, to reduce the file reading time.
		 }
	 }
  }
  \@hits;  ## I am returning a ref. of the array rather than array.
}          ## In fact, just @array is fine enough, but for future addition of arg.

#________________________________________________________________________
# Title     : put_gaps_every_x_position_in_string.pl  (operator function)
# Usage     :
# Function  :
# Example   : "1234567890123456789012345678901234567890"  will be
#             "1234567890 1234567890 1234567890 1234567890"
#             with
#                &put_gaps_every_x_position_in_string(\$test, 10, ' ')
# Warning   : it does not returns reference
# Keywords  : put_space_in_sequence, put_gaps_in_sequence, put_gaps,
#             put_space
# Options   :
# Returns   :
#             every char.
# Argument  : 3 arg. One is the string, second is the interval number, third is
#             the gap separater
# Version   : 1.1
#--------------------------------------------------------------------
sub put_gaps_every_x_position_in_string{
	my($string);
	if(ref($_[0])){
	  $string = ${$_[0]};
	}else{ $string = $_[0]; }

	my($interval) = $_[1];     my($gap_char) = $_[2];
	$string =~ s/(.{$interval,$interval})/$1$gap_char/g;
	$string;  ### Not a reference ###
}


#________________________________________________________________________
# Title     : transform_values
# Usage     : Used in predict_secondary_structure
# Function  : transform any value to another value with given table, matrix..
#             This is used to transform Amino Acid to its various propensities
#             If you feed a sequence 'ACDEDA', this transforms it to '
#             '124741' if the table given is 'A->1, C->2, D->4, E->7'
# Example   :
#             IN =>  to transform E and H to 9 and 4
#
#             1cdg_6taa      -------EEE-----------HH--HHHH------EE---------EEE-
#             1cdg_2aaa      -------EEE-----------HH--HHHH------EE---------EEE-
#             2aaa_6taa      -------EEEEE------EE-HHHHHHHH----EEEE-------EEEEE-
#
#             OUT
#             1cdg_6taa      -------999-----------44--4444------99---------999-
#             1cdg_2aaa      -------999-----------44--4444------99---------999-
#             2aaa_6taa      -------99999------99-44444444----9999-------99999-
#
# Warning   :
# Keywords  :
# Options   :
# Returns   : hash(es)
#             Sheraga_alpha_matrix
#             Richardson_alpha_matrix  or any conversion table made in a hash.
#
# Argument  : hash(es) and Matrix or table for conversion.
# Version   : 1.0
#--------------------------------------------------------------------
sub transform_values{
  my($a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r,
	  $s, $t, $u, $v, $w, $x, $y, $z, $pwd, $file, $dir, $output, $in_dir,
	  %hash, @keys, @array, @hash, $option_string, $string, @in,
	  $name, %out, $gap_chr, @str1, @str2, @hash_output, $given_matrix
  );

  ##########################################
  #####   general argument handling   ######
  ##########################################
  for($k=0; $k< @_ ;$k++){
	  if( ( !ref($_[$k]) )&&($_[$k]=~ /^(\w)$/) ){
		  $option_string  .= $1;
	  }elsif( ( !ref($_[$k]) )&&($_[$k]=~ /^(\d*\w+matrix)/) ){
		  $given_matrix  = $1;
	  }elsif( ( !ref($_[$k]) )&&($_[$k]=~ /^(\d*\w+table)/) ){
		  $given_table  =  $1;
	  }elsif((ref($_[$k]) eq "ARRAY")&&(${$_[$k]}=~ /^(\w)$/) ){
		  $option_string  .= $1;
	  }elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(\w)$/) ){
		  $option_string  .= $1;
	  }elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(\d*\w+matrix)$/) ){
		  $given_matrix  = $1;
	  }elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(\d*\w+table)$/) ){
		  $given_table  =  $1;
	  }elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(\w+)$/) ){
		  $dummy{'dummy'}=${$_[$k]};       ## When mere sequence string is given, it
		  push(@hash,  \%dummy);     # makes a dummy hash to mimick hash input
	  }elsif(ref($_[$k]) eq "HASH") { push(@hash,  $_[$k]); }
  }

  if( defined( $given_matrix ) ){   ## calling given matrix.
	  &{"$given_matrix"};
  }elsif( defined( &{"$given_table"} ) ){  ## calling given table.
	  &{"$given_table"};
  }

  for( $k=0; $k < @hash; $k ++){
	  my(%hash) = %{$hash[$k]};
	  my(@keys) = keys %hash;
	  my(@out_string);
	  for($i=0; $i < @keys; $i++){
		  @string = split(/|\,/, $hash{$keys[$i]} ); ## splitting the string(value of
																	## the hash into array.
		  for($j=0; $j < @string; $j ++){
			  $out_string[$j]= ${"$given_matrix"}{ $string[$j] };
		  }
		  $out_hash{$keys[$i]} = join(',', @out_string);
	  }
	  push(@hash_output, \%out_hash);
  }
  if(@hash_output > 1){ @hash_output }else{ $hash_output[0] }
}

#________________________________________________________________________
# Title     : Sheraga_alpha_matrix
# Usage     :
# Function  : an alpha matrix propensity table.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub Sheraga_alpha_matrix{
  %Sheraga_alpha_matrix = ( ## ref: Protein Eng. V.8, no9, pp905-913, 1995
		  'A', 	1.07,
		  'C', 	0.99,
		  'D', 	0.68,
		  'E', 	0.97,
		  'F', 	1.09,
		  'G', 	0.59,
		  'H', 	0.69,
		  'I', 	1.14,
		  'K', 	0.94,
		  'L', 	1.14,
		  'M', 	1.20,
		  'N', 	0.78,
		  'P',    0.19,
		  'Q', 	0.98,
		  'R', 	1.03,
		  'S', 	0.76,
		  'T', 	0.82,
		  'V', 	0.95,
		  'W', 	1.11,
		  'Y',    1.02
  );
  return(%Sheraga_alpha_matrix);
}
#________________________________________________________________________
# Title     : Richardson_alpha_matrix
# Usage     :
# Function  : an alpha matrix propensity table.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub Richardson_alpha_matrix{
  %Richardson_alpha_matrix = ( ## ref: Protein Eng. V.8, no9, pp905-913, 1995
		  'A', 	1.80,
		  'C', 	0.70,
		  'D', 	1.00,
		  'E', 	0.80,
		  'F', 	1.30,
		  'G', 	0.,     ## <<----
		  'H', 	0.69,
		  'I', 	1.14,
		  'K', 	0.94,
		  'L', 	1.14,
		  'M', 	1.20,
		  'N', 	0.78,
		  'P',    0.19,
		  'Q', 	0.98,
		  'R', 	1.03,
		  'S', 	0.76,
		  'T', 	0.82,
		  'V', 	0.95,
		  'W', 	1.11,
		  'Y',    1.02
  );
  return(%Richardson_alpha_matrix);
}


#________________________________________________________________________
# Title     : get_segment_shift_rate
# Usage     : &get_segment_shift_rate(\%hash_for_errors, \%hash_for_sec_str);
# Function  : calculates the secondary structure segment shift rate.
# Example   : <input example> First block is for the first hash input
#                             and Second is for the second hash input.
#
#             1cdg_6taa      00000442222222222242222222222777700000007000000000
#             1cdg_2aaa      00000442222222222242222222222777700000007000000000
#             2aaa_6taa      00000000000000000000000000000000000000000000000000
#
#             1cdg_6taa      -------EEE-----------EE--EEEE------EE---------EEE-
#             1cdg_2aaa      -------EEE-----------EE--EEEE------EE---------EEE-
#             2aaa_6taa      -------EEEEE------EE-EEEEEEEE----EEEE-------EEEEE-
#
#             <intermediate output example>
#             2aaa_6taa      -------00000---------00000000----0000-------00000-
#             1cdg_6taa      -------442---------------2222-----------------000-
#             1cdg_2aaa      -------222---------------2222-----------------000-
#
#             <Final output>
#             2aaa_6taa      0%
#             1cdg_6taa      67%
#             1cdg_2aaa      67%
#
# Warning   :
# Keywords  :
# Options   : 'p' or 'P' for percentage term(default)
#             'r' or 'R' for ratio term (0.0 - 1.0), where 1 means all the
#              segments were wrongly aligned.
#             's' or 'S' for Shift rate (it actually caculates the position shift
#              rate for the secondary structure segment.
#             'h' or 'H' for position Shift rate (it actually caculates the position
#              shift rate for helical segments). If this is the only option, it
#              will show the default percentage term rate for helical segments.
#              If used with 'r', it will give you ratio (0.0 - 1.0) for helical
#              segment. If used with 's' option, it will give you position shift
#              rate for only helical segments.
#             'e' or 'E' for position Shift rate (it actually caculates the position
#              shift rate for beta segments). If this is the only option, it will
#              show the default percentage term rate for beta segments. If used
#              with 'r', it will give you ratio (0.0 - 1.0) for beta. If used
#              with 's' option, it will give you position shift rate for only
#              beta segments.
# Returns   :
# Argument  : Two references of hashes. One for error rate the other for sec.
#             assignment.
# Version   : 1.1
#--------------------------------------------------------------------
sub get_segment_shift_rate{
  my($i, $k, $j, @hash, $option_string, %h, %superposed_hash,
	  $name, %out, $gap_chr, @str1, @str2, %temp, %hash_error, %hash_secondary);
  #"""""""""""""""""""""""""""""""""""""""""
  #       general argument handling        #
  #"""""""""""""""""""""""""""""""""""""""""
  for($k=0; $k< @_ ;$k++){
	  if( ( !ref($_[$k]) )&&($_[$k]=~ /^(\w)$/) ){
		  $option_string  .= $1;    }
	  elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(\w)$/) ){
		  $option_string  .= $1;    }
	  elsif(ref($_[$k]) eq "HASH") {
		  %temp = %{$_[$k]};
		  my(@keys)= sort keys (%temp);
		  my($temp_seq) = $temp{$keys[0]};

		  if($temp_seq=~/\d\d+/){
			  %hash_error = %temp; }
		  else{ %hash_secondary = %temp; }
	  }
  }#### OUTPUT are  : %hash_error  &  %hash_secondary
  #"""""""""""""""""""""""""""""""""""""""""
  #       general argument handling end    #
  #"""""""""""""""""""""""""""""""""""""""""
  %hash_secondary =%{&tidy_secondary_structure_segments(\%hash_secondary)};
  %superposed_hash =%{&superpose_seq_hash(\%hash_error, \%hash_secondary)};
  %h=%{&get_wrong_segment_rate(\%superposed_hash)};
  \%h;
}

#________________________________________________________________________
# Title     : get_wrong_segment_rate
# Usage     : print_seq_in_block( &get_wrong_segment_rate(\%superposed_hash) );
# Function  : Treats the segment as one single big error.
#             calculates the wrong segment number compared to the correct ones.
# Example   : <input example> hash of 3 keys and values.
#             2aaa_6taa      -------00000---------00000000----0000-------00000-
#             1cdg_6taa      -------442---------------2222-----------------000-
#             1cdg_2aaa      -------222---------------2222-----------------000-
#
#             In the above there are two segments wrong in 3 segment blocks = 2/3
#             <output example> hash of 3 percentage rates.
#
#             2aaa_6taa      0 %
#             1cdg_6taa      66.6666666666667 %
#             1cdg_2aaa      66.6666666666667 %
#
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_wrong_segment_rate{
  my($a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r,
	  $s, $t, $u, $v, $w, $x, $y, $z, %h, $seg_min,
	  %hash, @keys, @array, @hash, $option_string, $string,
	  $name, %out, $gap_chr, @str1, @str2, $seg, $len, $wrong_seg, $correct_seg
  );
  %hash=%{$_[0]};
  $seg_min =$_[1];
  if($seg_min !~/\d+/){ $seg_min = 3; } ### Default segmin is 3
  @keys = sort keys (%hash);
  for $k (@keys){
	 my($string) = $hash{$k}; $string =~s/\,//g;
	 my(@segments) = split(/[\-\.\ ]+/, $string);
	 for $seg (@segments){
		$len=length($seg);
		if( $len >= $seg_min){
			if($seg =~/[1-9]/){
				$wrong_seg ++;  }
			else{ $correct_seg ++; }
		}
	 }
	 $h{$k}= ($wrong_seg/($wrong_seg + $correct_seg)*100).' %';
	 $wrong_seg=$correct_seg='';
  }
  \%h;
}


#________________________________________________________________________
# Title     : tidy_secondary_structure_segments
# Usage     : print_seq_in_block(&tidy_secondary_structure_segments(\%hash, 'e4', 'h4'), 's');
#
# Function  : receives any secondary structure assignment hashes and
#             tidys up them. That is removes very shoft secondary structure
#             regions like( --HH--, -E-, -EE- ) according to the given minimum
#             lengths(threshold) of segments by you.
# Example   : print_seq_in_block(&tidy_secondary_structure_segments(\%hash, 'e4', 'h4'), 's');
#             <makes following into the next block>
#
#             1cdg_2aaa      -------EEE-----------EE--EEEE------EE---------EEE-
#             1cdg_6taa      -------EEE-----------EE--EEEE------EE---------EEE-
#             2aaa_6taa      -------EEEEE------EE-EEEEEEEE----EEEE-------EEEEE-
#
#             <example output>
#
#             1cdg_6taa      -------------------------EEEE---------------------
#             1cdg_2aaa      -------------------------EEEE---------------------
#             2aaa_6taa      -------EEEEE---------EEEEEEEE----EEEE-------EEEEE-
#
# Warning   :
# Keywords  :
# Options   : something like 'H3' or 'E3' for minimum segment length set to 3 positions.
# Returns   : array of references of hashes.
# Argument  : hashes and [options]. No options result in default of 'H3', 'E3'
# Version   : 1.0.0
#--------------------------------------------------------------------
sub tidy_secondary_structure_segments{
  my($i, $k,$a, $j, $helix_min, $beta_strand_min, %hash, @keys, @hash,
	  $option_string, @hash_out, $string1, $name, %out, $gap_chr, @str1, @str2,
	  @stringout, @string_segH, @string_segE, $countH, $countE
	  );

  #### Default helix and beta strand segment length setting #####
  $helix_min=3;
  $beta_strand_min=3;

  ########################################################################
  #####   general argument handling  for options of segment length  ######
  ########################################################################
  for($k=0; $k< @_ ;$k++){
	  if( ( !ref($_[$k]) )&&($_[$k]=~ /^[Hh](\d+)$/) ){
		  $helix_min  = $1;    }
	  elsif( ( !ref($_[$k]) )&&($_[$k]=~ /^[Ee](\d+)$/) ){
		  $beta_strand_min  = $1;    }
	  elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^[Hh](\d+)$/) ){
		  $helix_min  = $1;    }
	  elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^[EeBb](\d+)$/) ){
		  $beta_strand_min  = $1;    }
	  elsif(ref($_[$k]) eq "HASH") { push(@hash,  $_[$k]); }    }

  for($i=0; $i < @hash; $i++){
	  my(%hash) = %{$hash[$i]};
	  @keys = sort keys( %hash );
	  for($j=0; $j < @keys; $j++){
		  my(@string_segH, @string_segE, @stringout);
		  $string1=$hash{$keys[$j]};
		  $gap_char = $1 if ($string1=~ /(\W)/);

		  ##### actual cleaning ####
		  my(@string) = split(//, $string1);
		  for($a = 0; $a < @string; $a++){
			 if($string[$a] !~/[HE]/){ ### if the splited element doesn't match 'H' or 'E'

				 ##### If any of the HH or EE counter is over the given minimum($helix_min,,)
				 if((@string_segH >= $helix_min)||( @string_segE >=$beta_strand_min)){
					 push(@stringout, @string_segH, @string_segE, '-');
					 @string_segH=@string_segE=();     }   ## just resetting.
				 else{  ### if the accumulated 'HH' or 'EE' is smaller than the minimum
					 for(0.. (@string_segH + @string_segE) ){
						push(@stringout, '-'); ### replace the short 'EE' etc with '-'
					 }
					 @string_segH=@string_segE=();  ## just resetting.
				 }
			 }
			 elsif($string[$a] =~ /^([Hh])$/){
				 push(@string_segH, $1); }
			 elsif($string[$a] =~ /^([Ee])$/){
				 push(@string_segE, $1); }
		  }
		  $hash{$keys[$j]}=join("", @stringout);
	  }
	  push(@hash_out, \%hash);
  }
  if(@hash_out == 1){ return($hash_out[0]);
  }elsif(  @hash_out > 1 ){ return(@hash_out); }
}

#________________________________________________________________________
# Title     : define_secondary_structure_segments, synonim of tidy_secondary_structure_segments
# Usage     : print_seq_in_block(&define_secondary_structure_segments(\%hash, 'e4', 'h4'), 's');
#
# Function  : receives any secondary structure assignment hashes and
#             tidys up them. That is removes very shoft secondary structure
#             regions like( --HH--, -E-, -EE- ) according to the given minimum
#             lengths of segments.
# Example   : print_seq_in_block(&define_secondary_structure_segments(\%hash, 'e4', 'h4'), 's');
#             <makes following into the next block>
#
#             1cdg_2aaa      -------EEE-----------EE--EEEE------EE---------EEE-
#             1cdg_6taa      -------EEE-----------EE--EEEE------EE---------EEE-
#             2aaa_6taa      -------EEEEE------EE-EEEEEEEE----EEEE-------EEEEE-
#
#             <example output>
#
#             1cdg_6taa      -------------------------EEEE---------------------
#             1cdg_2aaa      -------------------------EEEE---------------------
#             2aaa_6taa      -------EEEEE---------EEEEEEEE----EEEE-------EEEEE-
#
# Warning   :
# Keywords  :
# Options   : something like 'H3' or 'E3' for minimum segment length set to 3 positions.
# Returns   : array of references of hashes.
# Argument  : hashes and [options]. No options result in default of 'H3', 'E3'
# Version   : 1.0
#--------------------------------------------------------------------
sub define_secondary_structure_segments{
  my($i, $k,$a, $j, $helix_min, $beta_strand_min, %hash, @keys, @hash,
	  $option_string, @hash_out, $string1, $name, %out, $gap_chr, @str1, @str2,
	  @stringout, @string_segH, @string_segE, $countH, $countE
	  );

  #### Default helix and beta strand segment length setting #####
  $helix_min=3;
  $beta_strand_min=3;

  ########################################################################
  #####   general argument handling  for options of segment length  ######
  ########################################################################
  for($k=0; $k< @_ ;$k++){
	  if( ( !ref($_[$k]) )&&($_[$k]=~ /^[Hh](\d+)$/) ){
		  $helix_min  = $1;    }
	  elsif( ( !ref($_[$k]) )&&($_[$k]=~ /^[Ee](\d+)$/) ){
		  $beta_strand_min  = $1;    }
	  elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^[Hh](\d+)$/) ){
		  $helix_min  = $1;    }
	  elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^[EeBb](\d+)$/) ){
		  $beta_strand_min  = $1;    }
	  elsif(ref($_[$k]) eq "HASH") { push(@hash,  $_[$k]); }    }

  for($i=0; $i < @hash; $i++){
	  my(%hash) = %{$hash[$i]};
	  @keys = sort keys( %hash );
	  for($j=0; $j < @keys; $j++){
		  my(@string_segH, @string_segE, @stringout);
		  $string1=$hash{$keys[$j]};
		  $gap_char = $1 if ($string1=~ /(\W)/);

		  ##### actual cleaning ####
		  my(@string) = split(//, $string1);
		  for($a = 0; $a < @string; $a++){
			 if($string[$a] !~/[HE]/){ ### if the splited element doesn't match 'H' or 'E'

				 ##### If any of the HH or EE counter is over the given minimum($helix_min,,)
				 if((@string_segH >= $helix_min)||( @string_segE >=$beta_strand_min)){
					 push(@stringout, @string_segH, @string_segE, '-');
					 @string_segH=@string_segE=();     }   ## just resetting.
				 else{  ### if the accumulated 'HH' or 'EE' is smaller than the minimum
					 for(0.. (@string_segH + @string_segE) ){
						push(@stringout, '-'); ### replace the short 'EE' etc with '-'
					 }
					 @string_segH=@string_segE=();  ## just resetting.
				 }
			 }
			 elsif($string[$a] =~ /^([Hh])$/){
				 push(@string_segH, $1); }
			 elsif($string[$a] =~ /^([Ee])$/){
				 push(@string_segE, $1); }
		  }
		  $hash{$keys[$j]}=join("", @stringout);
	  }
	  push(@hash_out, \%hash);
  }
  if(@hash_out == 1){ return($hash_out[0]);
  }elsif(  @hash_out > 1 ){ return(@hash_out); }
}






#________________________________________________________________________
# Title     : overlay_seq_by_certain_chars
# Usage     : %out =%{&overlay_seq_by_certain_chars(\%hash1, \%hash2, 'HE')};
# Function  : (name1 000000112324)+(name1  ABC..AD..EFDK ) => (name1 000..00..12324)
#             (name2 000000112324)+(name2  --HHH--EEEE-- ) => (name1 ---000--1123--)
#             uses the second hash a template for the first sequences. gap_char is
#             '-' or '.' or any given char or symbol.
#             To insert gaps rather than overlap, use insert_gaps_in_seq_hash
# Example   : %out =%{&overlay_seq_by_certain_chars(\%hash1, \%hash2, 'E')};
#             output> with 'E' option >>> "name1     --HHH--1232-"
# Warning   : If gap_chr ('H',,,) is not given, it replaces all the
#             non-gap chars (normal alphabet), ie,
#             it becomes 'superpose_seq_hash'
# Keywords  : Overlap, superpose hash, overlay, superpose_seq_hash
# Options   : E for replacing All 'E' occurrances in ---EEEE--HHHH----, etc.
#             : H for replacing all 'H'  "     " "
# Returns   : one hash ref.
# Argument  : 2 ref for hash of identical keys and value length.
# Version   : 1.0
#--------------------------------------------------------------------
sub overlay_seq_by_certain_chars{
  my($i, $k,$j, $name, @in, %out, $gap_chr, @str1, @str2);
  ######################################
  ####### Sub argument handling ########  $gap_chr here can be 'HE' etc.
  ######################################
  for($k=0; $k< @_ ;$k++){
	  if( ( !ref($_[$k]) )&&($_[$k]=~ /^(.+)$/) ){
		  $gap_chr  .= $1;
	  }elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(.)+$/) ){
		  $gap_chr  .= $1;
	  }elsif(ref($_[$k]) eq "HASH") { push(@in,  $_[$k]); }
  }

  if($#in < 1){
	  print "\n overlay_seq_by_certain_chars needs 2 hashes. Error \n"; exit; }
  my(%hash1)=%{$in[0]};
  my(%hash2)=%{$in[1]};
  my(@names1)= sort keys %hash1;
  my(@names2)= sort keys %hash2;
  (@names1 > @names2)? $bigger=@names1 : $bigger=@names2;
  for ($j=0; $j < $bigger; $j++){
	  @str1=split(//, $hash1{$names1[$j]});
	  @str2=split(//, $hash2{$names2[$j]});
	  if( ($gap_chr eq '') && ($hash2{$names2[$j]}=~/(\W)/) ){
		  $gap_chr=$1;
		  for($i=0; $i < @str2; $i++){
			  if($str2[$i] =~ /$gap_chr/){ $str1[$i]=$gap_chr;}     }
		  $out{$names1[$j]}=join(",", @str1);
	  }else{
		  for($i=0; $i < @str2; $i++){
			  if($gap_chr =~ /$str2[$i]/){ $str2[$i]=$str1[$i];}    }
		  $out{$names1[$j]}=join(",", @str2);    }
  }
  return(\%out);
}



#________________________________________________________________________
# Title     : rev_lines_pdb
# Usage     : &rev_lines_pdb(\$ARGV[0]);
# Function  : reorders the lines of any pdb files, but takes only C alpha positions.
# Example   :
#             The INPUT example >
#
#             ATOM    191  CA  ALA   195      -2.566   8.099  42.827  1.00 12.42      1ENG 256
#             ATOM    192  CA  ARG   196      -1.401  11.546  41.629  1.00  8.63      1ENG 257
#             ATOM    193  CA  THR   197      -4.073  13.846  43.107  1.00  9.93      1ENG 258
#
#             The OUTPUT example >             <first file, called  xxxx1.atm >
#
#             ATOM      1  CA  ALA     1      -2.566   8.099  42.827  1.00 12.42      1ENG 256
#             ATOM      2  CA  ARG     2      -1.401  11.546  41.629  1.00  8.63      1ENG 257
#             ATOM      3  CA  THR     3      -4.073  13.846  43.107  1.00  9.93      1ENG 258
#
#                                           <2nd file, called  xxxx2.atm >
#             ATOM      1  CA  THR     1      -4.073  13.846  43.107  1.00  9.93      1ENG 258
#             ATOM      2  CA  ARG     2      -1.401  11.546  41.629  1.00  8.63      1ENG 257
#             ATOM      3  CA  ALA     3      -2.566   8.099  42.827  1.00 12.42      1ENG 256
#
# Warning   : A Biomatic
# Keywords  :
# Options   : None
# Returns   : directly writes two output files  xxxx1.atm  xxxx2.atm
# Argument  : one pdb coordinate file reference
# Version   : 1.0
#--------------------------------------------------------------------
sub rev_lines_pdb{
	 my(@lines, $i, $c, @line_rev, $ATOM, $RES );
	 my($input_file_name) = ${$_[0]};
	 open(INPUT, "$input_file_name");
	 while(<INPUT>){  push(@lines, $_); $whole++;  }
	 my($base_name) =${&get_base_name(\$input_file_name)};
	 my($file1) = "${base_name}1\.atm";
	 my($file2) = "${base_name}2\.atm";

	 ##################################################################################
	 # This section is for forward C alpha lines (it renumbers residues and atoms)
	 ##################################################################################
	 open(F1, ">$file1");
	 open(F2, ">$file2");

	 for($i=0; $i < $whole ; $i ++){
		  if($lines[$i]=~ /^(ATOM +)(\d+)(  CA  \w+ +)(\d+)( +.+)$/){
				$c++;
				if( length($2)== length($c) ){
					 unshift(@lines2, "$1$c$3$c$5\n");
					 print F1 "$1$c$3$c$5\n"; next;
				}
				if( length($2) != length($c) ){
					 if( abs( ( length($2)- length($c) )) ==1){
						  unshift(@lines2, "$1 $c$3 $c$5\n");
						  print F1 "$1 $c$3 $c$5\n"; next;
					 }
					 if( abs( (length($2)-length($c))) ==2){
						  unshift(@lines2, "$1  $c$3  $c$5\n");
						  print F1 "$1  $c$3  $c$5\n"; next;
					 }
				}
		  }
	 }
	 close F1;

	 ##################################################################################
	 # This section is for reversed C alpha lines (it renumbers residues and atoms)
	 ##################################################################################
	 $c=0;
	 for($i= 0; $i < $whole; $i ++){
		  if($lines2[$i]=~ /^(ATOM +)(\d+)(  CA  \w+ +)(\d+)( +.+)$/){
				$c++; $num = $1;
				if( length($2)== length($c) ){  # these are for column position adjustment
					 printf F2 "$1$c$3$c$5\n";
					 next;
				}
				elsif( length($2) != length($c) ){
					 if( (length($2)- length($c)) ==1){
							 print F2 "$1 $c$3 $c$5\n";
							 next;
					 }
					 elsif( (length($2)- length($c)) == -1){
							 $ATOM=$1; $RES=$3;
							 chop($ATOM); chop($RES);
							 print F2 "$ATOM$c$RES$c$5\n";
							 next;
					 }
					 elsif( (length($2)- length($c)) ==2){
							 print F2 "$1  $c$3  $c$5\n";
							 next;
					 }
					 elsif( (length($2)- length($c)) == -2){
							 $ATOM=$1; $RES=$3;
							 chop($ATOM); chop($RES); chop($ATOM); chop($RES);
							 print F2 "$ATOM$c$RES$c$5\n";
							 next;
					 }
				}
		  }
	 }
	 close F2;

	 ##################################################################################
	 #   Final result
	 ##################################################################################

	 print "\n Files   $file1, $file2  are created \n\n\n";

}


#________________________________________________________________________
# Title     : tally_2_hashes (used for get_cs_rate_for_pairs_stat.pl )
# Usage     : ($ref1, $ref2) = &tally_2_hashes(\%hash1, \%hash2, ['n', 'a', 'p', 'i']);
#              %tally_addedup=%{$ref1};    '0' position had addedup value of 1000
#              %tally_occurances=%{$ref2}; '0' position had occurred 100 times,
#                                          '0' on average had 10 in its
#                                              corresponding hash positions
# Function  : Makes hashes of tallied occurances and summed up values for disits in
#             positions.
#             calculates the occurances or occurance rates of CS rate positions.
#             The hashes should have numbers.
# Example   : you put two hash refs. (ass. array) as args (\%hash1, \%hash2)
#             The hashes are like; hash1  (name1, 0000011111, name2, 0000122222 );
#                                  hash2  (name3, 1324..1341, name4, 13424444.. );
#
#             1) The resulting 1st hash output is (0, 20,   1, 13,     2, 12)
#             which means that 0 added up to 24 in the second arg hash positions
#                              1 added up to 15 in the second arg hash positions
#                              2 added up to 18 in the second arg hash positions
#             'p' option only works with 'n' or 'a'
#             2) The resulting 2nd hash output is (0, 5,   1, 5)
#             which means that 0 occurred 5 times in the first input hash
#                              1 occurred 5 times in the first input hash
#             'p' option only works with 'n' or 'a'
# Warning   :
# Keywords  :  tally two hashes of numbers.
# Options   : [a n i p]
# Returns   : ($ref1, $ref2), ie, two references of hash
#             averaging option causes division of 20(added up value)
#                                                by 9(occurance) in the above
#             for '0' of the first hash, so (0, 2.222,  1, 2.1666,  2, 2.4 )
#             Average is the average of numbers
#             average value in 0-9 scale (or 0-100 with 'p' option)
#             So, if there are
#                  seq1 00111110000,   The 'a' value of 0 and 1 as in the seq2
#                  seq2 33000040000    is 0-> 6/6, 1-> 4/5, while the 'n'
#                                        calc would be, 0-> 6 (60%), 1-> 4(40%)
#
# Argument  : (\%hash1, \%hash2) or optionally (\%hash1, \%hash2, ['n', 'i', 'p', 'a'])
#             'n' => normalizing, 'p' => percentage out, 'i' => make int out, 'a'=> averaged
# Version   : 1.2
#--------------------------------------------------------------------
sub tally_2_hashes{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	%hash0 = %{$hash[0]};
	%hash1 = %{$hash[1]};
	@keys1=  keys %hash0;  ### No need to sort here as you will return hash at the end
	@keys2=  keys %hash1;

  if($char_opt =~ /p/i ){ $factor =100; }

  for($i=0; $i < @keys1; $i++ ){

	  ###################################################
	  ##  Gap char detection
	  ###################################################
	  if($hash0{$keys1[$i]} =~ /([\,\-])\S+[\,\-]/){ $gap_char1 = $1; }else{ $gap_char1=''; }
	  if($hash1{$keys2[$i]} =~ /([\,\-])\S+[\,\-]/){ $gap_char2 = $1; }else{ $gap_char2=''; }


	  ###################################################
	  ##  Split the value string by gap char
	  ###################################################
	  @string1=split(/$gap_char1/, $hash0{$keys1[$i]});
	  @string2=split(/$gap_char2/, $hash1{$keys2[$i]});
	  ### @string1 => (0,0,0,0,1,1,1,1,1) @string2 => (3,4,2,13,2,1,23,3)


	  ################################################################
	  ##  Main calc part, you get %tally_all_occur and %tally_occur
	  ################################################################
	  for($j=0; $j < @string1; $j++){
		  $tally_all_occur{$string1[$j]}++ ; ## <-- number of all the positions
		  if( ($string2[$j]=~/[\d\^]+/)&&($string1[$j]=~/[\d\^]+/) ){
			  $tally_occur{$string1[$j]}+=$string2[$j] ; # %tally_occur is for added up counts
		  }                                             # %tally_all_occur is for only the position
	  }                                                #  occurances of '0', '1' or whatever. To know
																		#  how many '0' entry were you should use this.
	  ####################################################################################
	  ##  When options were put, do more calc on %tally_all_occur and %tally_occur
	  ####################################################################################
	  if($char_opt =~ /a/i ){
		 print "\n           $char_opt ";
		 my(@cs_rates) = sort keys %tally_all_occur;
		 for($k=0; $k < @cs_rates; $k++){
			 if($tally_all_occur{$cs_rates[$k]} == 0){
				 $tally{$cs_rates[$k]} =0; next;}
			 if($char_opt =~ /i/i ){
				 $tally{$cs_rates[$k]}=int($tally_occur{$cs_rates[$k]}/$tally_all_occur{$cs_rates[$k]}); }
			 elsif($char_opt !~ /i/i ){
				 $tally{$cs_rates[$k]}= $tally_occur{$cs_rates[$k]}/$tally_all_occur{$cs_rates[$k]};
			 }
		 }
	  }
	  elsif($char_opt =~ /[np]/i){
		 my($big_sum, @cs_digits);
		 @cs_digits = sort keys %tally_occur;  # @cs_digits are (0, 1, and 2 )
		 for(@cs_digits){ $big_sum+=$tally_occur{$_}/$tally_all_occur{$_};   }
		 for($t=0; $t < @cs_digits; $t++){
			if($big_sum ==0){ $tally{$cs_digits[$t]}=0; next; }
			else{
			  if($char_opt =~ /i/i){
				 $tally{$cs_digits[$t]}= int(($tally_occur{$cs_digits[$t]}/$tally_all_occur{$cs_digits[$t]}/$big_sum*$factor)+0.4999);}
			  elsif($char_opt !~ /i/i ){
				 $tally{$cs_digits[$t]}= $tally_occur{$cs_digits[$t]}/$tally_all_occur{$cs_digits[$t]}/$big_sum*$factor;}
			}
		 }
	  }
  }
  if($char_opt =~ /[an]/i){
		 print "\n           $char_opt ";
	  return(\%tally, \%tally_all_occur);}
  else{ return(\%tally_occur, \%tally_all_occur);}
}
#________________________________________________________________________
# Title     : superpose_seq_hash   ( first to second hash) ## the oldest version.
# Usage     : %out =%{&superpose_seq_hash(\%hash1, \%hash2)};
# Function  : (name1 000000112324)+(name1  ABC..AD..EFD ) => (name1 000..01..324)
#             uses the second hash a template for the first sequences. gap_char is
#             '-' or '.'
#             To insert gaps rather than overlap, use insert_gaps_in_seq_hash
# Example   :
# Warning   : Accepts only two HASHes and many possible gap_chr. Default gap is '-'
# Keywords  : overlay sequence, overlay alphabet, superpose sequence,
# Options   :
# Returns   : one hash ref.
# Argument  : 2 refs. for hash of identical keys and value length and gap_chr.
# Version   : 1.0
#--------------------------------------------------------------------
sub superpose_seq_hash{
  if($debug eq 1){ print __LINE__, " # superpose_seq_hash : \n"; }
  my($gap_chr)='-';
  my($i, $j, %hash1, %hash2, $name, %out, @str1, @str2);

  if((ref($_[0]) eq 'HASH')&&(ref($_[1]) eq 'HASH')){
	  %hash1=%{$_[0]}; %hash2=%{$_[1]}; }
  else{ print "\n superpose_seq_hash needs hash ref\n"; print chr(007); exit; }

  my(@names1)=sort keys %hash1; my(@names2)=sort keys %hash2;
  (@names1 > @names2)? $bigger=@names1 : $bigger=@names2;

  for ($j=0; $j < $bigger; $j++){
	 if($hash2{$names2[$j]}=~/(\W)/){ $gap_chr = $1; }
		@str1=split(/|\,/, $hash1{$names1[$j]});
		@str2=split(/|\,/, $hash2{$names2[$j]});
		for($i=0; $i < @str2; $i++){
		  if($str2[$i] ne $gap_chr){ $str2[$i]=$str1[$i];  } }
		$out{$names1[$j]}=join(",", @str2);
  }
  return(\%out);
}


#________________________________________________________________________
# Title     : overlay_seq_hash   ( first to second hash) ## the oldest version.
# Usage     : %out =%{&overlay_seq_hash(\%hash1, \%hash2)};
# Function  : (name1 000000112324)+(name1  ABC..AD..EFD ) => (name1 000..01..324)
#             uses the second hash a template for the first sequences. gap_char is
#             '-' or '.'
#             To insert gaps rather than overlap, use insert_gaps_in_seq_hash
# Example   :
# Warning   : Accepts only two HASHes and many possible gap_chr. Default gap is '-'
# Keywords  :
# Options   :
# Returns   : one hash ref.
# Argument  : 2 refs. for hash of identical keys and value length and gap_chr.
# Version   : 1.0
#--------------------------------------------------------------------
sub overlay_seq_hash{
  my($gap_chr)='-'; my($i, $j, $name, %out, @str1, @str2);

  if((ref($_[0]) eq 'HASH')&&(ref($_[1]) eq 'HASH')){
	  my(%hash1)=%{$_[0]}; my(%hash2)=%{$_[1]}; }
  else{ print "\n overlay_seq_hash needs hash ref\n"; print chr(007); exit; }

  my(@names1)=keys %hash1; my(@names2)=keys %hash2;
  (@names1 > @names2)? $bigger=@names1 : $bigger=@names2;

  for ($j=0; $j < $bigger; $j++){
	 if($hash2{$names2[$j]}=~/(\W)/){ $gap_chr = $1; }
		@str1=split(//, $hash1{$names1[$j]}); @str2=split(//, $hash2{$names2[$j]});
		for($i=0; $i < @str2; $i++){
		  if(($str2[$i] =~ /\W/)||($str2[$i] =~ //)){ $str1[$i]="$gap_chr";}}
		$out{$names1[$j]}=join(",", @str1);
  }
  \%out;
}

#________________________________________________________________________
# Title     : insert_gaps_in_seq_hash  ( first to second hash)
# Usage     : %out_extended_seq =%{&insert_gaps_in_seq_hash(\%hash1, \%hash2)};
# Function  : superpose two hashes of the same sequence or same seq. length sequences,
#             but unlike 'superpose_seq_hash', this inserts gaps and extend the
#             sequences.
#             (name1_sec  hHHHHHH EEEEEEE) +
#             (name1_seq  .CDEABC..AD..EFD..EKST) => (name1_ext  .hHHHHH..H...EEE..EEEE)
#             In the example, the undefined sec. str. position is replaced as gaps('.')
#             Uses the second hash a template for the first sequences. gap_char is
#             '-' or '.'
# Example   :
# Warning   : coded by A Biomatic
# Keywords  : superposing sequences with gaps
# Options   :
# Returns   : one hash ref.
# Argument  : 2 ref for hash of identical keys and value length.
# Version   : 1.1
#--------------------------------------------------------------------
sub insert_gaps_in_seq_hash{
  my($gap_char)='-';
  my($i, $j, $t, %hash1, %hash2, $bigger, $name, %out, @str1, @str2);
  my($join_char) =',';  ## <<-- This is for the final output joined by this var.

  if((ref($_[0]) eq 'HASH')&&(ref($_[1]) eq 'HASH')){
	  %hash1=%{$_[0]};
	  %hash2=%{$_[1]};
  }
  else{
	  print "\n superpose_seq_hash needs hash ref\n";
	  print chr(007); exit;
  }
  my(@names1)=keys %hash1;
  my(@names2)=keys %hash2;
  (@names1 > @names2)? $bigger=@names1 : $bigger=@names2;

  for ($j=0; $j < $bigger; $j++){
		if( $hash2{$names2[$j]}=~/(\W)/){
			$gap_char = $1; } ## <<-- finding the gap_char
		$hash1{$names1[$j]}=~ s/ /$gap_char/g; ## <<-- replacing space with 'gap_char';

		@str1=split(/|\,/, $hash1{$names1[$j]});
		@str2=split(/|\,/, $hash2{$names2[$j]});
		for($i=0 ; $i < @str2; $i++){
			if($str2[$i] =~ /\w/){
				$str2[$i] = shift @str1;
			}
		}
		$out{$names1[$j]}=join(",", @str2);
  }
  \%out;
}




#________________________________________________________________________
# Title     : scan_win_get_average (gets averages of windows of sequences of num)
# Usage     : %out1 = %{&scan_win_get_av(\%input, \$window_size, \%input2,,,,)};
#             The order of the arguments doesn't matter.
# Function  :
# Example   : input hash: ( seq1,  '13241234141234234',      (2 or more sequences accepted)
#                           seq2,  '1341324123413241234')
#             input winsize : 5;
#
#             output hash; (seq1, 1234123413241234);
#             output hash; (seq2, 1344234123412341);
#                  The numbers are ratios(compos/seqid) with given window size.
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub scan_win_get_average{
  my($window_size)=6; my($k,$r, @in_hash);
  for($k=0; $k < @_; $k++){ ######### Arg handling.
	 if(ref($_[$k]) eq 'HASH'){  push(@in_hash, $_[$k]);  }
	 elsif( ref($_[$k]) eq 'SCALAR'){ $window_size = ${$_[$k]}; }
	 elsif( !(ref($_[$k])) && ( $_[$k] =~ /\d+/)){ $window_size=$_[$k]; }
  }
  for($r=0; $r < @in_hash; $r++){ my($i,$window_1, $sepa,%out_hash,$offset, $sum, $w,@win_elem_array);
	 my(%input)=%{$in_hash[$r]};  my(@keys)= keys %input;
	 for ($i=0; $i< @keys; $i++){ my($av);
		 $input{$keys[$i]}=~ s/,//g; $input{$keys[$i]}=~ s/\./0/g;
		 $input{$keys[$i]}=~ s/\-/0/g;
		 for($w=0; $w < length($input{$keys[$i]}); $w++){
			$offset = $w - int($window_size/2);  if($offset < 0){ $offset = 0; }
			$window_1= substr($input{$keys[$i]}, $offset, $window_size);
			@win_elem_array= split(//,$window_1);
			for(@win_elem_array){    if(/^\d[\.\d]*/){   $sum+=$_;  }    }
			$av.=int($sum/@win_elem_array);         $sum=0;
		 }
		 $out_hash{$keys[$i]}=$av;
	 }
	 push(@final_out_ref, \%out_hash);
  }
  if( @final_out_ref == 1){  return($final_out_ref[0]); }
  elsif(  @final_out_ref > 1){  return(@final_out_ref); }
}
#________________________________________________________________________
# Title     : scan_win_and_get_sc_rate_pairs
# Usage     : %out1 = %{&scan_win_and_get_sc_rate_pairs(\%input, \$window_size)};
# Function  : scans input sequences(arg1) in a given(arg2) window size and gets
#             each composition and sequence identity rate(sc_rate) of the window.
#             sc rate = Sequence Id(%)/ Composition Id(%)
# Example   : input hash: ( seq1,  'ABCDEFG.HIK',      (2 or more sequences accepted)
#                         seq2,  'DFD..ASDFAFS',
#                         seq3,  'DDDDD..ASDFAFS' );
#             input winsize : 5;
#
#             output hash; (seq1seq2, 1,2,2,2,1,1,2,2); <-- joined by ',';
#             output hash; (seq1seq3, 1,2,2,2,1,1,2,2); <-- joined by ',';
#                  The numbers are ratios(compos/seqid) with given window size.
# Warning   : when $seqid is zero  the rate becomes $compos_id/10   !!!
# Keywords  :
# Options   :
# Returns   : a reference of a hash.
# Argument  : One ref. for hash, one ref. for a scalar.
# Version   : 1.0
#--------------------------------------------------------------------
sub scan_win_and_get_sc_rate_pairs{
  my($base_l)=1; my($scale)=1;
  my(%input, $i, $j, $window_size, $show_calculation, $redu_window,
	  $variable_win_size);
  for($i=0; $i < @_; $i ++){
	 if( (ref($_[$i])) eq 'HASH'  ){ %input=%{$_[$i]};}
	 elsif( (ref($_[$i]) eq 'SCALAR' )&&(${$_[$i]} =~ /^d+/)){ $window_size= ${$_[$i]}; }
	 elsif( (ref($_[$i]) eq 'SCALAR' )&&(${$_[$i]} =~ /^[vV]+/)){ $variable_win_size= 'v' }
	 elsif( (ref($_[$i]) eq 'SCALAR' )&&(${$_[$i]} =~ /[sS]+/)){ $show_calculation = 's'}
	 elsif( (ref($_[$i]) eq 'SCALAR' )&&(${$_[$i]} =~ /[rR]+/)){ $redu_window  = 'r'}
	 elsif( (ref($_[$i]) eq 'SCALAR' )&&(${$_[$i]} =~ /[fF]+/)){ $apply_factor  = 'f'}
	 elsif( (ref($_[$i]) eq 'SCALAR' )&&(${$_[$i]} =~ /[dD]+/)){ $make_gap_dot  = 'd'}
	 elsif( (ref($_[$i]) eq 'SCALAR' )&&(${$_[$i]} =~ /[mM]+/)){ $minus_whole_cs  = 'm'}
	 elsif( (!(ref($_[$i])))&&($_[$i] =~ /^\d+/) ){ $window_size= $_[$i]; }
	 elsif( (!(ref($_[$i])))&&($_[$i] =~ /^[vV]+/) ){ $variable_win_size = 'v'; }
	 elsif( (!(ref($_[$i])))&&($_[$i] =~ /^[sS]+/) ){ $show_calculation  = 's'; }
	 elsif( (!(ref($_[$i])))&&($_[$i] =~ /^[rR]+/) ){ $redu_window  = 'r'; }
	 elsif( (!(ref($_[$i])))&&($_[$i] =~ /^[fF]+/) ){ $apply_factor  = 'f'; }
	 elsif( (!(ref($_[$i])))&&($_[$i] =~ /^[dD]+/) ){ $make_gap_dot  = 'd'; }
	 elsif( (!(ref($_[$i])))&&($_[$i] =~ /^[mM]+/) ){ $minus_whole_cs  = 'm'; }
  }

  if(defined(${$_[2]})){ $base_l=${$_[2]}; }
  if(defined(${$_[3]})){ $scale =${$_[3]}; }
  my(@sequences, @out_rate, $i, $j, $title, $window_1, $window_2,
	  $ratio_compos_vs_seqid, @array_of_2_seq,%out_hash );
  my(@keys)= sort keys %input;
  for ($i=0; $i<@keys; $i++){    # putting sequences from hash to an array
	 for($j=$i+1; $j< @keys; $j++){
		push(@sequences, $input{$keys[$i]}, $input{$keys[$j]});

		######################################################################
		#################  PASSING OVER TO the next SUB routine ##############
		######################################################################
		#---> @sequences will have ('ABCDEFG.HIK', 'DFD..ASDFAFS'); ##########
		($out_rate_arr_ref,$whole_rate_ref)= &get_windows_sc_rate_array(

						\@sequences,\$window_size, $variable_win_size, \$apply_factor,
						\$redu_window, $make_gap_dot, $show_calculation, \$minus_whole_cs );

		undef(@sequences);
		@out_rate=@{$out_rate_arr_ref};
		$whole_rate=${$whole_rate_ref};
		$title = "$keys[$i]_$keys[$j]\($whole_rate\)";
		$out_hash{$title}=join(",", @out_rate);
	 }
  }
  return( \%out_hash );
}

#________________________________________________________________________
# Title     : scan_win_and_get_cs_rate_pairs
# Usage     : %out1 = %{&scan_win_and_get_cs_rate_pairs(\%input, \$window_size)};
# Function  : scans input sequences(arg1) in a given(arg2) window size and gets
#             each composition and sequence identity rate(cs_rate) of the window.
#             CS rate = Composition Id / Sequence Id
# Example   :     input hash: ( seq1,  'ABCDEFG.HIK',      (2 or more sequences accepted)
#                               seq2,  'DFD..ASDFAFS',
#                               seq3,  'DDDDD..ASDFAFS' );
#                 input winsize : 5;
#
#                 output hash; (seq1seq2, 1,2,2,2,1,1,2,2); <-- joined by ',';
#                 output hash; (seq1seq3, 1,2,2,2,1,1,2,2); <-- joined by ',';
#             The numbers are ratios(compos/seqid) with given window size.
#
# Warning   : when $seqid is zero  the rate becomes $compos_id/10   !!!
# Keywords  :
# Options   :
# Returns   : a reference of a hash.
#             It is getting the entropy of the column and calculates something after.
# Argument  : One ref. for hash, one ref. for a scalar.
# Version   : 1.0
#--------------------------------------------------------------------
sub scan_win_and_get_cs_rate_pairs{
  my($base_l)=1;
  my($scale)=1;   # these are default params.
  my(%input)=%{$_[0]};
  my($window_size)=${$_[1]};

  if(defined(${$_[2]})){ $base_l=${$_[2]}; } ### <---$base_c is the baseline controller for sensitivity.
  if(defined(${$_[3]})){ $scale =${$_[3]}; } ### <---$base_c is the baseline controller for sensitivity.

  my(@sequences, @out_rate, $i, $j, $title, $window_1, $window_2,
	  $ratio_compos_vs_seqid, @array_of_2_seq,  %out_hash, $whole_rate );
  my(@keys)= keys %input;

  for ($i=0; $i < @keys; $i++){    # putting sequences from hash to an array
	 for($j=$i+1; $j < @keys; $j++){
		push(@sequences, $input{$keys[$i]}, $input{$keys[$j]});
		#---> @sequences will have ('ABCDEFG.HIK', 'DFD..ASDFAFS'); ##########
		($out_rate_arr_ref,$whole_rate_ref)
			 = &get_windows_cs_rate_array(\@sequences,\$window_size,\$base_l,\$scale);
		undef(@sequences);
		@out_rate=@{$out_rate_arr_ref};
		$whole_rate=${$whole_rate_ref};
		$title="$keys[$i]_$keys[$j]\($whole_rate\)";
		$out_hash{$title}=join(",", @out_rate);
	 }
  }
  return( \%out_hash );
}
#________________________________________________________________________
# Title     : get_residue_error_rate  (used in get_posi_rates_hash_out)
# Usage     : %position_diffs =%{&get_residue_error_rate(\@seq_position1, \@seq_position2)};
# Function  : This is the final step in error rate getting.
#             gets a ref. of a hash and calculates the absolute position diffs.
# Example   :
# Warning   : split and join char is ',';
# Keywords  :
# Options   : 'L' for limitting the error rate to 9 to make one digit output
#  $LIMIT becomes 'L' by L, l, -l, -L
# Returns   : one ref. for an array of differences of input arrays. array context.
#             ---Example input (a hash with sequences); The values are differences after
#                                comparion with structural and sequential alignments.
#             %diffs =('seq1', '117742433441...000',   <-- input (can be speparated by '' or ','.
#                      'seq2', '12222...99999.8888',
#                      'seq3', '66222...44444.8822',
#                      'seq4', '12262...00666.772.');
#             example output;
#             seq3_seq4       '0,1,0,0,0,.,.,.,,.,0,,0,0,,0,0,,.,0,,0,0,.'
#             seq1_seq2       '0,1,0,1,1,.,.,.,,.,2,,2,2,,2,2,,.,.,,2,2,1'
#             seq1_seq3       '0,1,0,1,1,.,.,.,,.,1,,1,1,,0,.,,.,.,,1,1,1'
#             seq1_seq4       '0,1,0,,1,1,.,.,.,,.,1,,1,1,0,.,.,,.,1,,2,2'
#             seq2_seq3       '0,1,0,,0,0,,.,.,,.,0,,1,0,,0,0,,.,0,,0,0,0'
#             seq2_seq4       '0,0,0,,1,0,,.,.,,.,0,,1,0,,0,0,,.,0,,0,0,.'
# Argument  : Takes a ref. for hash which have positions of residues of sequences.
# Version   : 1.1
#--------------------------------------------------------------------
sub get_residue_error_rate{
	my ($LIMIT);
	my(%diffs)= %{$_[0]}; my(@names)= keys (%diffs);
	$LIMIT=${$_[1]} if ref($_[1]) eq 'SCALAR';
	$LIMIT= $_[1] unless ref($_[1]);
	my(%seqs_comp_in_pair, @temp, @temp2,$split_char, $i);
	for ($i=0; $i < @names; $i++){
		if($diffs{$names[$i]}=~/\,/){ $split_char =',';}else{ $split_char = ''; }
		(@{"string$i"}) = split(/$split_char/, $diffs{$names[$i]});   }
	for ($i=0; $i < @names; $i++){
		for ($j=$i+1; $j < @names; $j ++){
			for ($k=0; $k < @string0; $k++){
				if ((${"string$i"}[$k] =~ /[-\d+]/) && (${"string$j"}[$k] =~ /[-\d+]/)){
					my($diff) = abs(${"string$i"}[$k] - ${"string$j"}[$k]);
					if( ($LIMIT =~/L/i)&&($diff > 9) ){ push(@temp2, 9);
					}else{ push(@temp2, $diff); }
				}else{ push(@temp2, '.'); } }

			#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			#  Following if {} is for sorting output names to make  2aaa_6taa than 6taa_2aaa
			#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			if($names[$i] <= $names[$j]){
				$seqs_comp_in_pair{"$names[$i]\_$names[$j]"}=join(",", @temp2); }
			else{ $seqs_comp_in_pair{"$names[$j]\_$names[$i]"}=join(",", @temp2); }

			@temp2=();
		}
	 }
	 return(\%seqs_comp_in_pair);  # permutated
}



#___________________________________________________________________________________
# Title     : get_each_posi_diff_hash , next version is  get_residue_error_rate
# Usage     : %position_diffs =%{&get_each_posi_diff_hash(\@seq_position1, \@seq_position2)};
# Function  : This is the final step in error rate getting.
#             gets a ref. of a hash and calculates the position diffs.
# Example   :
# Warning   : split and join char is ',';
# Keywords  :
# Options   : 'L' for limitting the error rate to 9 to make one digit output
#  $LIMIT becomes 'L' by L, l, -l, -L
#
# Returns   : one ref. for an array of differences of input arrays. array context.
#             ---Example input (a hash with sequences); The values are differences after
#                                comparion with structural and sequential alignments.
#             %diffs =('seq1', '117742433441...000',   <-- input (can be speparated by '' or ','.
#                      'seq2', '12222...99999.8888',
#                      'seq3', '66222...44444.8822',
#                      'seq4', '12262...00666.772.');
#             example output;
#             seq3_seq4       '0,1,0,0,0,.,.,.,,.,0,,0,0,,0,0,,.,0,,0,0,.'
#             seq1_seq2       '0,1,0,1,1,.,.,.,,.,2,,2,2,,2,2,,.,.,,2,2,1'
#             seq1_seq3       '0,1,0,1,1,.,.,.,,.,1,,1,1,,0,.,,.,.,,1,1,1'
#             seq1_seq4       '0,1,0,,1,1,.,.,.,,.,1,,1,1,0,.,.,,.,1,,2,2'
#             seq2_seq3       '0,1,0,,0,0,,.,.,,.,0,,1,0,,0,0,,.,0,,0,0,0'
#             seq2_seq4       '0,0,0,,1,0,,.,.,,.,0,,1,0,,0,0,,.,0,,0,0,.'
# Argument  : Takes a ref. for hash which have positions of residues of sequences.
# Version   : 1.0
#----------------------------------------------------------------------------------
sub get_each_posi_diff_hash{
	my(%diffs)= %{$_[0]}; my(@names)= keys (%diffs);
	my(%seqs_comp_in_pair, @temp, @temp2,$split_char, $i);
	my($LIMIT)=${$_[1]} if ref($_[1]) eq 'SCALAR';
	my($LIMIT)= $_[1] unless ref($_[1]);
	for ($i=0; $i < @names; $i++){
		if($diffs{$names[$i]}=~/\,/){ $split_char =',';}else{ $split_char = ''; }
		(@{"string$i"}) = split(/$split_char/, $diffs{$names[$i]});   }
	for ($i=0; $i < @names; $i++){
		for ($j=$i+1; $j < @names; $j ++){
			for ($k=0; $k < @string0; $k++){
				if ((${"string$i"}[$k] =~ /[-\d+]/) && (${"string$j"}[$k] =~ /[-\d+]/)){
					my($diff) = abs(${"string$i"}[$k] - ${"string$j"}[$k]);
					if( ($LIMIT =~/L/i)&&($diff > 9) ){ push(@temp2, 9);
					}else{ push(@temp2, $diff); }
				}else{ push(@temp2, '.'); } }

			#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			#  Following if {} is for sorting output names to make  2aaa_6taa than 6taa_2aaa
			#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			if($names[$i] <= $names[$j]){
				$seqs_comp_in_pair{"$names[$i]\_$names[$j]"}=join(",", @temp2); }
			else{ $seqs_comp_in_pair{"$names[$j]\_$names[$i]"}=join(",", @temp2); }

			@temp2=();
		}
	 }
	return(\%seqs_comp_in_pair);  # permutated
}

#________________________________________________________________________
# Title     : get_posi_rates_hash_out (derived from 'get_posi_shift_hash' )
# Usage     : %rate_hash = %{&get_posi_shift_hash(\%hash_msf, \%hash_jp)};
# Function  : This is to get position specific error rate for line display rather than
#             actual final error rate for the alignment.
#             Output >>
#             seq1_seq2  1110...222...2222
#             seq2_seq3  1111....10...1111
#             seq1_seq3  1111....0000.0000
#
# Example   :
# Warning   : split and join char is ','; (space)
# Keywords  :
# Options   :
# Returns   : \%final_posi_diffs;
# Argument  : %{&get_posi_rates_hash_out(\%msfo_file, \%jpo_file)};
#             Whatever the names, it takes one TRUE structral and one ALIGNED hash.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_rates_hash_out{
  my(%array1)=%{$_[0]};
  my(%array2)=%{$_[1]};
  my(@string1,@string2,@seq_position1,@seq_position2,
	  $len_of_seq,@position_diffs, @position_corrected1,
	  @names, @whole_length, %array3, $name);
  %array1 = %{&hash_common_by_keys(\%array1, \%array2)};
  %array2 = %{&hash_common_by_keys(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)};
  %array2 = %{&remov_com_column(\%array2)};
  @names=  sort keys %array2;

  for $name (@names){
	  @string1 =split(//, $array1{$name});
	  @string2 =split(//, $array2{$name});
	  @seq_position1 = @{&get_posi_sans_gaps(\$array1{$name})};
	  @seq_position2 = @{&get_posi_sans_gaps(\$array2{$name})};
	  $len_of_seq = @seq_position2;
	  push(@whole_length, $len_of_seq);
	  @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
	  @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
	  $array3{$name}=join(",", @position_corrected1);
  }

  my(%final_posi_diffs)=%{&get_each_posi_diff_hash(\%array3)}; undef(@whole_length, $len_of_seq);
  return(\%final_posi_diffs);
}

#________________________________________________________________________
# Title     : get_posi_rates_hash_out_compact (derived from 'get_posi_shift_hash' )
# Usage     : %rate_hash = %{&get_posi_shift_hash(\%hash_msf, \%hash_jp)};
# Function  : This is to get position specific error rate for line display rather than
#             actual final error rate for the alignment.
#             Output >>  something like below but, without gaps, so final one is;
#             seq1_seq2  1110...222...2222     seq1_seq2  11102222222
#             seq2_seq3  1111....10...1111  -> seq2_seq3  1111101111
#             seq1_seq3  1111....0000.0000     seq1_seq3  111100000000
#
# Example   :
# Warning   : split and join char is ','; (space)
# Keywords  :
# Options   :
# Returns   : \%final_posi_diffs_compact;  Compare with  'get_posi_rates_hash_out_jp'
# Argument  : %{&get_posi_rates_hash_out(\%msfo_file, \%jpo_file)};
#             Whatever the names, it takes one TRUE structral and one ALIGNED hash.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_rates_hash_out_compact{
  my(%array1)=%{$_[0]};  my(%array2)=%{$_[1]};
  my(@string1,@string2,@seq_position1,@seq_position2,$len_of_seq,@position_diffs,
	  @position_corrected1,@names, @whole_length, %array3);
  %array1 = %{&hash_common_by_keys(\%array1, \%array2)};
  %array2 = %{&hash_common_by_keys(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)};
  %array2 = %{&remov_com_column(\%array2)};
  @names= keys %array2;
  for $name (@names){
	  @string1 =split('', $array1{$name});  @string2 =split('', $array2{$name});
	  @seq_position1 = @{&get_posi_sans_gaps(\$array1{$name})};
	  @seq_position2 = @{&get_posi_sans_gaps(\$array2{$name})};
	  $len_of_seq =(@seq_position2);  push(@whole_length, $len_of_seq);
	  @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
	  $array3{$name}=join(",", @position_diffs);
  }
  my(%final_posi_diffs_compact)=%{&get_each_posi_diff_hash(\%array3)}; undef(@whole_length, $len_of_seq);
  return(\%final_posi_diffs_compact);
}
#________________________________________________________________________
# Title     : get_posi_rates_hash_out_jp (derived from 'get_posi_shift_hash' )
# Usage     : %rate_hash = %{&get_posi_shift_hash(\%hash_msf, \%hash_jp)};
# Function  : This is to get position specific error rate for line display rather than
#             actual final error rate for the alignment. get_posi_rates_hash_out_jp
#             results in jp template sequence, while get_posi_rates_hash_out_msf does
#             in msf template sequence.
#             Output >>
#             seq1_seq2  1110...222...2222   <--- the alignment template is JPO's
#             seq2_seq3  1111....10...1111        (ie structural)
#             seq1_seq3  1111....0000.0000
#
# Example   :
# Warning   : split and join char is ','; (space)
# Keywords  :
# Options   :
# Returns   : \%final_posi_diffs;
# Argument  : %{&get_posi_rates_hash_out_jp(\%msfo_file, \%jpo_file)};
#             Whatever the names, it takes one TRUE structral and one ALIGNED hash.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_rates_hash_out_jp{ my(%array1)=%{$_[0]};  my(%array2)=%{$_[1]};
  my(@string1,@string2,@seq_position1,@seq_position2,$len_of_seq,@position_diffs,
	  @position_corrected1,@names, @whole_length, %array3);
  %array1 = %{&hash_common_by_keys(\%array1, \%array2)}; %array2 = %{&hash_common_by_keys(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)};              %array2 = %{&remov_com_column(\%array2)};
  @names= keys %array2;
  for $name (@names){
	  @string1 =split('', $array1{$name});  @string2 =split('', $array2{$name});
	  @seq_position1 = @{&get_posi_sans_gaps(\$array1{$name})}; @seq_position2 = @{&get_posi_sans_gaps(\$array2{$name})};
	  $len_of_seq =(@seq_position2);  push(@whole_length, $len_of_seq);
	  @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
	  @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
	  $array3{$name}=join(",", @position_corrected1);
  }
  my(%final_posi_diffs)=%{&get_each_posi_diff_hash(\%array3)}; undef(@whole_length, $len_of_seq);
  return(\%final_posi_diffs);
}



#________________________________________________________________________
# Title     : get_posi_rates_hash_out (derived from 'get_posi_shift_hash' )
# Usage     : %rate_hash = %{&get_posi_shift_hash(\%hash_msf, \%hash_jp)};
# Function  : This is to get position specific error rate for line display rather than
#             actual final error rate for the alignment.
# Example   :
# Warning   : split and join char is ','; (space)
# Keywords  :
# Options   :
# Returns   : \%final_posi_diffs;
# Argument  : %{&get_posi_rates_hash_out(\%msfo_file, \%jpo_file)};
#             Whatever the names, it takes one TRUE structral and one ALIGNED hash.
#             Output >>
#             seq1_seq2  1110...222...2222
#             seq2_seq3  1111....10...1111
#             seq1_seq3  1111....0000.0000
#
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_rates_hash_out_msf{
  my(%array1)=%{$_[0]};
  my(%array2)=%{$_[1]};
  my(@string1, @string2, @seq_position1, @seq_position2,
	  $len_of_seq,@position_diffs, @position_corrected1,
	  @names, @whole_length, %array3, $name);
  %array1 = %{&hash_common_by_keys(\%array1, \%array2)};
  %array2 = %{&hash_common_by_keys(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)};
  %array2 = %{&remov_com_column(\%array2)};
  @names= keys %array2;
  for $name (@names){
	  @string1 =split(/|\,/, $array1{$name});
	  @string2 =split(/|\,/, $array2{$name});
	  @seq_position1 = @{&get_posi_sans_gaps(\$array1{$name})};
	  @seq_position2 = @{&get_posi_sans_gaps(\$array2{$name})};
	  $len_of_seq =(@seq_position2);
	  push(@whole_length, $len_of_seq);
	  @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
	  @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
	  $array3{$name}=join(",", @position_corrected1);
  }
  my(%final_posi_diffs)=%{&get_each_posi_diff_hash(\%array3)};
  undef(@whole_length, $len_of_seq);
  # show_hash(\%final_posi_diffs);
  return(\%final_posi_diffs);
}


#________________________________________________________________________
# Title     : normalize_numbers ( from any numbers to  0 - 9 )
# Usage     : %output=%{&normalize_numbers(\%hash1)};
#             originally made to normalize the result of get_posi_rates_hash_out
#             in   'scan_compos_and_seqid.pl'
# Function  : with given numbers in hashes, it makes a scale of 0-9 and puts
#             all the elements in the scale. Also returns the average of the numbs.
# Example   : intputhash>                   Outputhash>
#             ( '1-2', '12,.,1,2,3,4',     ( '1-2',   '9,.,0,1,2,3',
#              '2-3', '12,.,1,5,3,4',       '2-3',   '9,.,0,4,2,3',
#              '4-3', '12,3,1,2,3,4',       '3-1',   '9,3,.,.,2,3',
#              '3-1', '12,4,.,.,3,4' );     '4-3',   '9,2,0,1,2,3' );
# Warning   :
# Keywords  :
# Options   :
# Returns   : (\%norm_hash1, \%norm_hash2, \%norm_hash3,.... )
#
# Argument  : (\%hash1, %hash2, \%hash3, ....)
# Version   : 1.0
#--------------------------------------------------------------------
sub normalize_numbers{
  my(@in)=@_;
  my($split_char)=',';
  my(@out_ref_of_hash, $min, $max, $name, $u,$sum, $av, $range, @num_array,%in);
  ($min, $max, $sum, $av)=&main::hash_stat_for_all(@in);
  if(($max-$min)==0){ $range = 1} else { $range= ($max -$min) };
  for ($u=0; $u< @in ; $u++){
	  %in=%{$_[$u]};
	  my(@keys) = keys %in;
	  if($in{$keys[0]}=~/\,/){ $split_char=','; }  else { $split_char=''; };
	  for $name (@keys){  @num_array = split(/$split_char/, $in{$name});
		  for (@num_array){ $_ = int(($_ / $range)*8) if ($_ =~ /[\-]*\d+/); }
		  $in{$name}=join("$split_char", @num_array); }
	  push(@out_ref_of_hash, \%in);  }
  if( @out_ref_of_hash == 1)  {  return( $out_ref_of_hash[0]); }
  elsif( @out_ref_of_hash > 1){  return( @out_ref_of_hash   ); }
}


#________________________________________________________________________
# Title     : scan_windows_and_get_compos_seqid_rate
# Usage     : %out1 =%{&scan_windows_and_get_compos_seqid_rate(\%input, \$window_size)};
# Function  : scans input sequences(arg1) in a given(arg2) window size and gets
#             each composition and sequence identity rate of the window.
# Example   : input hash: ( seq1,  'ABCDEFG.HIK',    (2 or more sequences accepted)
#                           seq2,  'DFD..ASDFAFS',
#                           seq3,  'DDDDD..ASDFAFS' );
#             input winsize : 5;
#
#             output hash; (seq1seq2, 1,2,2,2,1,1,2,2); <-- joined by ',';
#                  The numbers are ratios(compos/seqid) with given
#                  window size.
# Warning   : when $seqid is zero  the rate becomes $compos_id/10   !!!
# Keywords  :
# Options   :
# Returns   : a reference of a hash.
# Argument  : One ref. for hash, one ref. for a scalar.
# Version   :
#--------------------------------------------------------------------
sub scan_windows_and_get_compos_seqid_rate{
  my($base_l)=1;
  my($scale)=1; # these are default params.
  my(%input)=%{$_[0]};
  my($window_size)=${$_[1]};
  if(defined(${$_[2]})){ $base_l=${$_[2]}; } ### <---$base_c is the baseline controller for sensitivity.
  if(defined(${$_[3]})){ $scale =${$_[3]}; } ### <---$base_c is the baseline controller for sensitivity.
  my(@sequences,@out_rate,$i,$title,$window_1,$window_2,$ratio_compos_vs_seqid,@array_of_2_seq,%out_hash );
  my(@keys)= keys %input;
  my($whole_rate, $whole_rate_ref ,$out_rate_arr_ref);
  for ($i=0; $i<=$#keys; $i++){
	 $sequences[$i]= $input{$keys[$i]};   }
  ($out_rate_arr_ref,$whole_rate_ref)=&get_windows_compos_and_seqid_rate_array(\@sequences,\$window_size,\$base_l,\$scale);
  @out_rate=@{$out_rate_arr_ref};  $whole_rate=${$whole_rate_ref};
  $title="CS_rate\($whole_rate\)";
  $out_hash{$title}=join(",", @out_rate);
  return( \%out_hash );
}


#________________________________________________________________________
# Title     : get_windows_cs_rate_array
# Usage     : @out_rate = @{&get_windows_cs_rate_array(\@seq, \$win_size)};
# Function  : actual working part of scan_windows_and_get_compos_seqid_rate
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : \@ratio_array, \$ratio_whole_seq
# Argument  : (\@input, \$window_size);  @input => ('ABCDEFG.HIK', 'DFD..ASDFAFS', 'ASDFASDFASAS');
#             Input ar => ( 'ABCDEFG
#                'DFD..ASDFAFS'
#                'ASDFASDFASAS' )  as the name of  @sequences.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_windows_cs_rate_array{
  my(@input)=@{$_[0]};
  my($base_level)=1;
  my($scale)=1;
  my($window_size)=${$_[1]};

  if(defined(${$_[2]})){ $base_level =${$_[2]}; }   if(defined(${$_[3]})){ $scale  =${$_[3]}; }

  my(@ratio_array, @array_of_2_seq, $seq_id, $offset, $half_of_w_size, $t, $length, $w,
	  $compos_id, $seq_id, $window_2, $window_1, $compos_whole_seq, $seq_id_whole_seq,
	  $ratio_whole_seq, $win_rate_div_by_whole_rate, $normalizing_factor, $lowest_rate );

  for ($t=0; $t < @input; $t++){
	 $length=length($input[$t]) if (length($input[$t])>$length);   }

  if ($length < $window_size){  $window_size = $length;   }

	  #___________ getting ratio for the whole sequence ___________
  $compos_whole_seq=${&compos_id_percent_array(\@input)};
  $seq_id_whole_seq=${&seq_id_percent_array(\@input)};
  if ($seq_id_whole_seq == 0){  $ratio_whole_seq =$compos_whole_seq/10; }
  else{  $ratio_whole_seq =$compos_whole_seq/$seq_id_whole_seq;  }

	  #___________ getting ratio for each window sequence ___________
  for ($w=0; $w < $length; $w++){
	 $offset = $w - int($window_size/2);  # $offset starts from -5 when window_size is 10.
	 $offset=0 if ($offset < 0);
	 $window_1=substr($input[0], $offset, $window_size);  # window_1 is one segment
	 $window_2=substr($input[1], $offset, $window_size);  # of defined length(size)
	 @array_of_2_seq=($window_1, $window_2); # making an array like this = ('ABCDE', 'BDESA')
	 $compos_id=${&compos_id_percent_array(\@array_of_2_seq)};
	 $seq_id   =${&seq_id_percent_array(\@array_of_2_seq)};
  #print "\n offset = $offset Wind1 = $window_1  Wind2 = $window_2 ";
  #print " Compos1 = $compos_id  Seqid = $seq_id \n";

	 #______  Handle special case when $seqid is zero > the rate becomes $compos_id/10 ______
	 if (($seq_id == 0) && ($compos_id != 0)){ $ratio_compos_vs_seqid = $compos_id/10;   }
	 elsif(($seq_id == $compos_id)&&($seq_id == 0)){ $ratio_compos_vs_seqid = 0;}
	 elsif(($seq_id == $compos_id)&&($seq_id == 100)){ $ratio_compos_vs_seqid = 0;}
	 else{ $ratio_compos_vs_seqid=($compos_id/$seq_id); }
	 push(@ratio_array, $ratio_compos_vs_seqid);  }

  $lowest_rate = ${&min_elem_array(\@ratio_array)};

  if($lowest_rate ==0){ $normalizing_factor=1; $ratio_whole_seq=0; }else{
	 $normalizing_factor=($ratio_whole_seq/$lowest_rate);
  }

  for (@ratio_array){  # the minimum value becomes equal to the whole seq. rate.
	 $_ = int($scale*($_*$normalizing_factor - ($ratio_whole_seq*$base_level))); #<<<----
	 $_=  '^' if($_ > 9); $_=  '_' if($_ < 0);
  }

  $ratio_whole_seq=int($ratio_whole_seq);
  return( \@ratio_array, \$ratio_whole_seq);
}

#________________________________________________________________________
# Title     : read_any_seq_files
# Usage     : %out_seq=%{&read_any_seq_files(\$input_file_name)};
# Function  : Tries to find given input regardless it is full pathname, with or
#             without extension. If not in pwd, it searches the dirs exhaustively.
# Example   : (*out1,  *out2) =&read_any_seq_files(\$input1, \$input2);
#             : (@out_ref_array)=@{&read_any_seq_files(\$input1, \$input2)};
#             : (%one_hash_out) =%{&read_any_seq_files(\$input1)};
# Warning   :
# Keywords  : open_any_seq_files,
# Options   :
# Returns   : 1 ref. for a HASH of sequence ONLY if there was one hash input
#             1 array (not REF.) of references for multiple hashes.
# Argument  : one of more ref. for scalar.
# Version   : 1.1
#--------------------------------------------------------------------
sub read_any_seq_files{
  my(@out_hash_ref_list, $sub, $o, $ext );
  my(@in)=@_;
  for($o=0; $o< @in; $o++){
	 my($found, %out, @file_ext_accepted, $found_file, $sub);
	 if(ref($_[$o])){
		 @file_ext_accepted=('msf', 'fasta','jp','aln','ali','pir',
								  'slx', 'dna','fas','pdb','rms','brk', 'dssp');
		 if( ! -e ${$in[$o]}  or -B ${$in[$o]} or -z ${$in[$o]}  ){
			 print "\n#SUB read_any_seq_files: ${$in[$o]} no seq file exists(or not passed at all) for $0 \n\n",
			 chr(7);
			 exit;
		 }
		 $found_file=${&find_seq_files($in[$o])};
		 print "# in read_any_seq_files, \$found_file => $found_file\n";

		 for $ext(@file_ext_accepted){
			$sub ="open\_$ext\_files";
		    print "# Trying subroutine $sub\n";
			if($found_file =~/\.$ext$/){
			   %out=%{&{"$sub"}(\$found_file)} if (defined &{"$sub"}); $found =1;
			}
			if($found_file =~/\.$ext$/ and  ! defined &{"$sub"} ){
			   print "\n# $sub is not defined in $0. I want it!!\n\n";
			}
		 }
		 if($found==0){
		    my($sub)="open\_$ext\_files"; #<--- this is the last resort !!
			for $ext(@file_ext_accepted){
			   %out=(%out, %{&{"$sub"}(\$found_file)}) if (defined &{"$sub"});
			}
		 }
	  }elsif( !(ref($_[$o])) ){
	     print "\nread_any_seq_files in $0 files accepts only REFERENCES\n\n";
	     exit;
	  }
	  push(@out_hash_ref_list, \%out);
  }
  if(@out_hash_ref_list == 1){  ### If only single hash output is,
	  return($out_hash_ref_list[0]);
  }elsif( @out_hash_ref_list > 1){
	  return(@out_hash_ref_list);   # <-- contains (\%out_seq0, \%out_seq1, \%out_seq2, .... )
  }
}



#________________________________________________________________________
# Title     : seq_to_regexp
# Usage     :
# Function  : given an array and a start and end length,
#              return an array of regular expressions, where each element of the original
#              array has been expanded to a set of regular expressions that match the
#              original exactly num times, for num between the start and end length
#
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. of an array for
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub seq_to_regexp{
	my($num1,$num2,@seq) = @_;
	my($begin,$char,$num,$new,@output,$seen,$seq,@splitseq,$template);

	$template = '(BEGIN)(SEQUENCE){NUMBER}(END)';
	foreach $seq (@seq) {
	   @splitseq = reverse(split('',$seq));
	   $begin = '^';
	   $seen = '';
	   foreach $char (@splitseq) {
		  $begin =  $begin . '|[^' . $char . ']' . $seen;
		  $seen .= $char;
	   }
	   @splitseq = split('',$seq);
	   $end = '$';
	   $seen = '';
	   foreach $char (@splitseq) {
		  $end = $end . '|' . $seen . '[^' . $char . ']';
		  $seen .= $char;
	   }
	   for($num = $num1; $num <= $num2 ; $num++) {
		  $new = $template;
		  $new =~ s/BEGIN/$begin/e;
		  $new =~ s/END/$end/e;
		  $new =~ s/SEQUENCE/$seq/eg;
		  $new =~ s/NUMBER/$num/eg;
		  push(@output,$new);
	   }
	}
	return(\@output);
}

#________________________________________________________________________
# Title     : strip_rotated_seq
# Usage     :
# Function  : remove all but one string of each set of rotations
#             (reverse of rotated_seq )
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. for
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub strip_rotated_seq{
  my(@input) = @_;
  my(@output,%output,@rotations,$string);
  foreach $string (@input) {
	 @rotations = @{&rotate_seq($string)};
	 if( ! grep($output{$_},@rotations) ) {
		push(@output,$string);
		$output{$string} = 1;
	 }
  }
  return(\@output);
}

#________________________________________________________________________
# Title     : rotate_seq
# Usage     : @out_array=@{&rotate_seq($string)};
# Function  : given a string, return all the rotations of that string
#             e.g. given 'abcd', return ('abcd','bcda','cdab','dabc')
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall   ##### RevCom
# Keywords  :
# Options   :
# Returns   : a ref. for reverse complement
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub rotate_seq{
  my($string) = @_;
  my($i, $length, @rotations);

  $length = length($string);
  push(@rotations,$string);
  for($i = 1 ; $i < $length ; $i++ ) {
	 $string =~ s/^(.)(.*)/$2$1/;
	 push(@rotations, $string);
  }
  return(\@rotations);
}


#________________________________________________________________________
# Title     : convert_to_anti_sense
# Usage     :
# Function  :
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall   ##### RevCom
# Keywords  :
# Options   :
# Returns   : a ref. for reverse complement
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub convert_to_anti_sense{
  my($seq)=@_;
  $seq =~ tr/acgtrymkswhbvdnACGTRYMKSWHBVDN/tgcayrkmswdvbhnTGCAYRKMSWDVBHN/;
  scalar reverse $seq;
  \$seq;
}

#________________________________________________________________________
# Title     : convert_rna_to_protein
# Usage     :
# Function  : translate RNA seq to protein seq.
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall
# Keywords  : rna2protein, rna_2_protein, RNA2protein, translate_rna
#             dna2protein, convert_RNA_to_protein, RNA_2_PROTEIN, RNA_2_protein
# Options   :
# Returns   : a ref. of an array for protein translation
# Argument  : a scalar for RNA sequence data
# Version   : 1.1
#--------------------------------------------------------------------
sub convert_rna_to_protein{         ##### RnaToProtein
  my(%seq) = %{$_[0]} or @_;
  my($i,$len, $seq, %out, $output, $codon, @keys);
  @keys=keys %seq;

  for($i=0; $i < @keys ; $i++){
	 my $output;
	 $seq=$seq{$keys[$i]};
	 $len=length($seq);

	 for($j=0; $j < ($len-2) ; $j+=3){
		$codon = substr($seq,$j,3);
		if   ($codon =~ /^UC/i)     {$output .= 'S'; }  # Serine
		elsif($codon =~ /^UU[UC]/i) {$output .= 'F'; }  # Phenylalanine
		elsif($codon =~ /^UU[AG]/i) {$output .= 'L'; }  # Leucine
		elsif($codon =~ /^UA[UC]/i) {$output .= 'Y'; }  # Tyrosine
		elsif($codon =~ /^UA[AG]/i) {$output .= '_'; }  # Stop
		elsif($codon =~ /^UG[UC]/i) {$output .= 'C'; }  # Cysteine
		elsif($codon =~ /^UGA/i)    {$output .= '_'; }  # Stop
		elsif($codon =~ /^UGG/i)    {$output .= 'W'; }  # Tryptophan
		elsif($codon =~ /^CU/i)     {$output .= 'L'; }  # Leucine
		elsif($codon =~ /^CC/i)     {$output .= 'P'; }  # Proline
		elsif($codon =~ /^CA[UC]/i) {$output .= 'H'; }  # Histidine
		elsif($codon =~ /^CA[AG]/i) {$output .= 'Q'; }  # Glutamine
		elsif($codon =~ /^CG/i)     {$output .= 'R'; }  # Arginine
		elsif($codon =~ /^AU[UCA]/i){$output .= 'I'; }  # Isoleucine
		elsif($codon =~ /^AUG/i)    {$output .= 'M'; }  # Methionine
		elsif($codon =~ /^AC/i)     {$output .= 'T'; }  # Threonine
		elsif($codon =~ /^AA[UC]/i) {$output .= 'N'; }  # Asparagine
		elsif($codon =~ /^AA[AG]/i) {$output .= 'K'; }  # Lysine
		elsif($codon =~ /^AG[UC]/i) {$output .= 'S'; }  # Serine
		elsif($codon =~ /^AG[AG]/i) {$output .= 'R'; }  # Arginine
		elsif($codon =~ /^GU/i)     {$output .= 'V'; }  # Valine
		elsif($codon =~ /^GC/i)     {$output .= 'A'; }  # Alanine
		elsif($codon =~ /^GA[UC]/i) {$output .= 'D'; }  # Aspartic Acid
		elsif($codon =~ /^GA[AG]/i) {$output .= 'E'; }  # Glutamic Acid
		elsif($codon =~ /^GG/i)     {$output .= 'G'; }  # Glycine
		else {print "\n# convert_rna_to_protein: unrecognized codon $codon \n";}
	 }
	 $out{$keys[$i]}=$output;
  }
  return (\%out);
}


#________________________________________________________________________
# Title     : convert_dna_to_protein
# Usage     :
# Function  : translate DNA or RNA seq to protein seq.
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall
# Keywords  : dna2protein, dna_2_protein, DNA2protein, translate_dna
#             dna2protein, convert_DNA_to_protein, translate_nucleic_acid
#             rna2protein, rna_2_protein, RNA2protein, translate_rna
#             dna2protein, convert_RNA_to_protein
# Options   :
# Returns   : a ref. of an array for protein translation
# Argument  : a scalar for DNA sequence data
# Version   : 1.2
#--------------------------------------------------------------------
sub convert_dna_to_protein{         ##### RnaToProtein
  my(%seq) = %{$_[0]};
  my($seq, $i,$len, $j, $PY, %out, $codon, @keys);
  @keys=keys %seq;
  $PY = '\[UT\]';

  for($i=0; $i < @keys ; $i++){
	 my ($output);
	 $seq=$seq{$keys[$i]};
	 $len=length($seq);

	 for($j=0; $j < ($len-2) ; $j+=3){
		$codon = substr($seq, $j, 3);
		if   ($codon =~ /^[UT]C/i)         {$output .= 'S'; }  # Serine
		elsif($codon =~ /^[UT][UT][UTC]/i) {$output .= 'F'; }  # Phenylalanine
		elsif($codon =~ /^[UT][UT][AG]/i)  {$output .= 'L'; }  # Leucine
		elsif($codon =~ /^[UT]A[UTC]/i)    {$output .= 'Y'; }  # Tyrosine
		elsif($codon =~ /^[UT]A[AG]/i)     {$output .= '_'; }  # Stop
		elsif($codon =~ /^[UT]G[TUC]/i)    {$output .= 'C'; }  # Cysteine
		elsif($codon =~ /^[UT]GA/i)        {$output .= '_'; }  # Stop
		elsif($codon =~ /^[UT]GG/i)        {$output .= 'W'; }  # Tryptophan
		elsif($codon =~ /^C[UT]/i)         {$output .= 'L'; }  # Leucine
		elsif($codon =~ /^CC/i)            {$output .= 'P'; }  # Proline
		elsif($codon =~ /^CA[UTC]/i)       {$output .= 'H'; }  # Histidine
		elsif($codon =~ /^CA[AG]/i)        {$output .= 'Q'; }  # Glutamine
		elsif($codon =~ /^CG/i)            {$output .= 'R'; }  # Arginine
		elsif($codon =~ /^A[UT][UTCA]/i)   {$output .= 'I'; }  # Isoleucine
		elsif($codon =~ /^A[UT]G/i)        {$output .= 'M'; }  # Methionine
		elsif($codon =~ /^AC/i)            {$output .= 'T'; }  # Threonine
		elsif($codon =~ /^AA[TUC]/i)       {$output .= 'N'; }  # Asparagine
		elsif($codon =~ /^AA[AG]/i)        {$output .= 'K'; }  # Lysine
		elsif($codon =~ /^AG[TUC]/i)       {$output .= 'S'; }  # Serine
		elsif($codon =~ /^AG[AG]/i)        {$output .= 'R'; }  # Arginine
		elsif($codon =~ /^G[UT]/i)         {$output .= 'V'; }  # Valine
		elsif($codon =~ /^GC/i)            {$output .= 'A'; }  # Alanine
		elsif($codon =~ /^GA[TUC]/i)       {$output .= 'D'; }  # Aspartic Acid
		elsif($codon =~ /^GA[AG]/i)        {$output .= 'E'; }  # Glutamic Acid
		elsif($codon =~ /^GG/i)            {$output .= 'G'; }  # Glycine
		else {print "\n# convert_dna_to_protein: unrecognized codon $codon \n";}
	 }
	 $out{$keys[$i]}=$output;
  }
  return (\%out);
}



#________________________________________________________________________
# Title     : write_staden_file
# Usage     :
# Function  :
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. of an array for  STADEN formatted sequence record
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub write_staden_file {				##### PutStaden
  my($seq,$header,$id) = @_;
  my($i,$j,$len,@out);
  $i = 0;
  $len = length($seq);
  $out[$i] = ";\<------------------\>\n";
  substr($out[$i],int((20-length($ID))/2),length($ID)) = $ID;
  $i++;
  for($j=0; $j<$len ; $j+=60) {
	 $out[$i++]=sprintf("%s\n",substr($seq,$j,60));
  }
  return \@out;
}


#________________________________________________________________________
# Title     : write_primer_file
# Usage     :
# Function  :
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. of an array for PRIMER formatted sequence record
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub write_primer_file {
  my($seq,$header,$id) = @_;
  y(@out,$len,$i,$j); $i = 0;
  $out[$i++] = sprintf("*seq: %8s\n", $id);
  $out[$i++] = sprintf("%s", length($header) > 8 ? "# ".$header."\n" : "");
  $out[$i++] = sprintf("%s", $id ? "# ". $id."\n" : "");
  $len = length($seq);
  for($j = 0 ; $j < $len ; ) {
		$out[$i] .= sprintf("%s",substr($seq,$j,10));  $j += 10;
		if($j < $len && $j % 50 != 0 ) {  $out[$i] .= " ";}
		elsif($j % 50 == 0 ) { $out[$i++] .= "\n"; } }
  if($j % 50 != 0 ) { $out[$i] .= "\n"; }
  return \@out;
}

#________________________________________________________________________
# Title     : write_gcg_genbank_file
# Usage     :
# Function  :
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. of an array for GCG-Genbank formatted sequence record
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub write_gcg_genbank_file {			##### PutGcgGenbank
  my($seq,$header,$id) = @_;
  my(@out,$len,$sum,$cnt,$i,$j);
  $len = length($seq);
  for($i=0; $i<$len ;$i++) {
	 $cnt++;
	 $sum += $cnt * ord(substr($seq,$i,1));
	 ($cnt == 57) && ($cnt=0);
  }
  $sum %= 10000;
  push(@out,"LOCUS $id\n");
  push(@out,"DEFINITION $header\n");
  push(@out,"ACCESSION $Accession\n");
  push(@out,"ORIGIN\n\n");
  push(@out," $id Length: $len (today)  Type: N Check: $sum  ..\n\n");
  $len = length($seq);
  $i = $#out + 1;
  for($j = 0 ; $j < $len ; ) {
	 if( $j % 50 == 0) {
		$out[$i] = sprintf("%8d  ",$j+1);
	 }
	 $out[$i] .= sprintf("%s",substr($seq,$j,10));
	 $j += 10;
	 if( $j < $len && $j % 50 != 0 ) {
		$out[$i] .= " ";
	 }elsif($j % 50 == 0 ) {
		$out[$i++] .= "\n";
		$out[$i++] = "\n";
	 }
  }
  if($j % 50 != 0 ) {
	 $out[$i] .= "\n";
  }
  $out[$i] .= "\n";
  return \@out;
}
#________________________________________________________________________
# Title     : write_pir_file
# Usage     :
# Function  :
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             from Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. of an array for PIR formatted sequence record
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub write_pir_file {				##### PutPir
  my($seq, $header) = @_;
  my(@out,$len,$i,$j,$cnt,$sum);
  $seq=~s/[\n ]//g;
  $len = length($seq);
  for($i=0; $i< $len ;$i++) {
	 $cnt++;
	 $sum += $cnt * ord(substr($seq,$i,1));
	 ($cnt==57) && ($cnt=0); }
  $sum %= 10000;  $i = 0;
  $out[$i++] = sprintf("ENTRY           %s\n",$id);
  $out[$i++] = sprintf("TITLE           %s\n",$header);
  #JDT ACCESSION out if defined
  $out[$i++] = sprintf("DATE            %s\n",'');
  $out[$i++] = sprintf("REFERENCE       %s\n",'');
  $out[$i++] = sprintf("SUMMARY         #Molecular-weight %d  #Length %d  #Checksum %d\n",0,$len,$sum);
  $out[$i++] = sprintf("SEQUENCE\n");
  $out[$i++] = sprintf("                5        10        15        20        25        30\n");
  for($j=1; $seq && $j < $len ; $j += 30) {
	 $out[$i++] = sprintf("%7d ",$j);
	 $out[$i++] = sprintf("%s\n", join(" ",split(//,substr($seq, $j - 1,length($seq) < 30 ? length($seq) : 30))) );
  }
  $out[$i++] = sprintf("///\n");
  return \@out;
}
#________________________________________________________________________
# Title     : write_genbank_file
# Usage     : @out =  @{&write_genbank_file($sequ, $header)};
# Function  : (This is DNA seq handling routine!)
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
#             stolen from Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. of an array for Genbank formatted sequence record
# Argument  : two scalars.
# Version   : 1.0
#--------------------------------------------------------------------
sub write_genbank_file{	my($seq,$header,$id) = @_; my(@out,$len,$i,$j,$cnt,$sum);
  $seq =~ tr/A-Z/a-z/;  $seq =~s/[ \n]//g;  $len = length($seq);
  for($i=0; $i<$len ;$i++) {
	 $cnt++;
	 $sum += $cnt * ord(substr($seq,$i,1));
	 ($cnt == 57) && ($cnt=0);  }
  $sum %= 10000;
  $i = 0;
  $out[$i++] = sprintf("LOCUS       %s       %d bp\n",$id,$len);
  $out[$i++] = sprintf("DEFINITION  %s , %d bases, %d sum.\n", $header, $len, $sum);
  $out[$i++] = sprintf("ACCESSION  %s\n", $Accession);
  $out[$i++] = sprintf("ORIGIN\n");
  for($j = 0 ; $j < $len ; ) {
		if( $j % 60 == 0) {
		  $out[$i] = sprintf("%8d  ",$j+1);
		}
		$out[$i] .= sprintf("%s",substr($seq,$j,10));
		$j += 10;
		if( $j < $len && $j % 60 != 0 ) {
		  $out[$i] .= " ";
		}elsif($j % 60 == 0 ) {
		  $out[$i++] .= "\n";
		}
  }
  if($j % 60 != 0 ) { $out[$i] .= "\n"; }
  $out[++$i] = sprintf("//\n");
  return \@out;
}
#________________________________________________________________________
# Title     : write_gcg_file
# Usage     :
# Function  :
# Example   :
# Warning   : Copyright (C) 1993-1994 by James Tisdall
# Keywords  :
# Options   :
# Returns   : a ref. of an array for GCG formatted sequence record
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub write_gcg_file {
  my($seq, $header,$id) = @_;
  my(@out,$len,$i,$j,$cnt,$sum);
  $seq=~s/[ \n]//g;
  $len = length($seq);
  for($i=0; $i<$len ;$i++) {
	 $cnt++;
	 $sum += $cnt * ord(substr($seq,$i,1));
	 ($cnt == 57)&& ($cnt=0);
  }
  $sum %= 10000;
  $i = 0;
  $out[$i++] = sprintf("%s\n",$header);
  $out[$i++] = sprintf("    %s Length: %d (today)  Check: %d  ..\n", $id, $len, $sum);
  $len = length($seq);
  for($j = 0 ; $j < $len ; ) {
		if( $j % 50 == 0) {
		  $out[$i] = sprintf("%8d  ",$j+1);
		}
		$out[$i] .= sprintf("%s",substr($seq,$j,10));
		$j += 10;
		if( $j < $len && $j % 50 != 0 ) {
		  $out[$i] .= " ";
		}elsif ($j % 50 == 0 ) {
		  $out[$i++] .= "\n";
		}
  }
  if($j % 50 != 0 ) {
	  $out[$i] .= "\n";
  }
  return (\@out);
}
#________________________________________________________________________
# Title     : write_fasta_array
# Usage     : @output = @{&put_fasta($sequence, $name)};
# Function  : take Single sequence and produce single output array of fasta
# Example   : @out = (
#             $out[0] =>     ">name",
#             $out[1] =>     "ABCDEABCDEBCDEABCDEABCDEABCDEABCDEBCDEABCDE",
#             $out[2] =>     "TTTTTTTTDEBCDEABCDEABCDEABCDEABCDEBCDEABCDE",
#             $out[3] =>     "ABCDEABCDEBCDEABCDEABCDEABCDEABCDEBCDEABCDE",
#                 );
#
# Warning   : Copyright (C) 1993-1994 by James Tisdall
# Keywords  :
# Options   :
# Returns   : ref. for an array of FASTA formatted sequence record
#
# Argument  : \%input
# Version   : 1.0
#--------------------------------------------------------------------
sub write_fasta_array {
   my(%input) = %{$_[0]}; my($seq)=values(%input);
   my($key)=keys(%input); my(@out,$len, $i, $j); $seq=~s/\n//g; $i = 0;
   $out[$i++] = ">" . $key . "\n";
   for($j=0; $j< length($seq) ; $j += 60) {
	  $out[$i++]= sprintf("%.60s\n", substr($seq,$j,60));  }
   return(\@out); # ref. is slightly faster  2.973 vs 2.985
}

#________________________________________________________________________
# Title     : find_seq_files
# Usage     : $found_file = ${&find_seq_files(\$input_file_name)};
# Function  : (similar to find.pl) used in 'read_any_seq_file.pl'
#             seeks given test file in pwd, specified dir, default path etc.
#             If not found yet, it looks at all the subdirectories of path and pwd.
#             PATH environment dirs, then returns full path file name.
# Example   : $found_file=${&find_seq_files(\$input_file_name)};
# Warning   :
# Keywords  : find_anyj_seq_files, find any seq files, find seq files
# Options   :
# Returns   : return( \$final );
# Argument  : (\$input_file_name) while $input_file_name can be  'xxx.xxx', or '/xxx/xxx/xxx/xxy.yyy'
#             or just directory name like 'aat' for  /nfs/ind4/ccpe1/people/A Biomatic /jpo/align/aat
#             then, it tries to find a file with stored seq file extensions like msf, jp, pir etc
#             to make aat.msf, aat.jp, aat.pir ... and searches for these files.
# Version   : 1.0
#--------------------------------------------------------------------
sub find_seq_files{
  my($final, $no_ext_file, $result); my($in_file)=${$_[0]}; my($pwd)=`pwd`; chomp($pwd);
  my( $base, @ENV_dir, $ext, @probable_dir_list, $directory);
  my(@extension_db)=('sst','msf','fasta','jp','fas','aln','brk','pdb', 'rms', 'ent','slx','fa');
  @probable_dir_list=('JPO','ALIGN','PATH','HOME','PIRDIR','PWD','PDBSST','PDBENT','BLASTDB','PIRDIR','SWDIR','PDB');
	if(($in_file=~/\//)&&(-e $in_file)){ $final=$in_file; }
	elsif((-e $in_file)&&(-s $in_file)&&($in_file !~/\//)){ $in_file="$pwd\/$in_file"; $final=$in_file;}
	######## if it was like  '/nfs/ind4/ccpe1/people/A Biomatic /perl.msf'
	elsif($in_file =~ /\/([\w\-\.]+)$/){ $in_file = $1;
		  if(-e $in_file){ $final = "$pwd\/$in_file"; }
		  #### if it has xxxxxx.xxxx  file form. #######
		  elsif($in_file =~ /(([\w\-]+)\.([\w\-]+))$/){ $file=$1; $base=$2; $ext=$3;
				for (@extension_db){ if($_ eq $ext){ shift(@extension_db);}}
				unshift(@extension_db, $ext);
				for (@probable_dir_list){ if($ENV{$_}=~ /\/$/){chop($ENV{$_});}
					push( @ENV_dir, split(/:/, $ENV{$_}));}
					for $dir (@ENV_dir){ $in_file="$dir\/$file";
						if ((-e $in_file) && (-s $in_file)){  $final=$in_file; last;}
						else{
							 for $ext (@extension_db){ $in_file="$dir\/$base\.$ext";
								  if ((-e $in_file) && (-s $in_file)){
									  if ($file =~  /$in_file/){ $final = $in_file; last;}}}}}
					unless(defined ($final)){
						for $dir (@ENV_dir){ $in_file= ${&search_files_in_subdir(\$dir, \$file)};
							 if(-e $in_file){ $final=$in_file; last; }}}}

			### if it has  xxxxxx   file form, ie. not extension #######
			elsif($in_file =~/\/([\w_\-]+)$/){  $base = $1;
			  for (@extension_db){
				 if($_ eq $ext){ shift(@extension_db);  }
				 unshift(@extension_db, $ext);
				 for (@probable_dir_list){
					if ($ENV{$_} =~ /\/$/){  chop($ENV{$_}); }
					push( @ENV_dir, split(/:/, $ENV{$_}) );
					for $dir (@ENV_dir){ $no_ext_file="$dir\/$base";
						 if((-e $no_ext_file) && (-s $no_ext_file)){ $final=$no_ext_file; last;}
						 else{
							for $ext (@extension_db){ $in_file ="$dir\/$base\.$ext";
								if ((-e $in_file) && (-s $in_file)){ $final = $in_file; last;}}}}}}}}

	 #### when the input was like this  'perl.msf'  in any directory.
	 elsif($in_file =~ /^(([\w\-]+)\.([\w\-]+))$/){ $file=$1; $base=$2; $ext=$3;
		  for (@extension_db){ if($_ eq $ext){ shift(@extension_db);}}
		  unshift(@extension_db, $ext);
		  for (@probable_dir_list){ if($ENV{$_}=~ /\/$/){chop($ENV{$_});}
			  push( @ENV_dir, split(/:/, $ENV{$_}));}
			  for $dir (@ENV_dir){ $in_file="$dir\/$file";
				  if ((-e $in_file) && (-s $in_file)){ $final=$in_file; last;}
				  else{
						for $ext (@extension_db){ $in_file="$dir\/$base\.$ext";
							 if ((-e $in_file) && (-s $in_file)){
								 if ($file =~  /$in_file/){ $final = $in_file; last;}}}}}
			  unless(defined ($final)){
				  for $dir (@ENV_dir){ $in_file= ${&search_files_in_subdir(\$dir, \$file)};
						if(-e $in_file){ $final=$in_file; last; }}}}
	 #### when the input was like this  'hemocyan'  in any directory.
	 elsif($in_file =~ /^([\w\-]+)$/){ $file=$1;
		  for (@probable_dir_list){ if($ENV{$_}=~ /\/$/){chop($ENV{$_});}
			  push( @ENV_dir, split(/:/, $ENV{$_}));}
			  for $dir (@ENV_dir){ $in_file="$dir\/$file";
				  if ((-e $in_file) && (-T $in_file)){  $final=$in_file; last;}
				  else{
						for $ext (@extension_db){ $in_file="$dir\/$file\.$ext";
							 if ((-e $in_file) && (-s $in_file)){
								 if ($file =~  /$in_file/){ $final = $in_file; last;}}}}}
			  unless(defined ($final)){
				  for $dir (@ENV_dir){ $in_file= ${&search_files_in_subdir(\$dir, \$file)};
						if(-e $in_file){ $final=$in_file; last; }}}}
	END_POINT:
	return( \$final );
}
#________________________________________________________________________
# Title     : search_files_in_subdir
# Usage     :
#                     $inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
# Function  : open dir and process all files in the dir if you wish,
#             and then go in any other sub
#             if any file(dir) is linked, it skips that file.
# Example   :
# Warning   : the final var $found_from_search_files_in_subdir mustn't be 'my'ed.
# Keywords  :
# Options   :
# Returns   :
# Argument  : gets a ref. of a scaler (dir name) and returns nothing(void).
# Version   : 1.0
#--------------------------------------------------------------------
sub search_files_in_subdir{
	package search_files_in_subdir;
	my $original_dir=${$_[0]}; my($target_file)=${$_[1]};
	my(@read_files)=@{&main::read_any_dir(\$original_dir)};
	print "\n Searching files... , wait or kill me !\n";
	for $file(@read_files){ $realfile1= "$original_dir\/$file";
	  if (-l $realfile1){ next; }
	  elsif (-d $realfile1){ &main::search_files_in_subdir(\$realfile1, \$target_file); }
	  elsif (-f $realfile1){ @split =split(/\//, $realfile1); my($f) = $split[$#split];
		  if($target_file eq $f){ $found_from_search_files_in_subdir =$realfile1;
		  print chr(007); last;}}
	  else{ next; }  }
	return(\$found_from_search_files_in_subdir);
	last;
	package main;
}

#________________________________________________________________________
# Title     : find_seq_file_old (similar to find.pl) used in 'read_any_seq_file.pl'
# Usage     : $found_file=${&find_seq_file_old(\$input_file_name)};
# Function  : seeks text file in pwd. If not found it looks at
#             PATH environment dirs
# Example   :
# Warning   : << This is READABLE old version of  find_seq_file
# Keywords  :
# Options   :
# Returns   : one ref. for SCALAR of a full path filename.
# Argument  : one ref. for SCALAR
# Version   : 1.0
#--------------------------------------------------------------------
sub find_seq_file_old{ # This returns full path file name.
	 my($in_file)=${$_[0]};      my($pwd)=`pwd`; chomp($pwd);
	 if ((-T $in_file) && (-s $in_file) && ($in_file=~/\//))
	 { # If Text, with Size and full path filename.
		  return (\$in_file);
	 }
	 elsif((-T $in_file)&&(-s $in_file)&&($in_file !~ /\//))
	 { # if it is a Text, with size and not full path filename,
		  $in_file="$pwd\/$in_file";
		  return (\$in_file);
	 }
	 elsif(($in_file =~ /\/([\w\-\.]+)$/) && (!(-e $in_file)))
	 {  # This is to handle when wrong full path filename is given, but the filename
		 $in_file=$1;                                            # exists in pwd
		 if ((-T $in_file) && (-s $in_file)){
			 $in_file="$pwd\/$in_file";
			 return (\$in_file);
		 }
	 }
	 #------- When the search fails in pwd with either fullpath and simple filename.
	 else{   # now STDOUT
		print "\n\n Your $in_file was not found in pwd, so I am searching it\n";
		print "\n To stop press Ctrl + c \n\n";
		print "This takes time as I search PATH with different extensions of the file\n";
		# 1. Search in the ENV directories
		# 2. change the extension and check if it is in.
		my($found_seq_file, $base, $ext, @ENV_dir, @probable_dir_list, $directory);
		if ($in_file =~/([\w_\-]+)\.([\w\-]+)$/){
		  $base = $1; $ext =$2;
		}elsif($in_file =~/([\w_\-]+)$/){
		  $base = $1;
		}
		my(@extension_db)=('msf','jp','jpo','aln','sst','fa','fasta','rms',
											'slx','fas','pdb','brk','ent');
		for (@extension_db){
		  if($_ eq $ext){
		  shift(@extension_db);  last;
		}
	 }
	 unshift(@extension_db, $ext);
	 @probable_dir_list=('PATH', 'HOME', 'JPO', 'PIRDIR', 'PDB', 'PDBENT');
	 for (@probable_dir_list){
		 if ($ENV{$_} =~ /\/$/){  # if any dir ends with '/', chops if off
			chop($ENV{$_}); }
		 push( @ENV_dir, split(/:/, $ENV{$_}) );
	 }
	 for $dir (@ENV_dir){
		if ($dir=~/\//){
			for $ext (@extension_db){
			  $in_file ="$dir\/$base\.$ext";
			  print "Searching $in_file\n";
			  if ((-T $in_file) && (-s $in_file)){
				  print "\n\n I found $in_file in $dir \n\n"; print chr(7);
				  print "  If it is the file you want rerun $0 with it\n\n\n";
				  $found_seq_file = $in_file;
			  }
			}
		}#if($dir=~/\//)
	 }
	 return( \$found_seq_file);
  }#<< end of  else-------------------
}

#________________________________________________________________________
# Title     : open_sst_files_with_gap  (but reads jp file as an input, too!!!)
# Usage     : %out_sst_hash =%{&open_sst_files_with_gap(\$jp_file_dir_and_name)};
# Function  : gets the name of a file(jp file) with its absolute dir path
#             reads the sequence names in the jp file and looks up all
#             the sst files in the same directory. Puts sst sequences
#             in a hash with keys of sequence names.
#
# Example   : jp file  ==  seq1 ABDSF--DSFSDFS   <- true sequence
#                              seq2 T--kdf-GAGGGASF     (aligned)
#
#                 sst files ==> 'seq1.sst', 'seq2.sst' (in the same dir)
#
#             original sst format:  seq1 hHHHHHttEEEE  <-- No gaps!
#                                  seq2 hHHHHHHEEhh
#             After this sub ==>
#             (final out hash =   (  seq1 hHHHH--HttEEEE  <-- inserted
#                                  seq2 h--HHH-HHHEEEhh  )     gaps
#
# Warning   : $jp_file_dir_and_name should be absolute dir and file name
#             >> This gets JP file not SST file as input !!!!
# Keywords  :
# Options   :
# Returns   : a ref. for a hash
# Argument  : a ref. for scaler of "jp file name"
# Version   : 1.0
#--------------------------------------------------------------------
sub open_sst_files_with_gap{   # This automatically determines MSF or JP format
  my(%seq_file, $sst_file, %secondary_struc, @keys, $directory);
  %seq_file = %{read_any_seq_files($_[0])};
  ######  Simple reading in of SST files ------------
  ######  Simple reading in of SST files ------------
  for $seq_name (keys %seq_file){
	  $sst_file ="$seq_name\.sst";
	  print $sst_file;
	  %secondary_struc =( %secondary_struc, %{&read_any_seq_files(\$sst_file)});
  }
  print %secondary_struc;
  ### Now we have  1. %jp_file  and  2. %out_sst_seq_hash  -------
  if (!(defined(%secondary_struc))){
	  return(\%seq_file);
  }else{
	 %gap_corrected_out=%{&put_gaps_in_hash(\%seq_file, \%secondary_struc)};
	 return( \%gap_corrected_out );
  }
}

#________________________________________________________________________
# Title     : put_gaps_in_hash  (The order of input hashes DOES matter, in the end)
# Usage     : %out=%{&put_gaps_in_hash(\%hash_with_gap, \%hash_sans_gap)};
#
#             %hash1=('1ctx',  '111111111111111',      <-- hash input without gaps
#                     '2ctx',  '2222222222222222',
#                     '3ctx',  '3333333333');
#
#             %hash2=('1ctx',  'AAA--AAAAAAAAAAAA-',   <-- hash input with template gaps
#                     '2ctx',  'BBBBBBBBBBBB-BBBB',
#                     '3ctx',  'CCCCCC----CCCC');
#
#             >> resulting out hash;
#
#             %hash3=('1ctx',     '111--111111111111-',
#                     '2ctx',     '222222222222-2222',
#                     '3ctx',     '333333----3333 );
#
# Function  :
# Example   :
# Warning   : The keys for hashes should be the same and the two sequences
#             should be identical.
# Keywords  :
# Options   :
# Returns   : one hash reference.
# Argument  : 2 hash references.
# Version   : 1.0
#--------------------------------------------------------------------
sub put_gaps_in_hash{
  my($temp0)= values %{$_[0]};  # finds the hash with gaps
  my($temp1)= values %{$_[1]};  # and assigns to right input hash.
										  # above puts the first values to $temp0 & 1
  if (($temp0=~/\-/)||($temp0=~/\./)){  # compares the leng of the first
	 %hash_gap = %{$_[0]};               # values of hashes and assigns
			%hash_sans_gap=%{$_[1]};            # accordingly.
  }elsif(($temp1=~/\-/)||($temp1=~/\./)){
	 %hash_gap     =%{$_[1]};
	 %hash_sans_gap=%{$_[0]};
  }else{
	 %hash_gap     =%{$_[0]};  # If it can not determine input type, it assumes
	 %hash_sans_gap=%{$_[1]};  # that the first one was for gap, the 2nd for secondary.
  }                           # structure or whatever.
  my(@keys)=sort keys (%hash_gap);
  my($gap_char) = '-';  #  default gap_char is  '-'
  my(@string1, @string2, @gap_pos, %out_hash, $gapped_string, $res);

  if ($hash_gap{$keys[0]}=~/\-/){
	 $gap_char = '-';
  }elsif($hash_gap{$keys[0]}=~/\./){
	 $gap_char = '.';
  }
  ########## Actual exchange part ############
  for (@keys){
	 @string1 = split('', $hash_gap{$_});
	 @string2 = split('', $hash_sans_gap{$_});
	 for ($t=0; $t <=$#string1; $t++){
		  $res=$string1[$t];
		  if(($res =~ /\-/)||($res =~ /\./)||($res =~ /\s/)){
			  splice(@string2, $t, 0, $gap_char);
		  }
	 }
	 $gapped_string = join("", @string2);
	 $out_hash{$_}= $gapped_string;
  }
  return(\%out_hash);
}

#________________________________________________________________________
# Title     : get_gap_positions
# Usage     : @gap_pos=@{&get_gap_positions(\@string1)}; <- ('A','C','D','E')
#             @gap_pos=@{&get_gap_positions(\$string1)}; <- ( ACDE )
# Function  : gets gap positions of seq. and stores in an array
# Example   : for a string '--iu--sdf-j--', it will output  -2 -1 2 3 7 9 10
# Warning   : uses References.
# Keywords  : get_gap_positions_in_seq, get_seq_gap_positions get_gap_positions_in_array
# Options   : p for all positive gaps numbering. No negatives for '---STRING--'
#
# Returns   : 1 ref. of array eg)=(2,3,7,8,10,100,122);
# Argument  : 1 ref. of array eg)=( ABCDE--EF--GH ) while '-' is for gap.
# Version   : 1.4
#--------------------------------------------------------------------
sub get_gap_positions{
   my($res, @gap_posi, $all_positive, $i, @seq_array);
   for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'SCALAR'){      @seq_array=split(//, ${$_[$i]});
	   }elsif(ref($_[$i]) eq 'ARRAY'){   @seq_array=@{$_[$i]};
	   }elsif($_[$i]=~/^ *p *$/){ $all_positive = p ;
	   }else{ print "\n# get_gap_positions: I take ref of a string or an array \n";
	   }
   }
   if($seq_array[0] =~ /[\-\. ]/ and !$all_positive ){
	   for ($i=0; $i < @seq_array; $i++){
		  if ( $seq_array[$i] =~ /^[\-\. ]$/){
			  push (@gap_posi, -$i-1);
		  }else{  @gap_posi= sort {$a<=>$b} @gap_posi; last;   }
	   }
	   splice(@seq_array, 0, (@gap_posi));
	   for ($i=0; $i <  @seq_array; $i++){
		  $res=$seq_array[$i];
		  if ( $res =~ /[\-\. ]/){  push (@gap_posi, $i);  }
	   }
	   return(\@gap_posi);

   }
   if($seq_array[0] !~ /[\-\. ]/ or  $all_positive) {
	   for ($i=0; $i <  @seq_array; $i++){
		  $res=$seq_array[$i];
		  if ( $res =~ /[\-\. ]/){  push (@gap_posi, $i);  }
	   }
	   return(\@gap_posi);
   }
}


#________________________________________________________________________
# Title     : make_pairs_from_hash
# Usage     : @output =@{&make_pairs_from_hash(\%input_sequence_hash);
#             Input example
#             %input =  seq1  ABCDEFAD
#              seq2  SDFSFSDF
#              seq3  SDFSFSDF
#
# Function  : returns all the possible pairs of a set of sequences in
#             an array of references;
#
# Example   : @output=($ref1, $ref2, ....$refn)
#             each $ref is the reference of a hash of a pair of sequence
#             >>  %pair1 = %{$ref1}; %pair2 = %{$ref2}; %pair3 = %{$ref3};
#
#             %pair1 is like;       %pair1 is like;       %pair3 is like;
#
#             seq1  ABCDEFAD     seq1  ABCDEFAD        seq2  SDFSFSDF
#             seq2  SDFSFSDF     seq3  SDFSFSDF        seq3  SDFSFSDF
#
# Warning   :
# Keywords  :
# Options   :
# Returns   : one ref. of array for references for hashes.
# Argument  : one ref. of hash
# Version   : 1.0
#--------------------------------------------------------------------
sub make_pairs_from_hash{  # making sub sequences from a mother sequence
  my(%input)=%{$_[0]};        # <-- actual sequence input
  ### make all pairs from the input sequencs  ####
  my(@keys)= keys %input;
  my(@resultant_references);
  my(%temp_hash);
  for ($i=0; $i <=$#keys; $i++){
	 for ($j=$i+1; $j <=$#keys ; $j++){
		${"$keys[$i]\_$keys[$j]"}{$keys[$i]} =  $input{$keys[$i]};
		${"$keys[$i]\_$keys[$j]"}{$keys[$j]} =  $input{$keys[$j]};
		%temp_hash = %{${"$keys[$i]\_$keys[$j]"}{$keys[$j]}};
		push(@resultant_references, \%temp_hash);
	 }
  }
  return(\@resultant_references);  # the size of the array varies according to input
}

#________________________________________________________________________
# Title     : mail_it
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub mail_it{
#  open (MAIL, "|$sendmail_path $recipient") || die $!";
  #write (MAIL);
#
}


#________________________________________________________________________
# Title     : read_fssp_files
# Usage     : %anyarray = %{&read_fssp_files(\$any_sequence_file_fssp_form)};
# Function  : read hssp file and put sequences in a hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub read_fssp_files{
	 my($input_file)=${$_[0]};	 # $_[0]=>input eg. $dir.$out_seq_no.hssp
	 my(%out_hash, $no_of_seq, $seq_from, $seq_to, $char,
		 @names, $flag1, $flag, $original_string, $length_string,
		 @residues_in_the_line
	 );

	 unless (-e $input_file){
		 print chr(7);		     # beep warning for error
		 print "\n\n\t This is sub read_hssp_file  in th_lib.pl \n\n";
		 print "\n\n\t Fatal: The input file $file1 is not in the directory \n";
		 print "\n\n\t  !!! Program dies abnormally, bugs !!! \n";
		 &caller_info; print "\n"; exit;  # &caller_info is in th_lib.pl
	 }

	 open(FILE_1, "$input_file"); # reading in (hssp)
	  ############## READING ######################
	  while(<FILE_1>){
		 if((/^\#\# SUMMARY/)||(/  NR. STRID1/)){
			 next;
		 }
		 if (/\d+: \w+ +([\w_\-]+) +/){    # matching the structure names
			 push(@names, $1);         # @names are the names of structural sequences;
			 next;                     # do not need to look at further.
		 }
		 if(/\#\# ALIGNMENTS\s+(\d+)\s+\-\s+(\d+)/){
			 @residues_in_the_line=();
			 $seq_from=$1-1;  # to offset $seq_from to match array counting method later
			 $seq_to  =$2-1;
			 $flag2 =1;
			 next;
		 }
		 if((/^SeqNo PDBNo/)||(/^\s\s\s\s\s\s\s\s\s\s\s\s\s\s+/)){
			 next;
		 }
		 if(/^\#\# EQUIVALENCES/){
			 last;
		 }
		 $original_string=substr($_, 42) if ($flag2==1);  # assign from chars from 43(position) to the end
		 chomp($original_string);                         # removes new line at the end.
		 $length_string = length($original_string);
		 for ($i=0; $i < $length_string; $i+=3){   # note that >>> $i+=3;
			$char=substr( $original_string, $i, 1);
			push(@residues_in_the_line, $char) unless ($char=~/[ \t]/);  # There was spaces in the last seq part.
		 }                                                              #  I am removing them.
		 for ($i=$seq_from; $i <= $seq_to; $i++){
			$out_hash{$names[$i]}.=shift(@residues_in_the_line);
		 }
	 }
	 \%out_hash;
}

#________________________________________________________________________
# Title     : get_posi_shift_rms_whole
# Usage     : just type   get_posi_shift_rms_whole.pl
# Function  :
# Example   : (0.284994272623139   0.166781214203895)
#             The first figure is for error rate with out rms consideration
#             The second is for after applying threshold.
# Warning   :
# Keywords  :
# Options   :
# Returns   : two refs. of scalar values (rates)
# Argument  : takes 2 refs. of scalars for dir name (protein group name)
#             and threshold for rms
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_shift_rms_whole{
  my($group)    =${$_[0]};  #<---- for group name
  my($threshold)=${$_[1]};  #<---- for threshold
  my($input1) = "/nfs/ind4/ccpe1/people/A Biomatic /jpo/align/$group/$group\.msf";
  my($input2) = "/nfs/ind4/ccpe1/people/A Biomatic /jpo/align/$group/$group\.jp";
  my($input3) = "/nfs/ind4/ccpe1/people/A Biomatic /jpo/align/$group/$group\.rms";
  if($#_ < 1){ $threshold = 6; } # default is $threshold = 6
  my($average_rate2, $average_rate1, %array3);
  my(%array1)=%{&open_msf_files(\$input1)}; my(%array2)=%{&open_jp_files(\$input2)}; my(%array3);
  if (!(-e $input3)){
		  $average_rate1 = ${&get_posi_shift_hash(\%array1, \%array2)};
		  $average_rate2=$average_rate1; }
  else{ %array3=%{&open_rms_files(\$input3)};
		  ($rate_ref_1, $rate_ref_2) = &get_posi_shift_rms_hash(\%array1, \%array2, \%array3, \$threshold);
		  $average_rate1=${$rate_ref_1};  $average_rate2=${$rate_ref_2};  }
  return(\$average_rate1, \$average_rate2);
}
#________________________________________________________________________
# Title     : write_jp  (essentially the same as print_seq_in_block)
# Usage     : &write_jp(\%input_hash1,\%input_hash2, \%input_hash3.... );
# Function  : gets a ref(s) for hash and prints the content in lines of 60 char
# Example   :
# Warning   : derived from  print_in_block
# Keywords  :
# Options   :
# Returns   : Nothing, i.e. STDOUT
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub write_jp{
  my(%input)=%{$_[0]};
  my(@names)=keys(%input);
  my($larg, $offset, $diff, $gap_char);
  $gap_char='-';  # <-- setting default gap character.
  for ($i=0; $i <=$#names; $i++){
	 $larg = length($input{$names[$i]}) if length($input{$names[$i]}) > $larg;
	 if ($input{$names[$i]} =~ /\-/){
		$gap_char='-';
	 }elsif($input{$names[$i]} =~ /\./){
		$gap_char='.';
	 }
  }
  #%input=%{fill_ending_space(\%input)};
		  ######====== filling the end part gaps in shorter sequences #####
	for ($i=0; $i <=$#names; $i++){
	  if (length($input{$names[$i]}) < $larg){
		 $offset=length($input{$names[$i]});
		 $diff=$larg-$offset;
		 substr($input{$names[$i]}, $offset, $larg)= "$gap_char" x $diff;
	  }
	}
## Using format is slower than using just print ######## 0.075 vs 0.070
##   An xxx.out example -->
##            1       10        20        30        40        50
## 1cdg       CGGDWqGIinkIndgYLtgMgVtAIWISQPVeNIysiInysgvnnTAYhG

format JP_FORMAT =
@<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$names,    $seq
.
  for ($k=0; $k < $larg; $k+=50){     # 50 residues interval
	 for($i=0; $i < @names; $i++){    # number of sequences
		$names =$names[$i];
		$input{$names[$i]}=~ s/\n//g;
		$seq = substr($input{$names[$i]}, $k, 50);
		$~='JP_FORMAT';
		write;
	 }
	 print "\n";                       # next block starts.
  }
}

#________________________________________________________________________
# Title     : convert_num_to_0_or_1_hash (opposite of convert_num_to_0_or_1_hash)
# Usage     : with a variable for threshold ->
#
#             %out = %{&convert_num_to_0_or_1_hash(\%input_hash, \$threshold, \%input_hash2..)};
#
# Function  : changes all the numbers into 0 or 1 according to threshold given.
#             convert_num_0_or_1_hash converts threshold and bigger nums. to
#             '0' while convert_num_0_or_1_hash_opposite converts to '1'.
# Example   : A hash =>  name1  10012924729874924792742749748374297
#                        name2  10012924729874924792710012924729874
#             A threshold => 4
#             !! if numbers are smaller than 4, they become 1 (or true).
#             Outputhash  =>  name1  11111011011111011111011011110101111
#                        name2  11111011010001011001011010010101100
#
#             ($ref1, $ref2)=&convert_num_to_0_or_1_hash(\%hash, \%hash, \$threshold);
#             above is the example when with more than 2 input hashes.
# Warning   : Threshold value is set to 0 as well as all values smaller than that.
# Keywords  :
# Options   :
# Returns   :
# Argument  : two references, one for hash one for scaler for threshold
#
# Version   : 1.0
#--------------------------------------------------------------------
sub convert_num_to_0_or_1_hash{
	my(@output_hash_refs, %input, $c, $i,
		@string, $name, @names, $threshold, %output_hash);
  for($c=0; $c < @_; $c++){
	 if(ref($_[$c]) eq 'SCALAR'){ $threshold =${$_[$c]};} }
  for($i=0; $i < @_; $i++){
	 if(ref($_[$i]) eq 'HASH'){ %input=%{$_[$i]};
		@names=keys (%input);
		my($split_char)=',';
		if ((@_ < 1)&&(ref($_[$a]) eq 'HASH')){  # if input argument is only one (= if no threshold given),
		  $threshold = 1; } # <---- put 1 to $threshold as a default
		for $name (@names){
		  if($input{$name}=~/\,/){  $split_char = ','; }else{ $split_char = ''; }
		  if ($input{$name} =~ /[\.\-\d]+/){ @string=split(/$split_char/, $input{$name});
			 for (@string){
				if(/\d+/){
				  if($_ >= $threshold){ $_=0; } # !! becomes 0 (or false)
				  else{  $_=1;               } # !! becomes 1 (or true)
				}
			 }
		  }
		  $output_hash{$name}=join("", @string);
		}
		push(@output_hash_refs, \%output_hash);
	 }
  }
  if(@output_hash_refs == 1){return($output_hash_refs[0]); }
  elsif(@output_hash_refs > 1){ return(@output_hash_refs) }
}
#________________________________________________________________________
# Title     : convert_num_0_or_1_hash_opposite (opposite of convert_num_to_0_or_1_hash)
# Usage     : with a variable for threshold ->
#
#               %out = %{&convert_num_0_or_1_hash_opposite(\%input_hash, \$threshold)};
#
# Function  : changes all the numbers into 0 or 1 according to threshold given.
#             convert_num_0_or_1_hash converts threshold and bigger nums. to
#             '0' while convert_num_0_or_1_hash_opposite converts to '1'.
# Example   : A hash =>  name1  10012924729874924792742749748374297
#                        name2  10012924729874924792710012924729874
#             A threshold => 4
#             !! if numbers are smaller than 4, they become 1 (or true).
#             Outputhash  =>  name1  11111011011111011111011011110101111
#                        name2  11111011010001011001011010010101100
#
#             ($ref1, $ref2)=&convert_num_to_0_or_1_hash(\%hash, \%hash, \$threshold);
#             above is the example when with more than 2 input hashes.
# Warning   : Threshold value is set to 0 as well as all values smaller than that.
# Keywords  :
# Options   :
# Returns   :
# Argument  : two references, one for hash one for scaler for threshold
#
# Version   : 1.0
#--------------------------------------------------------------------
sub convert_num_0_or_1_hash_opposite{
  my(@output_hash_refs, %input,$c, $i, $split_char,
	  @string, $name, @names, $threshold,%output_hash);
  for($c=0; $c < @_; $c++){
	 if(ref($_[$c]) eq 'SCALAR'){ $threshold =${$_[$c]};}
	 elsif( $_[$c] =~/^\d+$/){ $threshold = $_[$c];}
  }
  for($i=0; $i <=$#_; $i++){
	 if(ref($_[$i]) eq 'HASH'){
		%input=%{$_[$i]};
		#show_hash(\%input);
		@names=keys (%input);
		$split_char=',';
		if ((@_ == 1)&&(ref($_[$a]) eq 'HASH')){  # if input argument is only one (= if no threshold given),
		  $threshold = 1; } # <---- put 1 to $threshold as a default
		for $name (@names){
		  if($input{$name}=~/\,/){  $split_char = ',';
		  }else{ $split_char = ','; }
		  if ($input{$name} =~ /[\.\-\d]+/){
			 @string=split(/$split_char/, $input{$name});
			 for (@string){
				if(/\d+/){
				  if($_ >= $threshold){ $_ = 1; } # !! becomes 0 (or false)
				  else{  $_=0;               } # !! becomes 1 (or true)
				}
			 }
		  }
		  $output_hash{$name}=join(",", @string);
		}
		push(@output_hash_refs, \%output_hash);
	 }
  }
  if(@output_hash_refs == 1){return($output_hash_refs[0]); }
  elsif(@output_hash_refs > 1){ return(@output_hash_refs) }
}
#________________________________________________________________________
# Title     : convert_char_to_0_or_1_hash
# Usage     : with a variable for threshold ->
#
#               %out = %{&convert_char_0_or_1_hash(\%input_hash)};
#
# Function  : changes all the chars into 1, gaps are to 0
# Example   : A hash =>  name1  ABCDSSFDSF..ASDFSD.....ADFASDF...AA
#                        name2  ASDFSD.....ADFBCDSSFDSF..ASASDF...A
#
#             Outputhash  => name1  00000000001100000011111000000011100
#                            name2  00000011111000000000000110000001110
#
# Warning   :
# Keywords  : convert_char, translate_char, convert_char_to_digit,
#             convert_char_to_number
# Options   :
# Returns   : A ref. of a hash
# Argument  : one reference of HASH.
#
# Version   : 1.2
#--------------------------------------------------------------------
sub convert_char_to_0_or_1_hash{
  my(%input) =%{$_[0]};
  my(@string, $name, $elem, $i, %output_hash, $split_char);
  my(@names)=keys (%input);
  for $name (@names){
	  if($input{$name}=~/^\S\,\S/){
	     $split_char=',';
	     @string=split(',', $input{$name}); }
	  else{ $split_char="";
	     @string=split("", $input{$name}); }
	  for ($i=0; $i< @string; $i++){
		 if($string[$i]=~/^[\.\-\s]$/){  $string[$i] = 0 ;
	     }elsif($string[$i]=~/\w/){  $string[$i] = 1;   }
	  }
	  $output_hash{$name}=join("$split_char", @string);
  }
  return(\%output_hash);
}



#________________________________________________________________________
# Title     : digitize_char
# Usage     : with a variable for threshold ->
#
#               %out = %{&digitize_char(\%input_hash)};
#
# Function  : changes all the chars into 1, gaps are to 0
# Example   : A hash =>  name1  ABCDSSFDSF..ASDFSD.....ADFASDF...AA
#                        name2  ASDFSD.....ADFBCDSSFDSF..ASASDF...A
#
#             Outputhash  => name1  00000000001100000011111000000011100
#                            name2  00000011111000000000000110000001110
#
# Warning   :
# Keywords  : convert_char, translate_char, convert_char_to_digit,
#             convert_char_to_number, digitize_sequence, digitize_char
#             digitize_hash
# Options   :
# Returns   : A ref. of a hash
# Argument  : one reference of HASH.
#
# Version   : 1.1
#--------------------------------------------------------------------
sub digitize_char{
  my(%input) =%{$_[0]};
  my(@string, $name, $elem, %output_hash, $split_char);
  my(@names)=keys (%input);
  for $name (@names){
	  if($input{$name}=~/^\S\,\S/){ $split_char=','; @string=split(',', $input{$name}); }
	  else{ $split_char=""; @string=split("", $input{$name}); }
	  for (@string){  if($_ =~/[\.\-\s]/){  $_ = 0 ; } else{  $_ = 1;   } }
	  $output_hash{$name}=join("$split_char", @string);
  }
  return(\%output_hash);
}


#________________________________________________________________________
# Title     : get_posi_diff_and_rms_hash
# Usage     : %position_diffs =\{&get_posi_diff_hash(\%diffs, \%rms_corrected)};
# Function  : gets two ref. of hashes and calculates the position diffs.
# Example   :
# Warning   : split and join char is ",";
# Keywords  :
# Options   :
# Returns   : one ref. for an array of differences of input arrays. array context.
#             ---Example input (a hash with numbers); The values are differences after comparion
#                                            with structural and sequential alignments.
#             %diffs =('seq1', '112342431111
#             'seq2', '12222...09011.1122',
#             'seq3', '13222...00011.1122',
#             'seq4', '12262...00011.112.');
#
#             %rms_corrected_0_or_1 => seq1_seq2  0111011111011101011110100101101010011
#                           seq1_seq3  01111.....111110111111111111100001011
#             example output;
#             seq3_seq4       01040...00000.000.
#             seq1_seq2       01012...1810...122
#             seq1_seq3       02012...1110...122
#             seq1_seq4       01032...1110...12.
#             seq2_seq3       01000...09000.0000
#             seq2_seq4       00040...09000.000.
#
# Argument  : Takes two ref. for hash
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_diff_and_rms_hash{                 # used in 'get_posi_shift_hash'
	my(%diffs)= %{$_[0]};
	my(%rms_corrected_0_or_1)=%{$_[1]};
	my(@names)= keys (%diffs);
	my(@names_rms)= keys (%rms_corrected_0_or_1);
	my(%seqs_compared_in_pair, %seqs_compared_in_pair_rms);
	my(@temp)=();
	my(@temp_rms)=();
	my(@temp2)=();
	for ($i=0; $i <= $#names; $i ++){
	  @temp = split(',', $diffs{$names[$i]});
	  (@{"string$i"})=@temp;
	}
	##########   getting differences for permutated pairs   ##############
	for ($i=0; $i <= $#names; $i++){  # permutation part for pairs
		for ($j=$i; $j <= $#names; $j ++){
			if ($j == $i){ next; } # avoiding seq1_seq1 type match.

			  for ($k=0; $k <= $#string0; $k ++){
				  if ((${"string$i"}[$k] =~ /[-\d+]/) && (${"string$j"}[$k] =~ /[-\d+]/)){

					  my($diff) = abs(${"string$i"}[$k] - ${"string$j"}[$k]);
					  push(@temp2, $diff);
				  }else{
					  push(@temp2, '.');  # @{"diffs$i$j"}
				  }
			  }
			if ( defined $rms_corrected_0_or_1{"$names[$i]\_$names[$j]"}){
			$seqs_compared_in_pair{"$names[$i]\_$names[$j]"}=join(",", @temp2);
		 }elsif(defined $rms_corrected_0_or_1{"$names[$j]\_$names[$i]"}){
			 $seqs_compared_in_pair{"$names[$j]\_$names[$i]"}=join(",", @temp2);
		 }
		 @temp2=();
	  }
	}

	for $pair_names (keys %seqs_compared_in_pair){
		my(@temp_string1)=split(',', $seqs_compared_in_pair{$pair_names});
		my(@temp_string2)=split('' ,  $rms_corrected_0_or_1{$pair_names});  # 0 or 1 values
		my(@final_residue)=();
		for ($i=0; $i <= $#temp_string1; $i++){
			if(($temp_string1[$i] =~ /\d+/) && ($temp_string1[$i] =~ /\d+/)){ # if both are numbers!
				$temp_string1[$i] = ($temp_string1[$i]*$temp_string2[$i]);  # multiplying part
			}
			push(@final_residue, $temp_string1[$i]);
		}
		$seqs_compared_in_pair_rms{$pair_names}=join(",", @final_residue);
	}
	return(\%seqs_compared_in_pair, \%seqs_compared_in_pair_rms);  # returns two refs
}
############

#________________________________________________________________________
# Title     : get_posi_shift_rms_hash
# Usage     : ($rate1_ref,$rate2_ref) =${&get_posi_shift_rms_hash(\%msf_hash, \%jp_hash,
#                                                                 \%rms_file_hash, \$threshold)};
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : two refs. for scalar values of shift rate of positions for proteins.
#              frirst scalar is rate without correcting rms deviation
#              second scalar is rate with    correcting rms deviation
#             >> example of xx
#
#             1cdg            APDTSVSNKQ NFSTDVIYQI FTDRFSDGNP ANNPTGAAFD GTC.TNLRLY
#             2aaa            ......LSAA SWRTQSIYFL LTDRFGR... ....TDNSTT ATCNTGNEIY
#
#             >> example of xx
#
#             2aaa       ------lsaasWrtqSIYFLLTDRFGrtdns-------ttatCntgneiy
#             1cdg       apdtsvsnkqnFSTDVIYQIFTDRFsdgnpannptgaafdgtCtn-lrly
#
#             >> example of xx
#
#             1cdg         APDTSVSNKQ NFSTDVIYQI FTDRFSDGNP ANNPTGAAFD GTCTN-LRLY
#             2aaa         ------LSAA SWRTQSIYFL LTDRFGRTDN S-------TT ATCNTGNEIY
#             1cdg_2aaa    ------7774 2221210000 0000000148 9-------99 41114-4000
#             1cdg_6taa    ------8674 2232220000 0000011059 9-------99 52114-3000
#
# Argument  : takes 4 hash REFERENCES for (one seq. and one struc. alignment(2nd arg)
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_shift_rms_hash{  # minimum sub of 'get_posi_shift_rate'
  my(%array1)  =%{$_[0]};  # sequential:  %array1 = seq1 BCC..D, seq2 DD..FD, seq3 ..LJK..KJLJ
  my(%array2)  =%{$_[1]};  # structural:  %array2 = seq1 B..CCD, seq2 DD..FD, seq3 KJ..LKKJL..J
  my(%rms_hash)=%{$_[2]};# from rms file (msarms resulting file)
  my($threshold)=${$_[3]};
  my(%array3)=();        # array3 has position differences between the same sequences
  my(@whole_length)=();  # %array2 is the structural alignments(from jp files)
  my(@string1, @string2, @seq_position1,
	  @seq_position2, $len_of_seq, @temp1, @temp2,
	  @position_diffs, @position_corrected1, @names );
  if($#_ < 3){  $threshold=6;  }
  @temp1= keys (%array1);  @temp2= keys (%array2);
  if ($#temp1 == $#temp2){   goto CALC;  };

  %array1 = %{&hash_common(\%array1, \%array2)}; # %array2 is from jp (structural)
  %array2 = %{&hash_common(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)}; # this removes wrong gaps(in '.' form, in MSF)
  %array2 = %{&remov_com_column(\%array2)};

  CALC:
  @names= keys %array2;
  for $name (@names){
	  @string1 =split('', $array1{$name});
	  @string2 =split('', $array2{$name}); # ! @string2 is the structural. ! (used)

			 @seq_position1  = @{&get_posi_sans_gaps(\$array1{$name})}; # positions after compacting.
			 @seq_position2  = @{&get_posi_sans_gaps(\$array2{$name})}; # @seq_position2 is structural

			  $len_of_seq =($#seq_position2+1);
					 push(@whole_length, $len_of_seq);

			 @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
			 @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
			 #print "@position_corrected1";
			 $array3{$name}=join(",", @position_corrected1); # array3 is for disply of seq.
  }                      # !! split and join char is ',';

  # %array3 has the form.  These numbers are position differences between the same sequences
  #                        one from str. one from seq.
  # seq1  1,1,2,3,.,2,3,.,1,.,0,0,0,1,1,1,1,1,2
  # seq2  1,1,2,1,.,1,3,.,1,.,0,0,1,0,1,1,1,3,2
  # seq3  1,1,2,3,.,2,3,.,1,.,1,1,0,0,1,1,1,3,2

  %rms_hash=%{&convert_num_to_0_or_1_hash(\%rms_hash, \$threshold)};

  ($final_posi_diffs, $final_posi_diffs_rms)=&get_posi_diff_and_rms_hash(\%array3, \%rms_hash); # get_posi_diff_hash uses
  %final_posi_diffs=%{$final_posi_diffs};
  %final_posi_diffs_rms=%{$final_posi_diffs_rms};

  my($sum_of_posi_diffs)    =${&sum_hash(\%final_posi_diffs)};
  my($sum_of_posi_diffs_rms)=${&sum_hash(\%final_posi_diffs_rms)};

  my($av_of_posi_diffs)     =$sum_of_posi_diffs/($#names+1); # dividing by seq number.
  my($av_of_posi_diffs_rms) =$sum_of_posi_diffs_rms/($#names+1); # dividing by seq number.

  my($sum_seq_length)   =${&sum_array(\@whole_length)};
  my($av_rate)          =$av_of_posi_diffs/($sum_seq_length);
  my($av_rate_rms)      =$av_of_posi_diffs_rms/($sum_seq_length);
  #&print_seq_in_block(\%final_posi_diffs); # <--- leave this
  (\$av_rate, \$av_rate_rms);  #---------> returns two ref. for rates.
}

#________________________________________________________________________
# Title     : open_rms_files
# Function  : open rms files and put sequences in a hash
#             Example of rms (aa
#             1cdg         APDTSVSNKQ NFSTDVIYQI FTDRFSDGNP ANNPTGAAFD GTCTN-LRLY
#             2aaa         ------LSAA SWRTQSIYFL LTDRFGRTDN S-------TT ATCNTGNEIY
#             6taa         ------ATPA DWRSQSIYFL LTDRFARTDG S-------TT ATCNTADQKY
#             1cdg_2aaa    ------7774 2221210000 0000000148 9-------99 41114-4000
#             1cdg_6taa    ------8674 2232220000 0000011059 9-------99 52114-3000
#             2aaa_6taa    ------1000 0000000000 0000000010 0-------00 0000000000
#
#             Example output hash;
#             1nor        LECHNQQSSQPPTTKTCS-GETNCYKKWWSDH----RGTIIERGFFC--GCPKVK-PGVNLNCCRT-DRCNN-------
#             1cdg        APDTSVSNKQNFSTDVASISGLVTSLP-QGSYNDVLGGLLNGNTLSVGSGGAASNFTLAAGGTAVWQYTAATATPTIGH
#             1cdg_2aaa   ------777002112111-----343333---431127----5433234-72354541131211111176899999999
#
# Usage     : %anyarray = {&open_rms_files(\$any_sequence_file_msf_form)};
# Example   :
# Warning   : xxx.rms files are Tim Hubbard's 'msarms' program's output.
# Keywords  :
# Options   :
# Returns   : a ref. of a hash
# Argument  : takes one ref. for a file.
# Version   : 1.0
#--------------------------------------------------------------------
sub open_rms_files{
	my(@names, $n, $s, $n2, $s2, $n3, $s3, %hash);
	unless (-e ${$_[0]}){
	  print chr(7);
	  print "\n\n\t This is sub open_rms_files in th_lib.pl \n\n";
	  print "\n\n\t Fatal: The input file $file1 is not in the directory \n";
	  exit;
	}
	open(FILE_1,"${$_[0]}");  	# reading in (rms file)
	while(<FILE_1>){         	# file1 needs to be xxxx.rms for the moment, automatic later
	  if(/^(\w\w\w\w)[\t]* +([\-\w ]+)[\n]$/){ $n=$1;  $s=$2; $s =~s/ //g; $hash{$n}.= $s;
	  }elsif(/^(\w\w\w\w_\w\w\w\w+)[\t]* +([\-\d( )\-]+)[\n]$/){
		  $n2=$1; $s2=$2; $s2=~s/ //g; $hash{$n2}.= $s2;
	  }elsif(/^(\w\w\w\w\w)(\w\w\w\w+)[\t]* +([\-\d( )\-]+)[\n]$/){
		  $n3="$1_$2"; $s3=$3; $s3=~s/ //g;   $hash{$n3}.= $s3;
	  }
	}
	return( \%hash );
}
#________________________________________________________________________
# Title     : open_rms_files2
# Function  : same as open rms files but returns two hashes.
#             Example of rms (aa
#             1cdg         APDTSVSNKQ NFSTDVIYQI FTDRFSDGNP ANNPTGAAFD GTCTN-LRLY
#             2aaa         ------LSAA SWRTQSIYFL LTDRFGRTDN S-------TT ATCNTGNEIY
#             1cdg_2aaa    ------7774 2221210000 0000000148 9-------99 41114-4000
#             1cdg_6taa    ------8674 2232220000 0000011059 9-------99 52114-3000
#
#             Example output 2 hashes;
#             1nor        LECHNQQSSQPPTTKTCS-GETNCYKKWWSDH----RGTIIERGFFC--GCPKVK-PGVNLNCCRT-DRCNN-------
#             1cdg        APDTSVSNKQNFSTDVASISGLVTSLP-QGSYNDVLGGLLNGNTLSVGSGGAASNFTLAAGGTAVWQYTAATATPTIGH
#
#             1cdg_2aaa   ------777002112111-----343333---431127----5433234-72354541131211111176899999999
#             1cdg_2taa   ------777002112111-----343333---431127----5433234-72354541131211111176899999999
#
# Usage     : ($hash_for_jp, $hash_for_rms) = &open_rms_files(\$any_sequence_file_msf_form);
# Example   :
# Warning   : xxx.rms files are Tim Hubbard's 'msarms' program's output.
# Keywords  :
# Options   :
# Returns   : return(@out); while @out is (\%hash_rms, \%hash_jp)
# Argument  : takes one ref. for a file.
# Version   : 1.0
#--------------------------------------------------------------------
sub open_rms_files2{
	my(@names, $n,$s,@out,$n2,$s2,$n3,$s3, %hash_jp,%hash_rms);
	unless (-e ${$_[0]}){   print chr(7);
	  print "\n\n\t This is sub open_rms_files in th_lib.pl \n\n";
	  print "\n\n\t Fatal: The input file $file1 is not in the directory \n"; exit; }
	open(FILE_1,"${$_[0]}");  	# reading in (rms file)
	while(<FILE_1>){         	# file1 needs to be xxxx.rms for the moment, automatic later
	  if(/^(\w\w\w\w)[\t]* +([\-\w ]+)[\n]$/){ $n=$1; $s=$2; $s =~s/ //g; $hash_jp{$n}.= $s;}
	  elsif(/^(\w\w\w\w_\w\w\w\w+)[\t]* +([\-\d( )\-]+)[\n]$/){
		  $n2=$1; $s2=$2; $s2=~s/ //g; $hash_rms{$n2}.= $s2; }
	  elsif(/^(\w\w\w\w\w)(\w\w\w\w+)[\t]* +([\-\d( )\-]+)[\n]$/){
		  $n3="$1_$2"; $s3=$3; $s3=~s/ //g;   $hash_rms{$n3}.= $s3;  } }
	push(@out, \%hash_rms, \%hash_jp); return(@out);
}



#________________________________________________________________________
# Title     : steve_permute_array  (C) Steve Brenner, copyrighted.
# Usage     : %final_out_hash=%{&steve_permute_array(\@list, \2, \4)};
#                         Above is for pairs, 3 seqs, and 4 seqs.
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : a ref. of a hash.
# Argument  : upto 3 arg. 1st one is for the ref. of an array. 2nd for min
#             element no. 3rd for max element no. 2nd and 3rd are optional.
# Version   : 1.0
#--------------------------------------------------------------------
sub steve_permute_array{
  my(@lst) =@{$_[0]};  my($from)=${$_[1]};  my($to)  =${$_[2]};
  if ($from > $to){
	 print "\n Error, you put higher \$from ($from) than \$to ($to). \$to must be equal or bigger\n
				  than \$from for right permuation\n";  }
  $from--;  # to make min and max numbers sensible.
  my($res, $com, $count, %final_out_hash, @res );
  @res = @{&CombinAll(\@lst, \$from, \$to)};
			#______________________________________________
			 sub CombinAll{
			 my (@lst) = @{$_[0]};
			 my ($from)= ${$_[1]};
			 my ($to)  = ${$_[2]};
			 my ($i, $res, @ret);
				for $i (1 .. $to){  # <<------------  change this to limit the maximum elements
				  push (@ret, @{&Combin(\@lst, $i)});
				  #_____________________________
				  sub Combin{
					 my ($lst)=$_[0];
					 my ($num)=$_[1];
					 my ($i,$elt, @newlst, $res, @ret );
					 return [[]] if $num == 0;
					 foreach $i (0 .. (@$lst-1)) {
						$elt = $lst->[$i];  # the element of interest
						@newlst = @{$lst}[$i+1 .. (@{$lst}-1)];
						$res = &Combin(\@newlst, $num-1);
						foreach $com (@{$res}) {
						splice(@$com,0,0,$elt);
						  push (@ret,$com);
						}
					 }
					 \@ret;
				  }
				  #______________________________
				}
				\@ret;
			  }
			  #_____________________________________________
  foreach $com (@res) {
	 $count++;  my($entry_key)="$count$#{$com}";   # the output will be (eg)  (1 'abc,ccd', 2, 'ccd,efg',,,)
	 $final_out_hash{$entry_key}=join(",", @{$com}) if @{$com} > $from;  }
  \%final_out_hash;
}

#________________________________________________________________________
# Title     : opendir_and_go_in_and_do_something
# Usage     : &opendir_and_go_in_and_do_something(\$input_dir);
#                     $inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
# Function  : open dir and process all files in the dir if you wish,
#             and then go in any other sub
#             if any file(dir) is linked, it skips that file.
# Example   : as in my 'indexing.pl' for perl file indexer.
# Warning   : Seems to work fine., !! Change the name of this sub to shorter one
#                                  !! for your own purpose.
# Keywords  : open_dir_and_go_in_and_do_something,
#             go in there do something, get into subdir and do something.
#             go_in_subdir_and_do_something, recursive execution
# Options   :
# Returns   :
# Argument  : gets a ref. of a scaler (dir name) and returns nothing(void).
# Version   : 1.0
#--------------------------------------------------------------------
sub opendir_and_go_in_and_do_something{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	$original_dir = ${$dir[0]};  ## $dir[0] is a ref. $original_dir
	print ($original_dir);
	@read_files=@{&read_any_dir_simple($original_dir)};
	foreach $file(@read_files){
	  my($realfile1)=$original_dir.'/'.$file;

	  if (-l $realfile1){
		  print "\n\n$realfile1 is linked, skipping. \n";
		  next;
	  }elsif (-d $realfile1){  ## If it is a directory.

		 &opendir_and_go_in_and_do_something(\$realfile1);   # RECURSION occurrs here!!

	  }elsif (-f $realfile1){     #<<------ This is where things match
		 chdir($original_dir);
		 @dir=split(/\//, $realfile1);
		 $dir=$dir[($#dir-1)];  # $dir is the name of directory in where you are.
		  ###  put any program which does something here ###
		  #if(($realfile1 =~/(\.tem)$/)&&(-s $realfile1)){
		  #   print $realfile1, "\n";
		  #   @ref_of_seq = &open_tem_files(\$realfile1);
		  #   print_seq_in_block($ref_of_seq[0]);
		  #}
	  }else{  next;  }
	}
}
#________________________________________________________________________
# Title     : open_subdir_and_go_in_and_do
# Usage     : &opendir_and_go_in_and_do_something(\$input_dir);
#                     $inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
# Function  : open dir and process all files in the dir if you wish,
#             and then go in any other sub
#             if any file(dir) is linked, it skips that file.
# Example   : as in my 'indexing.pl' for perl file indexer.
# Warning   : Seems to work fine., !! Change the name of this sub to shorter one
#                                  !! for your own purpose.
# Keywords  :
# Options   :
# Returns   :
# Argument  : gets a ref. of a scaler (dir name) and returns nothing(void).
# Version   : 1.0
#--------------------------------------------------------------------
sub open_subdir_and_go_in_and_do{
	my($original_dir)=${$_[0]};
	my(@read_files)=@{&read_any_dir(\$original_dir)};
	my(%array_msf,%array_jp, $jp_file , $dir);
	my($id_compos)=0;
	my($error_rate)=0;
	foreach $file(@read_files){
	  my($realfile1)=$original_dir.'/'.$file;
	  if (-l $realfile1){
		  print "\n\n$realfile1 is linked, skipping. \n";
		  next;
	  }elsif (-d $realfile1){  ## If it is a directory.

		 &opendir_and_go_in_and_do_something(\$realfile1);   # RECURSION occurrs here!!

	  }elsif (-f $realfile1){     #<<------ This is where things match
		 chdir($original_dir);
		 @dir=split(/\//, $realfile1);
		 $dir=$dir[($#dir-1)];  # $dir is the name of directory in where you are.
		  ###  put any program which does something here ###
		  #if(($realfile1 =~/(\d+\-$no\.msf)$/)&&(-s $realfile1)){
		  #}
	  }else{  next;  }
	}
}
#________________________________________________________________________
# Title     : get_occurances_of_shift_type_hash
# Usage     : for single protein group
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one reference  of hash. (eg, 0=>1000, 1=>888, 2=>83, ...
#                                          0,1,2... are position shift types
#                                          1000, 888, 83... are occurances in
#                                          the comparision between str. and seq.
#                                          alignments.)
# Argument  : Two references of hashes.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_occurances_of_shift_type_hash{  # minimum sub of 'get_posi_shift_rate'
  my(%array1)=%{$_[0]};   # %array1 = seq1 BCC..D, seq2 DD..FD, seq3 ..LJK..KJLJ
  my(%array2)=%{$_[1]};   # %array2 = seq1 B..CCD, seq2 DD..FD, seq3 KJ..LKKJL..J
  my(@whole_length)=();    # %array2 is the structural alignments(from jp files)
  my(%array3)=();
  my(@string1, @string2, @seq_position1,
	  @seq_position2, $len_of_seq,
	  @position_diffs, @position_corrected1, @names
  );
  %array1 = %{&hash_common(\%array1, \%array2)};
  %array2 = %{&hash_common(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)}; # this removes wrong gaps(in '.' form, in MSF)
  %array2 = %{&remov_com_column(\%array2)};

  @names= keys %array2;
  for $name (@names){
	  @string1 =split('', $array1{$name});
	  @string2 =split('', $array2{$name}); # ! @string2 is the structural. ! (used)

			 @seq_position1  = @{&get_posi_sans_gaps(\$array1{$name})};
			 @seq_position2  = @{&get_posi_sans_gaps(\$array2{$name})}; # @seq_position2 is structural

			  $len_of_seq =($#seq_position2+1);
					 push(@whole_length, $len_of_seq);

			 @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
			 @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
			 #print "@position_corrected1";
			 $array3{$name}=join(",", @position_corrected1); # array3 is for disply of seq.
  }                      # !! split and join char is ',';

  # %array3 has the form.  These numbers are position differences between the same sequences
  #                        one from str. one from seq.
  # seq1  1,1,2,3,.,2,3,.,1,.,0,0,0,1,1,1,1,1,2
  # seq2  1,1,2,1,.,1,3,.,1,.,0,0,1,0,1,1,1,3,2
  # seq3  1,1,2,3,.,2,3,.,1,.,1,1,0,0,1,1,1,3,2
  my(%final_posi_diffs) =%{&get_posi_diff_hash(\%array3)};
  my($sum_of_posi_diffs)=${&sum_hash(\%final_posi_diffs)};
  my($av_of_posi_diffs) =$sum_of_posi_diffs/($#names); # dividing by seq number.
  my($sum_seq_length)   =${&sum_array(\@whole_length)};
  my($av_rate)          =$av_of_posi_diffs/($sum_seq_length);
  &print_seq_in_block(\%final_posi_diffs); # <--- leave this
  for (values %final_posi_diffs){
	 my(@splited) = split(',', $_);
			for (@splited){
			  $out{$_}++ if ($_ =~ /\d+/);
			}
  }
  return(\%out);
}
#________________________________________________________________________
# Title     : get_occurances_of_shift_type_hash_all
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_occurances_of_shift_type_hash_all{  # minimum sub of 'get_posi_shift_rate'
  my(%array1)=%{$_[0]};   # %array1 = seq1 BCC..D, seq2 DD..FD, seq3 ..LJK..KJLJ
  my(%array2)=%{$_[1]};   # %array2 = seq1 B..CCD, seq2 DD..FD, seq3 KJ..LKKJL..J
  my(@whole_length)=();    # %array2 is the structural alignments(from jp files)
  my(%array3)=();
  my(@string1, @string2, @seq_position1,
	  @seq_position2, $len_of_seq,
	  @position_diffs, @position_corrected1, @names
  );
  %array1 = %{&hash_common(\%array1, \%array2)};
  %array2 = %{&hash_common(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)}; # this removes wrong gaps(in '.' form, in MSF)
  %array2 = %{&remov_com_column(\%array2)};

  @names= keys %array2;
  for $name (@names){
	  @string1 =split('', $array1{$name});
	  @string2 =split('', $array2{$name}); # ! @string2 is the structural. ! (used)

			 @seq_position1  = @{&get_posi_sans_gaps(\$array1{$name})};
			 @seq_position2  = @{&get_posi_sans_gaps(\$array2{$name})}; # @seq_position2 is structural

			  $len_of_seq =($#seq_position2+1);
					 push(@whole_length, $len_of_seq);

			 @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
			 @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
			 #print "@position_corrected1";
			 $array3{$name}=join(",", @position_corrected1); # array3 is for disply of seq.
  }                      # !! split and join char is ',';

  # %array3 has the form.  These numbers are position differences between the same sequences
  #                        one from str. one from seq.
  # seq1  1,1,2,3,.,2,3,.,1,.,0,0,0,1,1,1,1,1,2
  # seq2  1,1,2,1,.,1,3,.,1,.,0,0,1,0,1,1,1,3,2
  # seq3  1,1,2,3,.,2,3,.,1,.,1,1,0,0,1,1,1,3,2
  my(%final_posi_diffs) =%{&get_posi_diff_hash(\%array3)};
  my($sum_of_posi_diffs)=${&sum_hash(\%final_posi_diffs)};
  my($av_of_posi_diffs) =$sum_of_posi_diffs/($#names); # dividing by seq number.
  my($sum_seq_length)   =${&sum_array(\@whole_length)};
  my($av_rate)          =$av_of_posi_diffs/($sum_seq_length);
  &print_seq_in_block(\%final_posi_diffs); # <--- leave this
  for (values %final_posi_diffs){
	 my(@splited) = split(',', $_);
			for (@splited){
			  $out{$_}++ if ($_ =~ /\d+/);	 }
  }
  # the final result is %out which has accumulated entries with occurances
}



#________________________________________________________________________
# Title     : get_occurances_of_char
# Usage     : %occurances_shft_type=%{&get_occurances_of_char(\%final_posi_diffs)};
#             %char_occur=%{&get_occurances_of_char(\@ref_array_of_chars)};
#             %char_occur=%{&get_occurances_of_char(\$ref_string_of_chars)};
#             %char_occur=%{&get_occurances_of_char($string_of_chars)};
#
# Function  : gets the numbers of occurances for 1, 2, 3 ... position shifts.
#             If hash is given, it only looks at the values.
#             If multiple string, array, hash or combinations of these
#              are given, it will add up to one single result
# Example   :
# Warning   :
# Keywords  : composition of chars, composition table making,
#             make_composition, make composition table
#             occurances_of_char, get_char_occurances, occurances
#             get_percentage_occurances_of_char, percentage_occurances_of_char
# Options   : 'p' for percentage output of the char among others
#             'n' for NO name option when HASH input is given
# Returns   : one ref. of hash  (a =>5, b=>6, c=>4,,,,,)
# Argument  : one ref. of hash (seq1 alsdfjlsj
#                               seq2 asldfjsld
#                               seq3 owiurouou);
# Version   : 1.3
#--------------------------------------------------------------------
sub  get_occurances_of_char{
  my ($i, %H, $no_name, %out, $N,@splited, $val,$percentage_out,
	 $split, $sum);
  for($i=0; $i< @_; $i++){
	if($_[$i]=~/^[\-]?p$/i){
	   $percentage_out=1;   splice(@_, $i, 1); 	   $i--;
	}elsif($_[$i]=~/^[\-]?n$/i){
	   $no_name=1;	        splice(@_, $i, 1);	   $i--;
	}
  }

  for($i=0; $i< @_; $i++){
	if(  ref($_[$i]) eq 'HASH'){
	  my %H=%{$_[$i]};
	  my @names=keys %H;
	  for $key (@names){
		 for $split ( split(//, $H{$key}) ){
			if($no_name==1){ $N=$split
			}else{ $N="$key\_$split"; }
		    $out{$N}++; $sum++
		 }
	  }
	}elsif(ref($_[$i]) eq 'ARRAY'){
	  @splited=@{$_[$i]};
	  for $split (@splited){  $out{$split}++; $sum++ }
	}elsif(ref($_[1]) eq 'SCALAR'){
	   @splited = split(//, ${$_[$i]});
	   for $split (@splited){  $out{$split}++; $sum++ }
	}elsif( !(ref($_[$i])) ){
	   @splited = split(//, $_[$i]);
	   for $split (@splited){  $out{$split}++; $sum++ }
	}
  }
  if($percentage_out==1){
	 my @keys=keys %out;
	 my %percent;
	 for($i=0; $i< @keys; $i++){
		$percent{$keys[$i]} = $out{$keys[$i]}/$sum*100;
	 }
	 return(\%percent);
  }else{
	 return(\%out);
  }
}


#________________________________________________________________________
# Title     : make_composition_table
# Usage     : %occurances=%{&make_compos_table(\%key_and_value_for_seq)};
# Function  : gets the numbers of occurances for 1, 2, 3 ... position shifts.
# Example   :
# Warning   :
# Keywords  : composition of chars, composition table making, make composition table
#             make_composition_table, get_composition, get_amino_acid_composition
#             protein_composition, make_aa_composition_tablem, aa_composition
# Options   :
# Returns   : one ref. of hash  (a =>5, b=>6, c=>4,,,,,)
# Argument  : one ref. of hash (seq1 alsdfjlsj
#                               seq2 asldfjsld
#                               seq3 owiurouou);
# Version   : 1.2
#--------------------------------------------------------------------
sub  make_composition_table{
  my %input = %{$_[0]};
  my (@splited, $split, %out );
  for (values %input){
	 @splited = split(//, $_);
	 for $split (@splited){  $out{$split}++; }
  }
  return(\%out);
}
#________________________________________________________________________
# Title     : make_composition_ratio_table_simple
# Usage     : %occurances=%{&make_compos_ratio_table(\%final_posi_diffs)};
# Function  : gets ratio of the numbers of occurances for any chars.
# Example   :
# Warning   : This pools all the sequences, to not distinct seq composition if
#              you put more than one seq.
# Keywords  : composition table, composition of chars, composition table making,
#             make composition table, make_composition_table
# Options   :
# Returns   : one ref. of hash  (a =>0.05, b=>0.06, c=>0.04,,,,,)
# Argument  : one ref. of hash (seq1 alsdfjlsj
#                               seq2 asldfjsld
#                               seq3 owiurouou);
# Version   : 1.0
#--------------------------------------------------------------------
sub  make_composition_ratio_table_simple{
  my %input = %{$_[0]};
  my %out;
  my (@keys, $i, %ratio_out, $each_char_occur, @splited, $split, $all_occur );
  for (values %input){
	 @splited = split(//, $_);
	 for $split (@splited){  $out{$split}++; $all_occur ++; }
  }
  @keys = keys %out;
  for ($i=0; $i < @keys; $i ++){
	 $each_char_occur = $out{$keys[$i]};
	 $ratio_out{$keys[$i]} = $each_char_occur/$all_occur;
  }
  \%ratio_out;
}

#________________________________________________________________________
# Title     : make_composition_ratio_table
# Usage     : %rate=%{&make_compos_ratio_table(\%hash1, \%hash2, ,,,)};
# Function  : gets ratio of the numbers of occurances for any chars.
# Example   :
# Warning   : This produces each composition ration table for each seq
# Keywords  : composition table, composition of chars, composition table making,
#             make composition table, make_composition_table
#             aa_composition_ratio, composition_ratio, protein_composition,
#             get_composition_ratio, get_aa_composition_ratio
# Options   :
# Returns   : one ref. of hash  ('seq_name', { a =>0.05, b=>0.06, c=>0.04,,,,, } )
# Argument  : one or more ref. of hash (seq1 alsdfjlsj
#                                       seq2 asldfjsld
#                                       seq3 owiurouou);
# Version   : 1.3
#--------------------------------------------------------------------
sub  make_composition_ratio_table{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my %final_out, %ratio_out;

  for($i=0; $i < @hash; $i ++){ ## @hash has [\%seq1, \%seq2,....]
	 my %input = %{$hash[$i]};      ## taking single hash
	 print "\n",__LINE__, " \%input has ", %input, "\n" if ($debug == 1);
	 my (%out, $all_occur);
	 my @seq_names = keys %input;     ## taking Seq names from each hash input.
	 print "\n",__LINE__, " \@seq_names has ", @seq_names, "\n" if ($debug == 1);
	 for( $j=0; $j < @seq_names; $j++){
		 %ratio_out=();
		 my $split;
		 my $name = $seq_names[$j];
		 my $seq = $input{$seq_names[$j]};
		 my @splited = split(//, $seq);
		 for $split (@splited){  $out{$split}++; $all_occur ++; }
		 my @amino_residue = keys %out;
		 print "\n",__LINE__, " \@amino_residue ", @amino_residue , "\n" if ($debug == 1);
		 for ($k=0; $k < @amino_residue; $k ++){
	      my $each_char_occur = $out{$amino_residue[$k]};
	      $ratio_out{$amino_residue[$k]} = $each_char_occur/$all_occur;
		 }
		 $final_out{$name}=\%ratio_out;
	 }
  }
  if(@hash==1){ return(\%ratio_out); }
  else{ \%final_out } #  \%final_out has ('seqname', \%hash_of_ratio_of_amino_acids )
}

#________________________________________________________________________
# Title     : get_position_shift_rate (derived from 'get_posi_shift_hash' )
# Usage     : %rate_hash = %{&get_position_shift_rate(\%hash_msf, \%hash_jp)};
# Function  : This is to get position specific error rate for line display rather than
#             actual final error rate for the alignment. Takes two file names of seq.
#             Output >>
#             seq1_seq2  1110...222...2222
#             seq2_seq3  1111....10...1111
#             seq1_seq3  1111....0000.0000
#
# Example   : my(%error_rate)=%{&get_position_shift_rate(\%input, \%input2)};
# Warning   : split and join char is ','; (space)
# Keywords  :
# Options   : 'ss' for secondary structure regions(Helix and Beta region only
#                 calculation for error rate). There is specialized sub called
#              get_segment_shift_rate for sec. str. only handling.
#
#    $ss_opt            becomes    ss by  ss, SS, -ss, -SS     #  for secondary structure only
#    $H                 =         'H' by   -H or -h or H       # to retrieve only H segment
#    $S                 becomes   'S' by   -S or  S            # to retrieve only S segment
#    $E                 becomes   'E' by   -E or  E            # to retrieve only E segment
#    $T                 becomes   'T' by   -T or -t or T or t  # to retrieve only T segment
#    $I                 becomes   'I' by   -I or  I            # to retrieve only I segment
#    $G                 becomes   'G' by   -G or -g or G or g  # to retrieve only G segment
#    $B                 becomes   'B' by   -B or -b or B or b  # to retrieve only B segment
#    $HELP              becomes    1  by   -help   # for showing help
#    $simplify          becomes    1  by   -p or P or -P, p
#    $simplify          becomes    1  by   -simplify or simplify, Simplify SIMPLIFY
#    $comm_col          becomes   'C' by   -C or C or common
#    $LIMIT             becomes    L  by   -L, L               # to limit the error rate to 9 .
#
# Returns   : \%final_posi_diffs;
# Argument  : %{&get_position_shift_rate(\%msfo_file, \%jpo_file)};
#             Whatever the names, it takes one TRUE structral and one ALIGNED hash.
# Version   : 1.5
#--------------------------------------------------------------------
sub get_position_shift_rate{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	print __LINE__," \$char_opt is  \"$char_opt\" in get_position_shift_rate\n" if $debug eq 1;
	print __LINE__," \@string is  \"@string\" in get_position_shift_rate\n" if $debug eq 1;
	print __LINE__," \$LIMIT is  \"$LIMIT\" in get_position_shift_rate\n" if $debug eq 1;

	my(%arraySEQ)=%{$hash[0]};
	my(%arraySTR)=%{$hash[1]};
	my($gap_char, %final_posi_diffs, @stringSTR,@stringSEQ,@seq_positionSEQ,
		@seq_positionSTR,$len_of_seq, @position_diffs, @position_corrected1,
		@names, @whole_length, %array3, @keys_common, %DSSP_common, @stringDSSP_common);

	$gap_char='.';

	%arraySTR = %{&hash_common_by_keys(\%arraySTR, \%arraySEQ)};
	%arraySEQ = %{&hash_common_by_keys(\%arraySEQ, \%arraySTR)};
	%arraySEQ = %{&remov_com_column(\%arraySEQ)};
	%arraySTR = %{&remov_com_column(\%arraySTR)};

	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	if($debug eq 1){
		print __LINE__,
		" ## sorting sequence names. To make things constant. \n\n";  }
	@names= sort keys %arraySTR;
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	#  If common column of secondary structure representation option $comm_col is set
	#  open_dssp_files sub routine will get the common seq parts of all the sequences.
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	if($comm_col =~ /C/i){
		%DSSP_common=%{&open_dssp_files( @names, $H, $S, $E, $T, $I, $G, $B, $simplify, 'C')};
		@keys_common= keys %DSSP_common;
		@stringDSSP_common = split(/|\,/, $DSSP_common{$keys_common[0]});
		if($debug2 eq 1){ print __LINE__," \$comm_col is set to: $comm_col \n";
			print __LINE__," \@stringDSSP_common is :@stringDSSP_common \n";
		}
	}

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# Comparing two hashes
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	for $name (@names){
		#"""""""""""""""" Splitting the sequence string
		if($arraySEQ{$name}=~/\,\S+\,/){
			@stringSEQ =split(/\,/, $arraySEQ{$name});
			@stringSTR=split(/\,/, $arraySTR{$name});  }
		else{
			@stringSEQ =split(//, $arraySEQ{$name});
			@stringSTR=split(//, $arraySTR{$name});
		}
		print "\n",__LINE__, " \@stringSEQ  is  @stringSEQ \n" if $debug2 eq 1;
		print "\n",__LINE__, " \@stringSTR  is  @stringSTR \n" if $debug2 eq 1;

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		#   Contracting  the SEQ.
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		@seq_positionSEQ = @{&get_posi_sans_gaps(\$arraySEQ{$name})};
		@seq_positionSTR = @{&get_posi_sans_gaps(\$arraySTR{$name})};

		#"""""""""""""""" To get secondary structure only calc  """"""""""""""""""""""""""""
		# It superposes the NON sec. region on  @seq_positionSTR to nullify positions.
		#  get_posi_diff ignores non char positions in calc.
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		if( ($ss_opt =~ /ss$/i) && ($comm_col !~ /C/i) ){
			%DSSP=%{&open_dssp_files($name, $H, $S, $E, $T, $I, $G, $B, $simplify, $comm_col)};
			if($debug1 eq 1){
			   print "\n",__LINE__," open_dssp_files has options \$H ->$H \$S->$S \$E->$E \n";
			   print "\n",__LINE__," \$T->$T \$I->$I \$G->$B \$simplify->$simplify \$comm_col ->$comm_col\n";
			   &show_hash( \%DSSP );
			}
			if(ref(\%DSSP) eq 'HASH'){ # to check it %DSSP was valid, If not it skips overlaying
				@stringDSSP = split(/|\,/, $DSSP{$name});
				$size_of_stringDSSP = @stringDSSP;
				$size_of_seq_positionSTR = @seq_positionSTR;
				if($debug2 eq 1){
					  print "\n",__LINE__," \@stringDSSP is \n @stringDSSP\n";
					  print "\n",__LINE__," Size of \@stringDSSP      is $size_of_stringDSSP\n" ;
					  print "\n",__LINE__," Size of \@seq_positionSTR is $size_of_seq_positionSTR\n";
					  print "\n",__LINE__," \$gap_char is \"$gap_char\" \n" ;
				}
				#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
				#   When the sec. str is not defined in DSSP, I delete the position of
				#   @stringDSSP to gap(ie. make it blank to exclude error rate calc)
				#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
				for($i=0; $i < @stringDSSP; $i++){
					if($stringDSSP[$i] =~ /\W/){ $seq_positionSTR[$i]= $gap_char;}
				}
			}
		}elsif( $comm_col =~ /C/i){
				print __LINE__, " Replacing position with \gap_char \"$gap_char\"\n" if $debug2 eq 1;
				$ss_opt = 'ss'; # whether it was set or not, make it 'ss'
				for($i=0; $i < @stringDSSP_common; $i++){
					if($stringDSSP_common[$i] =~ /\W/){ $seq_positionSTR[$i]= $gap_char;}
				}
		}

		if($debug2 eq 1){
			print __LINE__,
			print " \@seq_positionSTR is  @seq_positionSTR\n";
		}

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		#   getting Position differences.
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		@position_diffs  = @{&get_posi_diff(\@seq_positionSEQ, \@seq_positionSTR)};

		if($debug2 eq 1){
			print __LINE__,
			print " \@position_diffs is  @position_diffs\n";
		}

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		#  You can have two types of output according to which alignment you compare your
		#   error rates. (1) Compare to @stringSEQ   (2) @stringSTR
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		@position_corrected1 = @{&put_position_back_to_str_seq(\@stringSEQ, \@position_diffs)};
		$array3{$name}=join(",", @position_corrected1);

	}
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	# The final Step for error rate, $LIMIT is to confine error rate in one digit (ie, less than 10)
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	%final_posi_diffs =%{&get_residue_error_rate(\%array3, $LIMIT)};

	undef(@whole_length, $len_of_seq);
	return(\%final_posi_diffs);
}

#________________________________________________________________________
# Title     : get_posi_rates_hash_out (derived from 'get_posi_shift_hash' )
# Usage     : %rate_hash = %{&get_posi_shift_hash(\%hash_msf, \%hash_jp)};
# Function  : This is to get position specific error rate for line display rather than
#             actual final error rate for the alignment.
#             Output >>
#             seq1_seq2  1110...222...2222
#             seq2_seq3  1111....10...1111
#             seq1_seq3  1111....0000.0000
#
# Example   :
# Warning   : split and join char is ','; (space)
# Keywords  :
# Options   :
# Returns   : \%final_posi_diffs;
# Argument  : %{&get_posi_rates_hash_out(\%msfo_file, \%jpo_file)};
#             Whatever the names, it takes one TRUE structral and one ALIGNED hash.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_rates_hash_out{
  my(%array1)=%{$_[0]};
  my(%array2)=%{$_[1]};
  my(@string1, @string2, @seq_position1, @seq_position2,
	  $len_of_seq,@position_diffs, @position_corrected1,
	  @names, @whole_length, %array3);
  %array1 = %{&hash_common_by_keys(\%array1, \%array2)};
  %array2 = %{&hash_common_by_keys(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)};
  %array2 = %{&remov_com_column(\%array2)};
  @names= keys %array2;
  for $name (@names){
	  @string1 =split('', $array1{$name});
	  @string2 =split('', $array2{$name});
	  @seq_position1 = @{&get_posi_sans_gaps(\$array1{$name})};
	  @seq_position2 = @{&get_posi_sans_gaps(\$array2{$name})};
	  $len_of_seq =(@seq_position2);
	  push(@whole_length, $len_of_seq);
	  @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
	  @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
	  $array3{$name}=join(",", @position_corrected1);  }
  my(%final_posi_diffs)=%{&get_each_posi_diff_hash(\%array3)};
  undef(@whole_length, $len_of_seq);
  return(\%final_posi_diffs);
}

#________________________________________________________________________
# Title     : get_posi_diff_hash
# Usage     : %position_diffs =\{&get_posi_diff_hash(\@seq_position1, \@seq_position2)};
# Function  : gets a ref. of a hash and calculates the position diffs.
# Example   :
# Warning   : split and join char is ',';    # used in 'get_posi_shift_hash'
# Keywords  :
# Options   :
# Returns   : one ref. for an array of differences of input arrays. array context.
#             ---Example input (a hash with sequences); The values are differences after comparion
#                                            with structural and sequential alignments.
#             %diffs =('seq1', '112342431111
#             'seq2', '12222...09011.1122',
#             'seq3', '13222...00011.1122',
#             'seq4', '12262...00011.112.');
#             example output;
#             seq3_seq4       01040...00000.000.
#             seq1_seq2       01012...1810...122
#             seq1_seq3       02012...1110...122
#             seq1_seq4       01032...1110...12.
#             seq2_seq3       01000...09000.0000
#             seq2_seq4       00040...09000.000.
# Argument  : Takes a ref. for hash which have positions of residues of sequences.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_diff_hash{
	if(ref($_[0]) ne 'HASH' ){
		print "\n Arg not not hash ref in get_posi_diff_hash \n\n";print chr(7);exit;}
	else{   my(%diffs)= %{$_[0]};  my(@names)= keys (%diffs);
	  my(%seqs_compared_in_pair)=();  my(@temp, @temp2);
	  for ($i=0; $i < @names; $i ++){
		 if($diffs{$names[$i]} =~/\,/){  @temp = split(',', $diffs{$names[$i]});}
		 else{ @temp = split('', $diffs{$names[$i]}); }  (@{"string$i"})=@temp; }
		 for ($i=0; $i < @names; $i++){
			for ($j=$i+1; $j < @names; $j ++){
			for ($k=0; $k < @string0; $k ++){
			  if ((${"string$i"}[$k] =~ /[-\d+]/) && (${"string$j"}[$k] =~ /[-\d+]/)){
				  my($diff) = abs(${"string$i"}[$k] - ${"string$j"}[$k]);
				  push(@temp2, $diff); }
			  else{  push(@temp2, '.'); }    }
			$seqs_compared_in_pair{"$names[$i]\_$names[$j]"}=join(",", @temp2) if $names[$i] < $names[$j];
			$seqs_compared_in_pair{"$names[$j]\_$names[$i]"}=join(",", @temp2) if $names[$i] > $names[$j];
			@temp2=();     }    }
	 \%seqs_compared_in_pair; }
}

#________________________________________________________________________
# Title     : get_posi_shift_hash  (bug free!!)
# Usage     : $rate_final = ${&get_posi_shift_hash(\%hash_msf, \%hash_jp)};
# Function  :
# Example   :
# Warning   : split and join char is ','; (space)
# Keywords  :
# Options   :
# Returns   : One scalar value of shift rate of position for proteins.
# Argument  : takes two hash REFERENCES for (one seq. and one struc. alignment(2nd arg)
# Version   : 1.1
#--------------------------------------------------------------------
sub get_posi_shift_hash{
  my(%array3, @whole_length);
  my(%array1)=%{$_[0]};   # %array1 = seq1 BCC..D, seq2 DD..FD, seq3 ..LJK..KJLJ
  my(%array2)=%{$_[1]};   # %array2 = seq1 B..CCD, seq2 DD..FD, seq3 KJ..LKKJL..J
  my(@string1, @string2, @seq_position1, @seq_position2, $len_of_seq, @names,
	  @position_diffs, @position_corrected1 );
  %array1 = %{&hash_common(\%array1, \%array2)}; # %array2 is from jp (structural)
  %array2 = %{&hash_common(\%array2, \%array1)};
  %array1 = %{&remov_com_column(\%array1)};
  %array2 = %{&remov_com_column(\%array2)};  @names= keys %array2;

  for $name (@names){
	  @string1 =split('', $array1{$name});
	  @string2 =split('', $array2{$name}); # ! @string2 is the structural. ! (used)

	  @seq_position1  = @{&get_posi_sans_gaps(\$array1{$name})};
	  @seq_position2  = @{&get_posi_sans_gaps(\$array2{$name})}; # @seq_position2 is structural

	  $len_of_seq = @seq_position2;
	  push(@whole_length, $len_of_seq);

	  @position_diffs = @{&get_posi_diff(\@seq_position1, \@seq_position2)};
	  @position_corrected1 = @{&put_position_back_to_str_seq(\@string2, \@position_diffs)};
	  $array3{$name}=join(",", @position_corrected1); # array3 is for disply of seq.
  }                      # !! split and join char is ',';

  # %array3 has the form.  These numbers are position differences between the same sequences
  #                        one from str. one from seq.
  # seq1  1,1,2,3,.,2,3,.,1,.,0,0,0,1,1,1,1,1,2
  # seq2  1,1,2,1,.,1,3,.,1,.,0,0,1,0,1,1,1,3,2
  # seq3  1,1,2,3,.,2,3,.,1,.,1,1,0,0,1,1,1,3,2
  my(%final_posi_diffs) =%{&get_posi_diff_hash(\%array3)};
  my($sum_of_posi_diffs)=${&sum_hash(\%final_posi_diffs)};
  my($av_of_posi_diffs) =$sum_of_posi_diffs/(@names); # dividing by seq number.
  my($sum_seq_length)   =${&sum_array(\@whole_length)};
  my($av_rate)          =$av_of_posi_diffs/($sum_seq_length);
  return(\$av_rate);
}



#________________________________________________________________________
# Title     : print_seq_in_block_with_print
# Usage     : &print_seq_in_block (\%input_hash1,\%input_hash2, \%input_hash3.... );
# Function  : gets a ref(s) for hash and prints the content in lines of 60 char
# Example   :
# Warning   : derived from  print_in_block
# Keywords  :
# Options   :
# Returns   : Nothing, STDOUT
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub print_seq_in_block_with_print{
  my(%input)=%{$_[0]};
  my(@names)=keys(%input);
  my($larg, $offset, $diff, $gap_char);
  my($seq)=0;
  $gap_char='.';  # <-- setting default gap character.
  for ($i=0; $i <=$#names; $i++){
	 $larg = length($input{$names[$i]}) if length($input{$names[$i]}) > $larg;
	 if ($input{$names[$i]} =~ /\-/){
		$gap_char='-';
	 }elsif($input{$names[$i]} =~ /\./){
		$gap_char='.';
	 }
  }

		  ######====== filling the end part gaps in shorter sequences #####
	for ($i=0; $i <=$#names; $i++){
	  if (length($input{$names[$i]}) < $larg){
				$offset=length($input{$names[$i]});
					  $diff=$larg-$offset;
					  substr($input{$names[$i]}, $offset, $larg)= "$gap_char" x $diff;
			 }
	}
		  ######====== filling the end part gaps in shorter sequences #####

  for ($k=0; $k < $larg; $k+=60){     # 60 residues interval
	 for($i=0; $i <=$#names; $i++){    # number of sequences
		print $names[$i], "  \t";       # 2 spaces and tab between the names and sequences.
		$input{$names[$i]}=~ s/\n//g;
		$seq = substr($input{$names[$i]}, $k, 60);
		print $seq;
		print "\n";                     # put a new line.
	 }
	 print "\n";                       # next block starts.
  }
}

#________________________________________________________________________
# Title     : fill_ending_space
# Usage     : (*out, *out2, *out3)=&fill_ending_space(\%input1, \%input2, \%input3);
#             &print_seq_in_block(\%out,\%out2,\%out3); <-- if you want printout.
# Function  : fills the ending gaps or space of sequences (shorter ones)
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : (\%hash1,..... )
# Argument  : (\%input1, \%input2, \%input3.....);
# Version   : 1.0
#--------------------------------------------------------------------
sub fill_ending_space{
  my(@out_hash_list);
  my($gap_char,$larg);
  for($a=0; $a<@_; $a++){
	 my(%hash)=%{$_[$a]};
	 my(@names)=keys(%hash); $gap_char='.';
		for ($i=0; $i < @names; $i++){
		  if (length($hash{$names[$i]}) > $larg){ $larg =length($hash{$names[$i]});}
		  if($hash{$names[$i]} =~ /\-/){  $gap_char='-'; }
		  elsif($hash{$names[$i]} =~ /\./){ $gap_char='.'; }
		  $hash{$names[$i]}=~s/\s/$gap_char/g;  }
		for ($i=0; $i < @names; $i++){
		  if (length($hash{$names[$i]}) < $larg){ $offset=length($hash{$names[$i]});
				$diff=$larg-$offset;
				if ($diff == 0){ next; }
				substr($hash{$names[$i]}, $offset, $larg)= "$gap_char" x $diff; } }
		push(@out_hash_list, \%hash);  }
  if($#_ == 0){ return(\%hash); }
  elsif($#_ > 0){ return(@out_hash_list); } # <-- contains (\%out_seq0, \%out_seq1, \%out_seq2, .... )
}

#________________________________________________________________________
# Title     : print_seq_in_block_old
# Usage     : &print_seq_in_block_old (\%input_hash1,\%input_hash2, \%input_hash3.... );
# Function  : gets a ref(s) for hash (single key and value)
#             and prints the content in lines of 60 char
# Example   :
# Warning   : This is more or less for debugging. Use  print_seq_in_block
# Keywords  :
# Options   :
# Returns   : Nothing, STDOUT
# Argument  : one or more refs. for hash
#               if there are more than one array input it makes such outputs
#
#             Name1    THIS.IS.from.hash.one
#             Name2    This
#
#             Name1    THIS
#             Name2    This.is.from.hash.two
#
# Version   : 1.0
#--------------------------------------------------------------------
sub print_seq_in_block_old{
  for ($i=0; $i <=$#_; $i++){
	 (${"name$i"}, ${"seq$i"}) = each (%{$_[$i]});
	 ${"seq$i"}=~ s/\n//g;               # rid of new line.
			@{"seq_array$i"}= split('', ${"seq$i"});
	 $larg = $#{"seq_array$i"} if ($#{"seq_array$i"} > $larg);
  }    # results = %hash1, %hash2, %hash3, ...
  for ($k=0; $k<=$larg; $k+=60){        # 60 residues interval
	 for($i=0; $i <=$#_; $i++){          # number of sequences
		print ${"name$i"}, "  \t";        # space between the names and sequences.
		for ($j=$k; $j<=($k+59); $j++){   # 1 to 60 and 61 to 120 etc...
		  print ${"seq_array$i"}[$j];     # prints one line
		}
		print "\n";                       # put a new line.
	 }
	 print "\n";                         # next block starts.
  }
}
#________________________________________________________________________
# Title     : print_in_block   (look at print_seq_in_block !!)
# Usage     : &print_in_block (\@input_array,\@input_array2, \@input_array3.... );
# Function  : gets a ref(s) for array and prints the content in lines of 60 char
# Example   :
# Warning   : This is more or less for debugging. Use  print_seq_in_block
# Keywords  :
# Options   :
# Returns   : Nothing, STDOUT
# Argument  : one or more refs. for array
#               if there are more than one array input it makes such outputs
#             Example out)
#               THIS.IS.from.array.one
#             This.is.from.array.two
#
#              THIS.IS.from.array.one
#               This.is.from.array.two
#
# Version   : 1.0
#--------------------------------------------------------------------
sub print_in_block{
  for ($i=0; $i <=$#_; $i++){
	 @{"arr$i"}= @{$_[$i]}; my($larg) = $#{"arr$i"} if ($#{"arr$i"}>$larg);}
  for ($k=0; $k <=$larg; $k+=60){       # 60 residues interval
	 for($i=0; $i <=$#_; $i++){  print "name$i", "  \t";
		for ($j=$k; $j <=($k+59); $j++){ print ${"arr$i"}[$j]; }
		  print "\n";  } print "\n"; }
}

#________________________________________________________________________
# Title     : get_posi_diff    # used in 'get_posi_shift_hash'
# Usage     : @position_diffs =&get_posi_diff(\@seq_position1,\@seq_position2);
# Function  :
# Example   : @compacted_posi_dif =(1 ,2, 1, 1, '.' ,2,  1,  1, '.');
#             @compacted_posi_dif2=(4 ,2, 1, 1, ,2,  1, '.' ,3,  1);
#             output ==> ( 3 0 0 0 . 1 . 2 .)   (it ignores positions which have non digits.
#             output ==> (-3 0 0 0 . 1 .-2 .) when abs is not used.
# Warning   :
# Keywords  :
# Options   :
# Returns   : one ref. for an @array of differences of input arrays. array context.
# Argument  : Takes two ref. for arrays which have positions of residues.
# Version   : 1.4
#--------------------------------------------------------------------
sub get_posi_diff{
	my(@positions1)=@{$_[0]};
	my(@positions2)=@{$_[1]};
	my(@num_diffs_between_str_and_ali, $diff, $z, $gap_char);
	if($debug eq 1){
	  print __LINE__, " # get_posi_diff : \n";
	}
	$gap_char = '.';
	for ($z=0; $z < @positions2; $z++){
	  if (($positions1[$z] =~ /\d+/) && ($positions2[$z] =~ /\d+/)){
		  $diff=($positions1[$z] - $positions2[$z]);
		  push(@num_diffs_between_str_and_ali, $diff );
	  }else{
		  push(@num_diffs_between_str_and_ali, $gap_char);
	  }
	}
	\@num_diffs_between_str_and_ali;
}

#________________________________________________________________________
# Title     : get_posi_diff_abs    # used in 'get_posi_shift_hash'
# Usage     : @position_diffs =&get_posi_diff_abs(\@seq_position1,\@seq_position2);
# Function  :
# Example   : @compacted_posi_dif =(1 ,2, 1, 1, '.' ,2,  1,  1, '.');
#             @compacted_posi_dif2=(4 ,2, 1, 1, ,2,  1, '.' ,3,  1);
#             output ==> ( 3 0 0 0 . 1 . 2 .)   (it ignores positions which have non digits.
#             output ==> (-3 0 0 0 . 1 .-2 .) when abs is not used.
# Warning   :
# Keywords  :
# Options   :
# Returns   : one ref. for an @array of differences of input arrays. array context.
# Argument  : Takes two ref. for arrays which have positions of residues.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_diff_abs{
	 my(@positions1)=@{$_[0]};  my(@positions2)=@{$_[1]};
	 my(@num_diffs_between_str_and_ali, $diff);
	 for ($z=0; $z<=$#positions2; $z++){
	  if (($positions1[$z] =~ /\d+/) && ($positions2[$z] =~ /\d+/))
	  {  # following abs is important for the behaviour of the output.
		  #push(@num_diffs_between_str_and_ali, (($positions1[$z] - $positions2[$z])) );
		  push(@num_diffs_between_str_and_ali, (abs($positions1[$z] - $positions2[$z])) );
	  }
 }
 return(\@num_diffs_between_str_and_ali);
}


#________________________________________________________________________
# Title     : put_position_back_to_str_seq ( put_posi_back_to_str_seq )
# Usage     : @result =@{&put_position_back_to_str_seq(\@string_from_struct, \@compacted_posi_dif)};
# Function  :
# Example   : @string_from_struct=('X', 'T', 'A' ,'B' , '.' ,'F',  'G', '.' , 'O' ,'P', '.');
#             @compacted_posi_dif=(1 ,2, 1, 1, ,2, 1, 1, 1);
# Warning   :
# Keywords  :
# Options   :
# Returns   : a ref. for an array
# Argument  : takes two refs for arrays (one for char the other for digits
# Version   : 1.0
#--------------------------------------------------------------------
sub put_position_back_to_str_seq{
  my(@string_from_struct)=@{$_[0]};
  my(@compacted_posi_dif)=@{$_[1]};
  my($j)=0; my($char)=0; my($i);
  for ($i=0; $i < @string_from_struct; $i++){
	 $char = $string_from_struct[$i];
	 if ($char =~ /\w/){
		 $string_from_struct[$i] = $compacted_posi_dif[$i-$j];
	 }else{ $j++; }
  }
  return(\@string_from_struct);
}


#________________________________________________________________________
# Title     : get_posi_shift_hash_rms
# Usage     : $result=${&get_posi_shift_hash_rm(\%h1, \%h2, \%h3)};
# Function  : caculates the error rate of seq after filtering according to
#                rms deviation.
# Example   :
# Warning   : Not complete yet.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_shift_hash_rms{  # minimum sub of 'get_posi_shift_rate'
  my(%array1)=%{$_[0]};
  my(%array2)=%{$_[1]};
  my(%array3)=%{$_[2]};
  my($string1, $string2, $av_rate, $rate,$sum_rate,
	  $sum_of_diff, $rate2);
  my(@seq_position1, @rates, @rates2,@seq_position2, @names, @position_diffs)=();
  %array1 = %{&hash_common(\%array1, \%array2)};
  %array2 = %{&hash_common(\%array2, \%array1)};
  %array3 = %{&hash_common(\%array3, \%array1)};
  %array1 = %{&remov_com_column(\%array1)}; # this removes wrong gaps(in '.' form, in MSF)
  %array2 = %{&remov_com_column(\%array2)};
  &show_hash(\%array1);
  &show_hash(\%array2);
  @names=sort keys %array1;
  for($j=0; $j<= $#names ; $j++){  # nf1 is the number of seq.
	 my($name)=$names[$j];
	 $string1=$array1{$name};
	 $string2=$array2{$name};
	 @seq_position1 = @{&get_posi_sans_gaps(\$string1)};
	 @seq_position2 = @{&get_posi_sans_gaps(\$string2)};
	 @position_diffs =@{&get_posi_diff(\@seq_position1, \@seq_position2)};
	 $sum_of_diff = ${&sum_array(\@position_diffs)};
	 print "sum of diff  $sum_of_diff";&n;
	 print "positions  $#position_diffs"; &n;
	 $rate2=$sum_of_diff/($#position_diffs+1);
	 push(@rates2, $rate2);
  }
  $av_rate = ${&array_average(\@rates2)};
  return(\$av_rate);
}

#________________________________________________________________________
# Title     : open_fil_file (fil file meant to be rms file)
# Usage     : %out = %{&open_fil_file(\$input_seq_file)};
# Function  : reads xxx.fil file which shows whether I have to discard
#             regions of sequences due to too big RMS deviation.
# Example   :
# Warning   : !!! not yet complete !!!
# Keywords  :
# Options   :
# Returns   : a ref. for a hash(associative array).
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub open_fil_file{   # opening msf files. input is a file name.
  my($nf1)=-1;
  my($input_file)=${$_[0]};							# $_[0]=>input eg. $dir.$out_seq_no.msf
  my(@names, %arr);
  unless (-e $input_file){ print chr(007);  # beep warning for error
			print "\n\n\t This is sub open_fil_files in th_lib.pl \n\n";
			print "\n\n\t Fatal: The input file $file1 is not in the directory \n";
			print "\n\n\t  !!! Program dies abnormally, bugs !!! \n";
			&caller_info; &n; exit;
  }
  open(FILE_1,"$input_file");  			# reading in (MSF)
  while(<FILE_1>){         				# file1 needs to be xxxx.msf for the moment, automatic later
			if(/^\s.+/){ next; }
			if(/\-+/)  { next; }				# to prevent lines with '-' as in hmm Sean's output
			if(/^([\w_]+)[\t]* +(\d+)$/){ 		# matching patterns (including tab space)
					 &caller_info if (($1 !=1) || ($1 !=0));
					 my($temp1)=$1;
					 my($temp2)=$2;
					 $temp2=~s/ //g;				   # removing all the spaces in $2
					 if( ! $arr{$temp1}){     		# if hash %array1 is not defined,
								$names[++$nf1]=$temp1; 	# with first set (names for seq.)
					 }
					 $arr{$temp1}.= $temp2;   		# maay and concatenate second set to first set(name)
			}
  }
  \%arr;
}

#________________________________________________________________________
# Title     : send_mail
# Usage     :
# Function  : mail a bunch of @lines to a user
# Example   :
#             send_mail ( $to, $subject, @lines );
#             #-# i -- $to      = email address
#             #-# i -- $subject = string to be put in the Subject: line
#             #-# i -- @lines   = lines to be mailed - must not have \n
#             -- DISCUSSION:
#
#             Uses /usr/lib/sendmail to mail a bunch of lines to the email address
#             specified. The @lines should not have terminating \n characters: they
#             will be supplied.
#
#             -- EXAMPLE:
#             &P10::mail ( 'schip@lmsc.lockheed.com', 'Test 34', @mylines );
#             -- END
#             : Could some one share their knowledge of how to mail a message from
#             :  within a Perl script with a novice Perl user?
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub send_mail{
	my ( $to, $subject, @lines ) = @_;
	my $pipe;
	$pipe = '|/usr/lib/sendmail -t';
	open ( $pipe, $pipe ) || die "Cannot open: '$pipe'\n";
	print $pipe "To: $to\n";
	print $pipe "Subject: $subject\n";
	print $pipe "\n";
	print $pipe join("\n",@lines),"\n";
	close $pipe;
}

#________________________________________________________________________
# Title     : rand_word
# Usage     : $word = ${&rand_word(7)};
#             print "sub rand_word gives $word\n";
# Function  : This sub routine should return an alphabet string of
#             length specified by  an argument.
# Example   :
# Warning   :
# Keywords  : randomize words, makes random words, scramble_word,
#              shuffle_words,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub rand_word {
	 my($length) = $_[0];
	 my($word, $letter);
	 srand(((time/$$)^($>*time))/(time/(time^$$)));

	 foreach (1..$length){
		  $letter = pack("c", rand(128));
		  redo unless $letter =~ /[a-zA-Z]/;   # I just don't like \w, okay?
		  $word .= $letter;
	 }
	 return(\$word);
}

#________________________________________________________________________
# Title     : opendir_and_go_rand_fasta_and_clustal
# Usage     : &opendir_and_go_rand_fasta_and_clustal(\$input_dir); #$inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
# Function  : open dir and process all files if you wish, and then go in any sub
#             dir of it. Using recursion. created by A Biomatic
#             if any file is linked, it skips that file.
# Example   : $inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
#             &opendir_and_go($inputdir);
# Warning   : Seems to work fine.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub opendir_and_go_rand_fasta_and_clustal{
	 my($original_dir)=${$_[0]};
	 my(@read_files)=@{&read_any_dir(\$original_dir)};
	 foreach $file(@read_files){
		my($realfile1)=$original_dir.'/'.$file;
		if (-l $realfile1){
				print "\n\n$realfile1 is linked, skipping. \n";
				next;
		}elsif (-d $realfile1){
				&opendir_and_go_rand_fasta_and_clustal(\$realfile1);
		}elsif (-f $realfile1){ #<<------ This is where things match
				chdir($original_dir);
				$chk_file ='1-2-rev.fas';
				if (-e $chk_file){
					last;
				}elsif($file =~/(\d+\-2)\.fas$/){
					$out_file = "$1\-rand\.fas";
					system("rand-fasta.pl $file $out_file");
					system("clustalw $out_file");
				}
		}else{
				next;
		}
	 }
}

#________________________________________________________________________
# Title     : opendir_and_go_rand_fasta
# Usage     : &opendir_and_go_rand_fasta(\$input_dir); #$inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
# Function  : open dir and process all files if you wish, and then go in any sub
#             dir of it. Using recursion. created by A Biomatic
#             if any file is linked, it skips that file.
# Example   : $inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
#             &opendir_and_go($inputdir);
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub opendir_and_go_rand_fasta{
	 my($original_dir)=${$_[0]};
	 my(@read_files)=@{&read_any_dir(\$original_dir)};
	 foreach $file(@read_files){
		   my($realfile1)=$original_dir.'/'.$file;
		   if (-l $realfile1){
					  print "\n\n$realfile1 is linked, skipping. \n";
					  next;
		   }elsif (-d $realfile1){
			   &opendir_and_go_rand_fasta(\$realfile1);
		   }elsif (-f $realfile1){ #<<------ This is where things match
					  chdir($original_dir);

						$chk_file ='1-2-rev.fas';
						if (-e $chk_file){
						   last;
						}elsif($file =~/(\d+\-2)\.fas$/){
						   $out_file = "$1\-rand\.fas";
								  system("rand-fasta.pl $file $out_file");
						}
		   }else{
					  next;
		   }
	 }
}

#________________________________________________________________________
# Title     : reverse_sequences
# Usage     : %out = %{&rev_sequence_one_hash(\%input_seq_hash, \%hash2,...)};
# Function  : gets ref. of strings, reverses the elems.
# Example   :
# Warning   :
# Keywords  : reverse_sequence, reverse_sequence_hash, rev_sequence_hash
# Options   :
# Returns   : one or more hash references.
# Argument  : hash, eg(1, 'skdfj', 2, 'kdfjkdj', 3, 'kdfjk');
#             Input example:
#             ..
#             >HI0256
#             FLSANVLPIAPIINGGRTAVDNITQSVSDKPFVKDIGTKIKEAIALSKYSTQPQYISTTN
#             >HI0094
#             DILRTFVKMETGLKFPKKFKLKANLALFMNRRNKRPDTIMTAVADAGQKISEAKLNTTAK
#             ..
#
#             Output example: (Reversed :-)
#             ..
#             >HI0256_rv   <<-- note the added extension
#             ALDJFLKAJFJALSDJFLAJSLFJAKLSDFJLASJDFLAJSLDFJASJDFLJSDFJSDLJ
#             >HI0094_rv
#             LASJDFLKAJFJALSDJFLKSDJLFAJLKDJFLASJDFLKDFJKDJFKDJFKDJFKJDLJ
#             ..
#
# Version   : 1.2
#--------------------------------------------------------------------
sub reverse_sequences{
	my(%rev_hash, @rev_hash_refs, $name, $name_with_ext, $i);
	for($i=0; $i < @_; $i++){
	    my %in_hash = %{$_[$i]};
		my @keys    = keys %in_hash;
		for $name (@keys ){
		    $name_with_ext = "$name\_rv";
			$rev_hash{$name_with_ext} = reverse($in_hash{$name});
		}
		push(@rev_hash_refs, \%rev_hash);
	}
	if(@rev_hash_refs ==1){ return($rev_hash_refs[0]);}
	else{ return(@rev_hash_refs);}
}



#________________________________________________________________________
# Title     : rev_sequence_mul_array
# Usage     : @out = @{&rev_sequence_mul_array(\@input_mul_seq_array)};
# Function  : gets a ref. of an string, reverses the elems.
# Example   :
# Warning   : This reverses sequences!
# Keywords  :
# Options   :
# Returns   : one ref. of  mul_array, eg. ('jfkdj', 'kdfjsdj', 'jjjkk')
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub rev_sequence_mul_array{
  my(@in_array)=@{$_[0]};
  my(@rev_array, $reversed);
  for ($i = 0; $i <=$#in_array ; $i++){ $reversed =reverse($in_array[$i]);
	  push(@rev_array,$reversed);  }
  return(\@rev_array);
}



#________________________________________________________________________
# Title     : scramble_sequences
# Usage     : %out = %{&scramble_seq_hash(\%input_seq_hash)};
# Function  : gets ref. of strings, scambles the elems.
# Example   :
# Warning   :
# Keywords  : make_scrambled_seq, make_scrambled_sequence, scramble_seq_hash,
#              scramble_sequences, shuffle_sequences, shuffle_seq
# Options   :
# Returns   : (\%hashout), or (\%hash1, \%hash2,,,,,)
# Argument  : ref. of hash, eg(1, 'skdfj', 2, 'kdfjkdj', 3, 'kdfjk');
#
#             Input example:
#             ..
#             >HI0256
#             FLSANVLPIAPIINGGRTAVDNITQSVSDKPFVKDIGTKIKEAIALSKYSTQPQYISTTN
#             >HI0094
#             DILRTFVKMETGLKFPKKFKLKANLALFMNRRNKRPDTIMTAVADAGQKISEAKLNTTAK
#             ..
#
#             Output example: (scrambled :-)
#             ..
#             >HI0256_sc   <<-- note the added extension
#             ALDJFLKAJFJALSDJFLAJSLFJAKLSDFJLASJDFLAJSLDFJASJDFLJSDFJSDLJ
#             >HI0094_sc
#             LASJDFLKAJFJALSDJFLKSDJLFAJLKDJFLASJDFLKDFJKDJFKDJFKDJFKJDLJ
#             ..
# Version   : 1.5
#--------------------------------------------------------------------
sub scramble_sequences{
   my($gap_char)='';
   my(@ran_hash_ref, $i, $j);
   for($i=0; $i< @_; $i++){
		my(%in_hash)=%{$_[$i]};
		my(@names)  =keys %in_hash;
		my( @input, @random, %ran_hash );
		srand(time()|$$);  # or use srand(time^$$);
		for ($j=0; $j < @names; $j++){
			if($in_hash{$names[$j]} =~ /\w\,\w\,/){
			   $gap_char=',';
			}else{ $gap_char='';    }
			@input =split(/$gap_char/, $in_hash{$names[$j]});
			undef(@random);
			while (@input){
			   push(@random, splice(@input, int(rand(@input)) , 1) );
			}
			$ran_hash{"$names[$j]\_sc"}= join("$gap_char", @random);
		}
		push(@ran_hash_ref, \%ran_hash);
	}
	if(@ran_hash_ref ==1){
	return($ran_hash_ref[0]);
	}else{ return(@ran_hash_ref);}
}




#___________________________________________________________________
# Title     : scramble_array
# Usage     : @in=@{&scramble_array(\@in)};
# Function  : shuffles the elements of array
# Example   :
# Keywords  : randomise_array, randomize_array, shuffle_array
# Options   :
# Version   : 1.4
#---------------------------------------------------------------
sub scramble_array{
	srand(time()|$$);  # or use srand(time^$$);
	my ($i, @scrambled, @out, @each_array);

	for($i =0; $i< @_; $i++){
	   my @each_array = @{$_[$i]};
	   while (@each_array) {
		   push @scrambled, splice @each_array, int(rand(@each_array)), 1;
	   }
	   push(@out, \@scrambled);
	}
	if(@out > 1){
	   return(@out);
	}else{
	   return($out[0]);
	}
}


#________________________________________________________________________
# Title     : rand_sequence_mul_array  (not hash!)
# Usage     : @out = @{&rand_sequence_mul_array(\@input_mul_seq_array)};
# Function  : gets a ref. of an string, scambles the elem.
# Example   :
# Warning   : This scrambles sequences!!
# Keywords  : scramble_sequence_mul_array, shuffle_sequence_mul_array
# Options   :
# Returns   : one ref. of  mul_array, eg. ('jfkdj', 'kdfjsdj', 'jjjkk')
# Argument  : one ref. of mul_array, eg. ('lsjdfj', 'kdfjsdj', 'jjjkk')
# Version   : 1.1
#--------------------------------------------------------------------
sub rand_sequence_mul_array{
  my(@in_array)=@{$_[0]};
  my(@ran_array, $random);
  srand(time()|$$);
  for ($i = 0; $i < @in_array ; $i++){
	my(@input)=split(//,$in_array[$i]);
	 my($len1)=@input;
	 my($len2)=$len1;
	 my($ran_pos, $ran_num, $random, @random);
	 for ($k=$len1; $k > 0 ; $k--){ $ran_pos = int(rand($k));
		$ran_num=splice(@input, $ran_pos, 1); push(@random, $ran_num);  }
	 $random=join("",@random);
	 $ran_array[$i]=$random; }
  return(\@ran_array);
}

#________________________________________________________________________
# Title     : rand_sequence_one_string
# Usage     : @out = @{&rand_sequence_one_string(\$input_seq_string)};
# Function  : gets a ref. of a  string, scambles the elem.
# Example   :
# Warning   : This scrambles sequences!!
# Keywords  :
# Options   :
# Returns   : one ref. of string,
# Argument  : one ref. of string, eg ( 'ldkfjlsdjfsdjflj' )
# Version   : 1.0
#--------------------------------------------------------------------
sub rand_sequence_one_string{
  my($input)=${$_[0]};
  my(@input)=split(//,$input);
  my($len1)=$#input+1;
  my($len2)=$len1;
  my($ran_pos, $ran_num, @random, $random);
  srand(time()|$$);
  for ($i=$len1; $i > 0 ; $i--){    $ran_pos = int(rand($i));
	 $ran_num=splice(@input, $ran_pos, 1);   push(@random, $ran_num); }
  $random=join("",@random);
  return(\$random);
}

#________________________________________________________________________
# Title     : rand_sequence_one_array
# Usage     : @out = @{&rand_sequence_one_array(\@input_seq_array)};
# Function  : gets a ref. of an array, scambles the elem.
# Example   :
# Warning   : This scrambles sequences!!
# Keywords  :
# Options   :
# Returns   : one ref. of array,
# Argument  : one ref. of array, eg ('e', 'b', 'c', 'd')
# Version   : 1.0
#--------------------------------------------------------------------
sub rand_sequence_one_array{
  my(@input)=@{$_[0]};
  my($len1)=@input;
  my($len2)=$len1;
  my($ran_num,@random,$ran_pos);
  srand(time()|$$);
  for ($i= $len1; $i > 0 ; $i--){   $ran_pos = int(rand($i));
	 $ran_num=splice(@input, $ran_pos, 1); push(@random, $ran_num); }
  return(\@random);
}

#________________________________________________________________________
# Title     : make_random_sequence
# Usage     : $protein = ${&make_random_sequence(1, 400)};
# Function  : gets one or more numbers for seq length and makes random sequences
#             It can handle proportional random sequenes according to the
#             amino acid occurance matrix.
# Example   : $out=${&make_random_sequence(@ARGV)};  While @ARGV can be '1 200 -p'
# Warning   :
# Keywords  : scramble_sequence, make_scrambled_sequence, shuffle_sequence
#             random_sequence, make_random_sequence, generate_random_protein_seq
#             create_random_sequene create_random_aa_sequence
# Options   : 'p' for proportional random sequence option
#             'f' for fastsa format output (returns one ref. of HASH)
# Returns   : one or more scalar references according to the input numbers.
# Argument  : 1 200 [-p] [@array_of_array_refs]
#             1 = num of seq, 200=leng of seq, -p =option, @arr.. = option
#             You can optionally give amino acid matrices
# Version   : 1.4
#--------------------------------------------------------------------
sub make_random_sequence{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($seq_length, $residue, @out_seq_ref);
	if(@num_opt==1){
		$num_of_seq=1;
		$seq_length=$num_opt[0];
	}elsif(@num_opt ==2){
		$num_of_seq=$num_opt[0];
		$seq_length=$num_opt[1];
	}
	srand(time());
	if($char_opt=~/p/i){ ####### PROPORTIONAL random #####
	  my ($rand_protein_seq, $sum, $random);
	  if(@array>0){  # when matrices are given in @hash
		  @array1=@{$array[0]};
		  if($array1[0]=~/\d+/){
			  @aafq  =@array1;
			  @aminos=@{$array[1]};
		  }else{
			  @aminos=@array1;
			  @aafq  =@{$array[1]};
		  }
	  }else{
		  @aafq = (.08713, .03347, .04687, .04953, .03977,
				  .08861, .03362, .03689, .08048, .08536,
				  .01475, .04043, .05068, .03826, .04090,
				  .06958, .05854, .06472, .01049, .02992);

		  @aminos=("A",     "C",     "D",    "E",    "F",
				  "G",     "H",     "I",    "K",    "L",
				  "M",     "N",     "P",    "Q",    "R",
				  "S",     "T",     "V",    "W",    "Y");
	  }
	  if($debug==1){
		  printf("> rand%d random, from $0, len %d\n",
		  $i+1, $num_opt[0]);
	  }
	  for($j=0; $j<$num_of_seq; $j++){
		  my $rand_protein_seq, $random, $sum;
		  for($i=1; $i <= $seq_length; $i++){
			 $random=rand;
			 print "\n$random " if $debug ==1;
			 $sum =0;
			 for($x=0; $x< 20; $x++){
				$sum+=$aafq[$x];
				if( $sum >= $random){
					if($char_opt=~/f/i){
						$fasta{"rand${j}"} .=$aminos[$x]; last;
					}else{
						$rand_protein_seq .= $aminos[$x];
						last;
					}
				}
			 }
			 if($random > $sum){ $rand_protein_seq .= $aminos[19]; }
			 if( (!($i%60)) && ($char_opt=~/f/i) ){ $fasta{"rand${j}"} .= "\n" }
			 elsif( !($i%60) ){ $rand_protein_seq .= "\n" }
		  }
		  print "\nProportionasl Rand SEQ\n : $rand_protein_seq \n" if $debug ==1;
		  if($char_opt=~/f/i){ push(@fasta_out_ref, \%fasta) }
		  else{ push(@out_seq_ref, \$rand_protein_seq) }
	  }
	}else{
	  for($j=0; $j<$num_of_seq; $j++){
		 my $rand_protein_seq, $residue;
		 for $i (1..$seq_length) {
			$residue = pack("c", rand(128));
			redo unless $residue =~ /[ACDEFGHIKLMNPQRSTVWY]/i;
			if($char_opt=~/f/i){
				$fasta{"rand${j}"} .=$residue;
			}else{
				$rand_protein_seq .= $residue;
			}
			if( (!($i%60)) && ($char_opt=~/f/i) ){ $fasta{"rand${j}"} .= "\n" }
			elsif( !($i%60) ){ $rand_protein_seq .= "\n" }
		 }
		 $rand_protein_seq=~tr/a-z/A-Z/;
		 $fasta{"rand${j}"}=~tr/a-z/A-Z/;
		 if($char_opt=~/f/i){ push(@fasta_out_ref, \%fasta) }
		 else{ push(@out_seq_ref, \$rand_protein_seq) }
	  }
	}
	if($debug==1){ print "\n",%fasta, "\n" }
	if($char_opt=~/f/i){ return(@fasta_out_ref) }
	elsif(@out_seq_ref == 1){ return($out_seq_ref[0]); }
	elsif(@out_seq_ref > 1){ return(@out_seq_ref); }

}




#________________________________________________________________________
# Title     : rand_DNA_seq_generate  (produces randomized sequences)
# Usage     : $DNA = ${&rand_DNA_seq_generate(400)};
# Function  : gets one or more numbers for seq length and makes random sequences
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one or more scalar references according to the input numbers.
# Argument  : (343) or (\$length)
# Version   : 1.0
#--------------------------------------------------------------------
sub rand_DNA_seq_generate { my($seq_length,$rand_DNA_seq, @out_seq_ref,$i,$residue);
  for($i=0; $i<@_; $i++){
	 if( ref($_[$i]) && (ref($_[$i]) eq 'SCALAR') ){
		if(${$_[$i]} =~/\d+/){ $seq_length = ${$_[$i]}; }    }
	 elsif( !ref($_[$i]) ){ if($_[$i] =~/\d+/){ $seq_length = $_[$i]; }}
	 else{ print "\n rand_DNA_generate in $0 gets number\(s\) \n"; exit; }
	 srand(time()|$$);
	 for (1..$seq_length) {
		 $residue = pack("c", rand(128));  redo unless $residue =~ /[ACGT]/;
		 $rand_DNA_seq .= $residue;   }
	 push(@out_seq_ref, \$rand_DNA_seq);  }
  if(@out_seq_ref == 1){ return($out_seq_ref[0]); }
  elsif(@out_seq_ref > 1){ return(@out_seq_ref); }
}


#________________________________________________________________________
# Title     : rand_RNA_seq_generate  (produces randomized sequences)
# Usage     : $DNA = ${&rand_RNA_seq_generate(400)};
# Function  : gets one or more numbers for seq length and makes random sequences
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one or more scalar references according to the input numbers.
# Argument  : (343) or (\$length)
# Version   : 1.0
#--------------------------------------------------------------------
sub rand_RNA_seq_generate { my($seq_length,@out_seq_ref,$rand_RNA_seq, $i,$residue);
  for($i=0; $i<@_; $i++){
	 if( ref($_[$i]) && (ref($_[$i]) eq 'SCALAR') ){
		if(${$_[$i]} =~/\d+/){ $seq_length = ${$_[$i]}; }   }
	 elsif( !ref($_[$i]) ){ if($_[$i] =~/\d+/){ $seq_length = $_[$i]; } }
	 else{ print "\n rand_RNA_generate in $0 gets number\(s\) of them\n"; exit; }
	 srand(time()|$$);
	 for (1..$seq_length) {
		 $residue = pack("c", rand(128));  redo unless $residue =~ /[ACGU]/;
		 $rand_RNA_seq .= $residue;    }
	 push(@out_seq_ref, \$rand_RNA_seq);  }
  if(@out_seq_ref == 1){ return($out_seq_ref[0]); }
  elsif(@out_seq_ref > 1){ return(@out_seq_ref); }
}

#____________________________________________________________________________
# Title     : replace_text
# Usage     : &replace_text(\@input_array_of_filenames);
# Function  : finds patterns of text and replaces them in multiple input files
# Example   :
# Warning   : This produces a temporary file and rename it...
# Keywords  :
# Options   :
# Returns   : nothing
# Argument  : reference of one array of file names in pwd
# Version   : 1.3
#--------------------------------------------------------------------
sub replace_text{
  my ( @input_files );
  $|=1;
  my $old=shift ;
  my $new=shift;
  @input_files=@_;
  my($file);

  for $file(@input_files){
	 open (IN, "$file");
	 my @lines=<IN>;
	close(IN);
	 open (OUT, ">$file");
	 for (@lines){
		if($_=~/^(.*)($old)(.*)$/){
			print "\nmatched $_ in $file\n";
			$temp="$1$new$3\n";
			print OUT $temp;
		}else{
			print OUT $_;
		}
	 }
	 close OUT;
	 if($file =~/\.pl$/){ chmod 0755, $file; } # this makes it execu..
  }
}



#________________________________________________________________________
# Title     : get_av_seq_length
# Usage     : $std_devi_of_lengths = &get_av_seq_length(\%hash_ref);
# Function  : gets hash of sequence, compares lengths, and outs av.
# Example   :
# Warning   : uses a sub  &array_average(\@lengths);
# Keywords  :
# Options   :
# Returns   : one ref. for scaler digit.
# Argument  : one hash reference for sequences.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_av_seq_length{
  my(%hash1)= %{$_[0]};
  my(@names)=keys %hash1;
  my($sequence, $len, @lengths, $av_seq_length);
  for $name (@names){  $sequence = $hash1{$name}; $sequence =~s/\W//g;
	 $len =length($sequence);  push(@lengths,$len); }
  $av_seq_length=&array_average(\@lengths);
  return(\$av_seq_length);
}

#________________________________________________________________________
# Title     : get_sd_of_length_diff
# Usage     : $result = &get_sd_of_length_diff(\%input);
# Function  :
# Example   :
# Warning   : removes all non-char(.-, space....) in the input string
# Keywords  :
# Options   :
# Returns   : one scaler digit
# Argument  : gets one hash reference,
# Version   : 1.0
#--------------------------------------------------------------------
sub get_sd_of_length_diff{
  my(%hash1)= %{$_[0]};
  my(@names)=keys %hash1;
  my($sequence, $len, @lengths, $std_devi);
  for $name (@names){ $sequence = $hash1{$name};
	 $sequence =~s/\W//g; $len =length($sequence);
	 push(@lengths,$len); }
  $std_devi=&sd(\@lengths);
  \$std_devi;
}
#________________________________________________________________________
# Title     : get_av_and_sd_seq_length
# Usage     : $get_av_and_sd_seq_length= &get_av_seq_length(\%hash_ref);
# Function  : gets ref of hash of sequence, compares lengths, and outs av.
# Example   :
# Warning   : uses a sub  &array_average(\@lengths);
# Keywords  :
# Options   :
# Returns   : Two scaler digit.
# Argument  : Two hash references for sequences.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_av_and_sd_seq_length{
  my(%hash1)= %{$_[0]};
  my(@names)=keys %hash1;
  my($sequence, $len, @lengths, $av_seq_length);
  for $name (@names){
	 $sequence = $hash1{$name}; $sequence =~s/\W//g; $len =length($sequence);
	 push(@lengths,$len); }
  $av_seq_length=${&array_average(\@lengths)};
  $std_devi=${&sd(\@lengths)};
  @av_seq_length_and_std_devi=($av_seq_length, $std_devi);
  return(\@av_seq_length_and_std_devi);
}

#________________________________________________________________________
# Title     : get_seq_hash_sans_gaps.pl
# Usage     : ($ref_out1, $ref_out2)=&get_seq_hash_sans_gaps(\%hash, \%hash);
#              %out=%{&get_seq_hash_sans_gaps(\%hash)};
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_seq_hash_sans_gaps{ my(@in)=@_; my(%in, @keys); my(@out_hash_ref, $k,%out_hash);
  for($k=0; $k<=$#in; $k++){  %in=%{$in[$k]}; @keys=keys %in;
	 for(@keys){ $in{$_}=~ s/[-.]//g;  $out_hash{$_}=$in{$_};  }
	 push(@out_hash_ref, \%out_hash);  }
  if($#out_hash_ref == 0){ return($out_hash_ref[0]); }
  elsif($#out_hash_ref > 0){  return(@out_hash_ref); }
}


#________________________________________________________________________
# Title     : get_posi_sans_gaps (get positions without after removing gaps)
# Usage     : @seq_position1 = &get_posi_sans_gaps($string1);
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : the positions of residues after removing gaps(but keeps pos).
#               used for analysis of shifted positions of seq. comparison.
# Argument  : one scalar variable input of sequence string.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_sans_gaps{
   my($string) = ${$_[0]};
   my($char, @positions, $i);
   for($i=0; $i < length($string); $i++){
	  $char=substr($string,$i,1);
	  if(($char eq '-')||($char eq '.')){  next; }else{ push(@positions, $i); } }
   return(\@positions);
}

#________________________________________________________________________
# Title     : get_posi_shift_rate
# Usage     : $rate_final = &get_posi_shift_rate("perl.msf", "perl.jp");
# Function  :
# Example   :
# Warning   : sub  hash_common was unstable.
# Keywords  :
# Options   :
# Returns   : one ref. for scalar value of shift rate of position for proteins.
# Argument  : takes two file names for seq. and struc. alignment.
#             : Assumes the files are in the pwd.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_posi_shift_rate{
  my($test_seq_file, $struc_seq_file)=@_;
  my($string1, $string1, $av_rate, @rates, $rates_ref);
  $pwd = &pwd_path;
  $file1=$pwd.'/'.$test_seq_file;	# for clustalw alignment (usu, xxxx.msf)
  $file2=$pwd.'/'.$struc_seq_file;	# JPO's structural alignment (usu, xxxx.jp)
  if((-e $file1) && (-e $file2)){
		  %array1=&open_msf_files($file1);	# results are : %array1, %array2, @names1, @names2
		  %array2=&open_jp_files ($file2);
		  $ref_hash_1 = \%array1;
		  $ref_hash_2 = \%array2;
		  %array1 = &hash_common($ref_hash_1, $ref_hash_2);
		  %array2 = &hash_common($ref_hash_2, $ref_hash_1);
		  %array1 = &remov_com_column($ref_hash_1); # this removes wrong gaps(in '.' form, in MSF)
		  %array2 = &remov_com_column($ref_hash_2);
		  @names=sort keys %array1;print "\n\n";
		  $num_of_seq = $#names+1;  # <- this is true number of seq.
		  for($j=0; $j< $num_of_seq ; $j++){  # nf1 is the number of seq.
					  $name=$names[$j];
					  $string1=$array1{$name};
					  $string2=$array2{$name};
					  $ls=length($string1); 	# $ls is the whole length -1.
					  $ls2=length($string2);			# $string1 has the whole seq
					  $sum1=0; 		# $sum is for accumulating position difference
					  @seq_position1 = &get_posi_sans_gaps($string1);
					  @seq_position2 = &get_posi_sans_gaps($string2);
					  @position_diffs =&get_posi_diff(\@seq_position1, \@seq_position2);
					  $rate = &sd(@position_diffs);
					  push(@rates, $rate);
			}
			$rates_ref=\@rates;
			$av_rate = &array_average($rates_ref);
	}
	return(\$av_rate);
}

#________________________________________________________________________
# Title     : read_hssp_no_inserts
# Usage     : %anyarray = &read_hssp_no_inserts ($any_sequence_file_hssp_form);
# Function  : read hssp file and put sequences in a hash
# Example   :
# Warning   : It produces incomplete sequences when hssp seqs. have insertions.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub read_hssp_no_inserts {    								# reading hssp files. input is a file name.
	 my($input_file)=${$_[0]};	 # $_[0]=>input eg. $dir.$out_seq_no.hssp
	 my($whole_sequence,@names,%gaps,%second_hash,@chars);
	 my($c,$Sequence,$char,$no_of_seq, $AliNo, $first_part);
	 my($diff, $attachment, $second_part, $len_2nd_part);
	 unless (-e $input_file){
		 print chr(7);		     # beep warning for error
		 print "\n\n\t This is sub read_hssp_file  in th_lib.pl \n\n";
		 print "\n\n\t Fatal: The input file $file1 is not in the directory \n";
		 print "\n\n\t  !!! Program dies abnormally, bugs !!! \n";
		 &caller_info; print "\n"; exit;
	 }
	 open(FILE_1,"$input_file"); # reading in (hssp)
	 while(<FILE_1>){   		# file1 needs to be xxxx.hssp for the moment, automatic later
			 if (/PDBID\s+([\d\w]+)/){    # to get the test sequence file (mother sequence)
				 $names[0] = $1;           # put in an array as the first element.
				 next;                     # do not need to look at further.
			 }
			 if(/^\s+\d+ : ([\w_\d]+)\s+/){  # matching names part.
				 push(@names, $1);       # getting all other names of sequences.
				 next;                   # do not need to look at further.
			 }
			 $no_of_seq = $#names+1;        # true num. of seq. inc. mother sequence.
			 if(/^\s+\d+\s+\d+\s+([\w ]).+\d+ +\d+ +\d+  ([\w\t\s\.]+)$/){
				 $first_part  = $1;  # somehow this step is necessary(?)
				 $second_part = $2;  #
				 chomp($second_part);  # hssp has new line at the end. !! essential !!
				 $second_part =~ s/ /./g;  # converting all space to dots.
				 $len_2nd_part = length($second_part);
				 if ($len_2nd_part < $no_of_seq ){
						  $diff = ( $#names - $len_2nd_part);
						  $attachment = "."x($diff);  # filling spaces.
						  $second_part.=$attachment;
				 }
				 $whole_sequence .= $first_part.$second_part; # very long sequence.
				 $c+=1;             # counter for the whole lines
				 next;              # do not need to look at further.
			 } # <----the result of this if{} is a long string  $whole_sequence
			 if (/## SEQUENCE PROFILE AND ENTROPY/){
				 last;   # do not need to look at further.
			 }
	 }
	 @chars = split(//, $whole_sequence);  # putting into an array to handle
	 for ($i=0; $i < $c ; $i++){
		for ($k=0; $k<=$#names; $k++){
				  $char = shift( @chars);
				  $second_hash{$names[$k]} .= $char;
		}
	}
	return(\%second_hash); # this is the final output.
}


#________________________________________________________________________
# Title     : open_pdbg_files
# Usage     : %seq=%{&open_pdbg_files($tim_seq_file, ['1fcdc1'], [s] )};
#             if you put additional seq name as 1fcdc1 it will
#             fetch that scopclass only in the database file.
#             Any digit will be used as minimum seq size to be fetched.
# Function  : open pdb group files and put scopclass in a hash.
#             PDB group file format is like this;
#
#  >d1bia_1 1.4.3.1.1 (1-63) Biotin repressor, N-terminal domain [Escherichia coli]
#  >d1baba_ 1.1.1.1.15 Hemoglobin, alpha-chain [human (Homo sapiens)]
#  >d1cpcb_ 1.1.1.2.1 C-phycocyanin [cyanobacterium (Fremyella diplosiphon)]
#  >d1fcdc2 1.3.1.3.1 (81-174) Flavocytochrome c sulfide dehydrogenase, FCSD, cytochrome subunit [Purple phototrophic bacterium (Cromatium vinosum)]
#
#             This can also return the sizes of sequences rather than seqs.
# Example   : %out = %{&open_pdbg_files(@ARGV)};
#             while @ARGV at prompt was: 'pdb_40.pdbg'
# Warning   :
# Keywords  : open_pdbg_files, open_pdb_group_files
# Options   : any digit for the minimum seq length
#        b  for simple style reading (this reads in the name of pdbg file as it is)
#
# Returns   :
# Argument  :
# Version   : 1.5
#--------------------------------------------------------------------
sub open_pdbg_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

   my (%sequence, $S_start, $pdb_name, $range, $S_end, @seq_Names, %Sizes,
	   $domain, $pdb_name, $domain_num, $scop_class, $sequence);
   my $min_size=$num_opt[0];

   if(@file<1){ print "\n# open_pdbg_files: There is no fileinput for open_pdbg_files\n"; exit}

   for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];

	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  # Checking if file is there
	  #_________________________________
	  if($debug eq 1){ print "\n open_pdbg_files: Inputfile is $input_file\n" };
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_fas_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
	  }
	  open(FILE_1,"$input_file");
	  if(@hash >=1){
		 for($h=0; $h< @hash; $h++){
			push(@string, keys %{$hash[$h]});
		 }
	  }

	  if( (@_ > 1)&&(@string > 0) ){  # when seq to fetch is given
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		#  Simplified style reading
		#_______________________________________________
		if($char_opt=~/b/){
			while(<FILE_1>){
			   if(/^ *\> *($string[$j]) +([\d\.]+) *\(?(\d*\-?\d*)\)?/){
				   $pdb_name=$1;
				   $range=$3;
				   $scop_class=$2;
				   if($char_opt=~/r/ and $range !~/\d+\-\d+/){ $range='' }else{ $range="_$range" }
				   $sequence{"${pdb_name}${range}"} = $scop_class;
			   }
			}
		}else{
			while(<FILE_1>){
			   for($j=0; $j<= @string; $j++){
				   if(/^ *\> *[cd]($string[$j])_* +([\d\.]+) *\(?(\d*\-?\d*)\)?/i){
						 $pdb_name=$1;
						 $range=$3;
						 $scop_class=$2;
						 if($char_opt=~/r/ and $range !~/\d+\-\d+/){ $range='' }else{ $range="_$range" }
						 splice(@string, $j, 1); ## for optimization
						 $sequence{"${pdb_name}${range}"} = $scop_class;
						 return(\%sequence) if(@string < 1);
				   }
			   }
			}
		}
	  }else{ # getting all seq in the given file(s)
		   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		   #  Simplified style reading
		   #_______________________________________________
		   if($char_opt=~/b/){
			   while(<FILE_1>){
				  if(/^ *\> *(\S+) +([\d\.]+)/){
					  $pdb_name=$1;
					  $scop_class=$2;
					  $sequence{"${pdb_name}"} = $scop_class;
				  }
			   }

		   }else{ #### This takes only the real names rather than dxxxx__ style
			   while(<FILE_1>){
				  if(/^ *\> *[cd](\S\S\S\S)(\S)(\S) +([\d\.]+) *\(?(\d*\-?\d*)\)?/){
					  $pdb_name=$1;
					  $range=$5;
					  $domain=$2;     #<-- like A or B or C
					  $domain_num=$3;
					  $scop_class=$4;
					  if($range !~/\d+\-\d+/){ $range='' }else{ $range="_$range" }
					  if($domain !~ /^_$/){
						 $domain="\U$domain";
					  }else{ undef($domain) }
					  if($domain_num !~/^_$/){
						 $domain_num=$3;
					  }else{ undef($domain_num) }
					  $sequence{"${pdb_name}${domain}${range}"} = $scop_class;
				  }else{ next }
			   }
		   }
	  }
	  close FILE_1;
   }

   if(defined(@range)){
	   %seq_fragments=%{&get_seq_fragments(\%sequence, \@range)};
	   return(\%seq_fragments);
   }elsif($char_opt=~/s/i){ # when SIZE return only option is set
	   @seq_Names=keys %sequence;
	   for($i=0; $i < @seq_Names; $i++){
		  $Sizes{$seq_Names[$i]}=length($sequence{$seq_Names[$i]});
	   }
	   return(\%Sizes);
   }elsif(@hash >=1){
	   for($h=0; $h< @hash; $h++){
		  my %hash=%{$hash[$h]};
		  my @Keys=keys %hash;
		  for($k=0; $k<@Keys; $k++){
			 if(defined($hash{$Keys[$k]})){
				($S_start, $S_end)=($hash{$Keys[$k]}=~/(\d+)\-(\d+)/);
				$sequence{$Keys[$k]}=substr($sequence{$Keys[$k]}, ($S_start-1), ($S_end-$S_start));
			 }
		  }
	   }
	   return(\%sequence);
   }else{
	   return(\%sequence);
   }
}

#________________________________________________________________________
# Title     : open_fasta_files
# Usage     : %fasta_seq=%{&open_fasta_files($fasta_file, ['MJ0084'])};
#             if you put additional seq name as MJ0084 it will
#             fetch that sequence only in the database file.
#
#             %out=%{&open_fasta_files(@ARGV, \%index)};
#               while  %index has (seq indexpos seq2 indexpos2,,,)
#               In this case, the fasta file should have xxxx.fa format
#
# Function  : open fasta files and put sequences in a hash
#              If hash(es) is put which has sequence names and seek position
#              of the index file, it searches the input FASTA file to
#              fetch at that seek position. This is useful for Big fasta DBs
#             If the seq name has ranges like  XXXXXX_1-30, it will only
#              return 1-30 of XXXXXX sequence.
#
#             FASTA sequence file format is like this;
#
#             > 1st-seq
#             ABCDEFGHIJKLMOPABCDEFGHIJKLMOPABCDEFGHIJKLMOPABCDEFG
#             > 2nd.sequ
#             ABCDEFGHIJKLMOYYUIUUIUIYIKLMOPABCDEFGHIJKLMOPABCDEFG
#             >owl|P04439|1A03_HUMAN HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-3 ALPHA CHAIN PRECURSOR....
#             MARGDQAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDT
#
#             This can also return the sizes of sequences rather than seqs.
#
#             This ignores any dup entrynames coming later.
#
# Example   : %out = %{&open_fasta_files(@ARGV)};
#             %out2=%{&open_fasta_files('seq.fa', \%index)};
#             %out3=%{&open_fasta_files('seq.fa', \%range)};
#             %seq=%{&open_fasta_files($PDB40_FASTA, \@seq_to_fetch)};
#
#             while @ARGV at prompt was: 'GMJ.pep MJ0084'
#
# Keywords  : open_fasta, open_fa_files, open_FASTA_files,
# Options   : Seq name to fetch the specified seq only.
#             as open_fasta_files.pl MY_SEQ_NAME Swissprot.fasta
#            -d  for giving back desc as well as the name. so it
#                gives  'HI0002 This is the description part'
#                as the key
#             If you put hash which is like('seq_name', ['20-30', '30-44',..])
#              it will produce hash which has got:
#              ( seq_name_20-30 'asdfasdfasdfasdfasd',
#                seq_name_30-44 'kljkljkjkjljkjljkll',
#                ....           .... )
#            -s for returning sequence size only
# Version   : 3.9
#--------------------------------------------------------------------
sub open_fasta_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

   my (%sequence, %HASH, @Keys, $seq_found1, $S_start, $S_end, $seq_found,
	   $present_seq, @seq_Names, %Sizes, $bare_seq_name, $fasta_seq_idx_file,
	   %seq_fragments);

   if(@file<1){
	  print "\n \@file has less than 1 elem. There is no fileinput for open_fasta_files\n";
	  exit
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  (1) When perl file INDEX pos info is given in hash, this speeds up
   #_________________________________________________________________________________
   for($d=0; $d < @hash; $d++){
	   my ($sequence, $NAME, $range_start, $range_leng);
	   %HASH=%{$hash[$d]};
	   my @Keys=keys %HASH;  ## <<< NOTE it is @Keys, not @keys
	   for($f=0; $f< @file; $f++){
		  #====== It must be xxxx.fa format =======
		  unless($file[$f]=~/\S\.fa[sta]?$/){
			  print "\n# open_fasta_files: \$file\[\$f\] does not have fasta extension, skipping"; next; }
		  open(FASTA, $file[$f]);
		  F0: for($e=0; $e< @Keys; $e++){
			 my $sequence;
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # When seq name has range attachment, it handles
			 #________________________________________________
			 if($Keys[$e]=~/^(\S+)_(\d+)\-(\d+)/){
				 $NAME=$1;
				 $range_start=$2-1;    ## to fit in substr function
				 $range_leng =$3-$2+1; ## to fit in substr
			 }else{
			     $NAME=$Keys[$e];
			 }
			 if($HASH{$Keys[$e]}=~/^(\d+)$/){
				 splice(@hash, $d, 1);
				 $d--;
				 splice(@file, $f, 1);
				 $f--;
				 seek(FASTA, $1-220, 0);  # -220 is necessary
				 while(<FASTA>){
					 if( /^\> *$NAME/  or
						 /^\> *owl\|\S+\|$NAME/){  # to handle ">owl|P04439|1A03_HUMAN HLA CLASS I HISTOCOMPATIBILITY
					        $seq_found1=1;
					 }elsif(/^(\w+)$/ and $seq_found1==1){	 $sequence .=$1;
					 }elsif(/^\> *\S+/ and $seq_found1==1){
						  #======= When range is defined, take only the ranged part==================
						  if($range_start =~/\d+/){
							  $sequence{$Keys[$e]}=substr($sequence, $range_start, $range_leng);
						  }else{	 $sequence{$Keys[$e]}=$sequence; }
						  $range_start='';
						  $sequence='';
						  $seq_found1=0; next F0;
					 }
				 }
			  }
		  }
	  }
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
   # (2) opening FASTA files (NORmal, when no perl index pos number is given)
   #_______________________________________________________________________
   for($i=0; $i< @file; $i++){
	   unless(-s $file[$i]){ next; } ## this is essential as handle_arguments has a problem
	   my($entry_found, $name, $matched);
	   my($input_file) = ${$file[$i]} || $file[$i];

	   if($debug eq 1){ print "\n open_fasta_files: Inputfile is $input_file\n" };
	   unless (-e $input_file){
			print chr(7);
			print "\n\n\t This is sub open_fas_files in $0  \n\n";
			print "\t Fatal: The input file $input_file is not in the directory \n";
	   }
	   open(FILE_1,"$input_file");
	   if(@hash >=1){  ## if seq names are given in hash
		   for($h=0; $h< @hash; $h++){
			  @string=(@string, keys %{$hash[$h]});
		   }
	   }
	   @string=sort @string;
	   $num_of_seq_to_fetch=@string;
	   if(@string > 0){
		   print "\n# open_fasta_files(normal fasta fetch): \$num_of_seq_to_fetch is $num_of_seq_to_fetch\n";
	   }

	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	   #  (2.1) when seq to fetch is given by \@sequences  format
	   #_______________________________________________________________________
	   if( @_ > 1  and  @string > 0 ){
		   print "\n#  open_fasta_files is fetching sequences from \$input_file= $input_file\n";
		   %sequence=%{&fetch_sequence_from_db($input_file, \@string)};
		   print "\n# $fasta_seq_idx_file file is made by open_fasta_files(fetch_sequence_from_db), you may remove it\n";
	   }
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	   #  (2.2) When seq names NOT given, fetches all (THE DEFAULT)
	   #____________________________________________________________
	   else{
		 while(<FILE_1>){                # file1 needs to be xxxx.fasta for the moment, automatic later
			if(/^> *gi\|\d+\|\S+\|(\S+)\|.*/){  ## for >gi|1669546|dbj|D84107|D84107 Human mRNA for Werner syndrome-1/type 1, complete cds
				 if($char_opt=~/[\-]?d/i){  # To add the description
					 $name=$_;  # entire line becomes the name of the seque.
				 }else{
					 if( $sequence{$1} ){
						 #------- To avoid identical entry reading repeatedly -----
						 print "\n# I am open_fasta_files: $1 seems to be the same as previous entry, ERROR??\n";
						 $entry_found=0;
					 }else{      $name=$1;   $entry_found=1;     }
				 }
			}elsif(/^> *owl\|\S+\|(\S+)/){  ## for ">owl|P04439|1A03_HUMAN HLA CLASS I HISTOCOMPATIB
				 if($char_opt=~/[\-]?d/i){  # To add the description
					 $name=$_;  # entire line becomes the name of the seque.
				 }else{
					 if( $sequence{$1} ){
						 #------- To avoid identical entry reading repeatedly -----
						 print "\n# I am open_fasta_files: $1 seems to be the same as previous entry, ERROR??\n";
						 $entry_found=0;
					 }else{      $name=$1;   $entry_found=1;     }
				 }
			}elsif(/^> {0,5}([\w\-\.]+) *.*$/){
				 if($char_opt=~/[\-]?d/i){   $name=$_;  # To add the description
				 }else{
					 if( $sequence{$1} ){ # check if the entry already exists
						print "\n# $1 seems to be the same as previous entry, ERROR??\n";
						$entry_found=0;
					 }else{     $name=$1;   $entry_found=1;      }
				 }
			}elsif(/^([\w\.\- ]+)$/ and $entry_found == 1){
				 $matched=$1;    $matched=~s/ //g;
				 $sequence{$name}.= $matched if defined($name);
			}elsif(/^$/){  next;
			}else{  $entry_found=0;  } ## this is when rubbish is matched
		 }# end of while
	   }
	   close FILE_1;
   }


   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~~~~~~~~~~~~~~~~~~~~~`
   # (3) When ranges information is given(via \@range), seq in those ranges are returned
   #______________________________________________________________________________________
   if(defined(@range)){
	   %seq_fragments=%{&get_seq_fragments(\%sequence, \@range)};
	   return(\%seq_fragments);
   }
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
   # (4) When only size is asked with -s option
   #_____________________________________________________________________________
   elsif($char_opt=~/s/){ # when SIZE(length of seq) return only option is set
	   @seq_Names=keys %sequence;
	   for($i=0; $i<@seq_Names; $i++){
		  $Sizes{$seq_Names[$i]}=length($sequence{$seq_Names[$i]});
	   }
	   return(\%Sizes);
   }
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
   # (5) when hash which has range info is given(@range should not be defined)
   #_____________________________________________________________________________
   elsif(@hash >=1){
	   for($h=0; $h< @hash; $h++){
		   my %hash=%{$hash[$h]};
		   my @Keys=keys %hash;
		   for($k=0; $k<@Keys; $k++){
			   if(defined($hash{$Keys[$k]})){
				  ($S_start, $S_end)=$hash{$Keys[$k]}=~/(\d+)\-(\d+)/;
				  $sequence{$Keys[$k]}=substr($sequence{$Keys[$k]}, ($S_start-1), ($S_end-$S_start));
			   }
		   }
	   }
	   return(\%sequence);
   }else{
	   return(\%sequence);
   }
}




#________________________________________________________________________
# Title     : msf_permute_hash_write
# Usage     : &msf_permute_hash_write(\%hash, $group_name); # void
# Function  : gets 2 references (one for %hash the other for group $name)
#             uses &msf_permute_array_write(\%hash, \$group_name)
#             the second arg is for output file name. can be anything.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub msf_permute_hash_write{ my(%hash)=%{$_[0]}; my($group_name)=${$_[1]};
	my(@array)= keys %hash;  &msf_permute_array_write (\@array, $group_name );
}

#________________________________________________________________________
# Title     : msf_permute_array_write
# Usage     : &msf_permu_array_write(\%hash, \$group_name); # void
# Function  :
#             the second arg is for output file name. can be anything.
#             used in &msf_permu_hash_write
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  : gets 2 references
# Version   : 1.0
#--------------------------------------------------------------------
sub msf_permute_array_write{
  my(@array) =@{$_[0]};
  my($group_name)=${$_[1]};
  my($i,$j);
  for ($i=0;$i<@array;$i++) {
	 push(@result,$array[$i]);
	 if (@result>=2){
		  $count+=1;  # this is for the whole number of files(permutation) produced.
			for $key( @result){
			$input{$key}=$hash{$key};
		  }
		  $hash_ref_to_msfs_write=\%input;
					  #print %input;
					  @seq_names= (keys %input);
					  $seqno=($#seq_names) + 1;
					 #print $seqno;&n;
			$out_file_name="$group_name$count\-$seqno\.fas"; # output file name.
					  ###### eg. result file => perl1-2.fas, -2 means seq. no.
			&msf_write($hash_ref_to_msfs_write, $out_file_name);
	 }
		  %input=();
		  my(@input_array)=@array[$i+1..$#array];
		  &msf_permute_array_write(\@input_array) if $i<$#array;
			 #&msf_permu_array_write(@array[$i+1..$#array]) if $i<$#array;
		  pop(@result);  # <--this is essential.
  }
}

#________________________________________________________________________
# Title     : pir_permute_hash_write
# Usage     : &pir_permute_hash_write($hash_ref, $group_name); # void
# Function  : gets a reference of hash which has names and sequences as keys and values.
#             uses &pir_permute_array_write
#             the second arg is for output file name. can be anything.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pir_permute_hash_write{
  my(%hash);
  my($hash_ref)=$_[0];  my($group_name)=$_[1];
  %hash=%{$hash_ref};
  my(@array)= keys %hash;
  $array_ref1 = \@array;
  &pir_permute_array_write ($array_ref1, $group_name );
}

#________________________________________________________________________
# Title     : fasta_permute_hash_write
# Usage     : &fasta_permute_hash_write($hash_ref, $group_name); # void
# Function  : gets a reference of a hash which has names and sequences as keys and values.
#             uses &fasta_permute_array_write
#             the second arg is for output file name. can be anything.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub fasta_permute_hash_write{ my(%hash)=%{$_[0]}; my($group_name)=${$_[1]};
  my(@array)= keys %hash; $array_ref1 = \@array;
  &fasta_permute_array_write (\@array, \$group_name );
}
#________________________________________________________________________
# Title     : fasta_permute_array_write
# Usage     : &fasta_permu_array_write($hash_ref, $group_name); # void
# Function  : gets a reference of an array which has names and sequences as keys and values.
#             the second arg is for output file name. can be anything.
#             used in &fasta_permu_hash_write
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub fasta_permute_array_write{
  my(@array)=@{$_[0]};
  my($group_name)=${$_[1]};
  my($i,$j, @result);
  for ($i=0;$i<@array;$i++) {
	 push(@result,$array[$i]);
	 if (@result>=2){
		$count+=1; # this is for the whole number of files produced.
		for $key( @result){
		  $input{$key}=$hash{$key};
		}
		$hash_ref_to_fastas_write=\%input;
		#print %input;
		@seq_names= (keys %input);
		$seqno=($#seq_names) + 1;
		#print $seqno;&n;
		$out_file_name="$group_name$count\-$seqno\.fas"; # output file name.
		###### eg. result file => perl1-2.fas, -2 means seq. no.
		&fastas_write($hash_ref_to_fastas_write, $out_file_name);
	 }
	 %input=();
		my(@input_array)=@array[$i+1..$#array];
		my($array_ref3)=\@input_array;
		&fasta_permute_array_write($array_ref3) if $i<$#array;
		#&fasta_permu_array_write(@array[$i+1..$#array]) if $i<$#array;
		pop(@result);  # <--this is essential.
	 }
}
#________________________________________________________________________
# Title     : ssp_permute_hash_write
# Usage     : &ssp_permute_hash_write($hash_ref, $group_name); # void
# Function  : gets a reference of hash which has names and sequences as keys and values.
#             uses &ssp_permute_array_write
#             the second arg is for output file name. can be anything.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub ssp_permute_hash_write{
  my(%hash);
  my($hash_ref)=$_[0]; my($group_name)=$_[1];
  %hash=%{$hash_ref}; my(@array)= keys %hash;
  $array_ref1 = \@array; &ssp_permute_array_write ($array_ref1, $group_name );
}

#________________________________________________________________________
# Title     : pir_permute_array_write
# Usage     : &pir_permu_array_write($hash_ref, $group_name); # void
# Function  : gets a reference of hash which has names and sequences as keys and values.
#             the second arg is for output file name. can be anything.
#             used in &pir_permu_hash_write
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pir_permute_array_write{
		  my($array_ref1)=$_[0];
		  my($group_name)=$_[1];
		  my($i,$j);
		  my(@array)=@{$array_ref1};
		  for ($i=0;$i<@array;$i++) {
					 push(@result,$array[$i]);
					 if (@result>=2){
								 $count+=1; # this is for the whole number of files produced.
								 for $key( @result){
											$input{$key}=$hash{$key};
								 }
								 $hash_ref_to_pirs_write=\%input;
								 #print %input;
								 @seq_names= (keys %input);
								 $seqno=($#seq_names) + 1;
								 #print $seqno;&n;
								 $out_file_name="$group_name$count\-$seqno\.fas"; # output file name.
											###### eg. result file => perl1-2.fas, -2 means seq. no.
								 &pir_write($hash_ref_to_pirs_write, $out_file_name);
					 }
					 %input=();
					 my(@input_array)=@array[$i+1..$#array];
					 my($array_ref3)=\@input_array;
					 &pir_permute_array_write($array_ref3) if $i<$#array;
						#&pir_permu_array_write(@array[$i+1..$#array]) if $i<$#array;
					 pop(@result);  # <--this is essential.
		  }
}
#________________________________________________________________________
# Title     : ssp_permute_array_write
# Usage     : &ssp_permu_array_write($hash_ref, $group_name); # void
# Function  : gets a reference of hash which has names and sequences as keys and values.
#             the second arg is for output file name. can be anything.
#             used in &ssp_permu_hash_write
#             ssp file is for PHD secondary structure prediction service.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub ssp_permute_array_write{
	 my($array_ref1)=$_[0];
	 my($group_name)=$_[1];
	 my($i,$j);
	 my(@array)=@{$array_ref1};
	 for ($i=0;$i<@array;$i++) {
		  push(@result,$array[$i]);
		  if (@result>=2){
				 $count+=1; # this is for the whole number of files produced.
				 for $key( @result){
							$input{$key}=$hash{$key};
				 }
				 $hash_ref_to_ssp_write=\%input;
				 #print %input;
				 @seq_names= (keys %input);
				 $seqno=($#seq_names) + 1;
				 #print $seqno;&n;
				 $out_file_name="$group_name$count\-$seqno\.fas"; # output file name.
							###### eg. result file => perl1-2.fas, -2 means seq. no.
				 &ssp_write($hash_ref_to_ssp_write, $out_file_name);
		  }
		  %input=();
		  my(@input_array)=@array[$i+1..$#array];
		  my($array_ref3)=\@input_array;
		  &ssp_permute_array_write($array_ref3) if $i<$#array;
			 #&ssp_permute_array_write(@array[$i+1..$#array]) if $i<$#array;
		  pop(@result);  # <--this is essential.
	 }
}

#________________________________________________________________________
# Title     : permute
# Usage     : &permute(\@array);
# Function  : gets permutated array elements except single char elements.
#             fastest
# Example   :
# Warning   : from : Kenneth Albanowski <kjahds@kjahds.com> CIS: 70705,126)
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub permute{
	my(@array)= @{$_[0]};
	my($i,$j);
	for ($i=0;$i<@array;$i++){ push(@result,$array[$i]);
		print join(" ",@result),"\n" if @result>=2;
		&permute(@array[$i+1..$#array]) if $i<$#array;
		pop(@result); }
}
#________________________________________________________________________
# Title     : permute_binary
# Usage     :
# Function  : outs permutated array elements
# Example   : &permute_binary(@array);
# Warning   : from : silly@ugcs.caltech.edu
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub permute_binary{  my(@a)=@_;
  for ($i = 1; $i < (1 << ($#a + 1)); $i++) {
  for ($tmp = $i, $bit = 0; $tmp; ($tmp >>= 1), $bit++) {
	 if ($tmp & 1) { print "@a[$bit]"; push (@result, @a[$bit]);}}
  print "\n"; }
}

#________________________________________________________________________
# Title     : ssp_write
# Usage     : two argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
#             uses Perl5 pointers(references).
# Function  : writes multiple seqs. in fasta format (takes one or more than one seq.!!)
#             ssp is PHD server format.
# Example   : &ssp_write($hash_pointer, $out_file_name);
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub ssp_write{
			my($hash_ref1, $output_file)=@_;	# $name is the name of seq.
			my(%input)=%{$hash_ref1};
			open (SSP_WRITE,">$output_file");		# $string is the seq string.
			for $name(keys %input){
						my($ll)=0;								# $output_file is the name of output.
						$string=$input{$name};
						$string=~s/[ .-]//g;		# replaces all non-chars to null.
						$string=~tr/a-z/A-Z/;    # converts lower to upper cases.
						print SSP_WRITE "# $name.ssp\n";			# this writes only one seq. in one file.
						$ls2=length($string);					# use fasta_append for mul. lines of seq
						for($i=0; $i<$ls2; $i++){
								  $char=substr($string,$i,1);
								  print SSP_WRITE $char;
								  $ll++;
								  if($ll == 60){                # wraps at 60 char position.
											 $ll=0;
											 print SSP_WRITE "\n";
								  }
						}
						print SSP_WRITE "\n";
			}
}
#________________________________________________________________________
# Title     : pir_write
# Usage     : two argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
#             uses Perl5 pointers(references).
# Function  : writes multiple seqs. in fasta format (takes one or more than one seq.!!)
#             pir is PHD server format.
# Example   : &pir_write($hash_pointer, $out_file_name);
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pir_write{
  my($hash_ref1, $output_file)=@_;	# $name is the name of seq.
  my(%input)=%{$hash_ref1};
  my(@names)=keys %input; my($name);
  open (pir_WRITE,">$output_file");		# $string is the seq string.
  print pir_WRITE "# PIR list  @names\n"; # This is !! essential for PHD !!!
  for $name(keys %input){
	 my($ll)=0;								# $output_file is the name of output.
	 $string=$input{$name};
	 $string=~s/[ .-]//g;		# replaces all non-chars to null.
	 $string=~tr/a-z/A-Z/;    # converts lower to upper cases.
	 print pir_WRITE ">P1;\n";
	 print pir_WRITE "$name\n";	# this writes only one seq. in one file.
	 $ls2=length($string);					# use fasta_append for mul. lines of seq
	 for($i=0; $i<$ls2; $i++){
		$char=substr($string,$i,1);
		print pir_WRITE $char;
		$ll++;
		if($ll == 60){                # wraps at 60 char position.
		  $ll=0;
		  print pir_WRITE "\n";
		}
	 }
	 print pir_WRITE "\n";
  }
}
#________________________________________________________________________
# Title     : msf_write
# Usage     : two argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
#             uses Perl5 pointers(references).
# Function  : writes multiple seqs. in msf format (takes one or more than one seq.!!)
# Example   : &msf_write(\%hash, \$out_file_name);
#             - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#             PileUp
#
#
#             MSF: 1205  Type: P    Check:  9937   ..
#
# Keywords  :
# Options   :
# Version   : 1.0
#--------------------------------------------------------------------
sub msf_write{
	$| =1;
	my(%input)=%{$_[0]};
	my($output_file)=${$_[1]};
	my($string);
	open (msf_WRITE,">$output_file");		# $string is the seq string.

	print msf_WRITE '   MSF: 1205  Type: P    Check:  9937   .. '; ## This is dummy
	print msf_WRITE "\n\n";

	my(@names) = keys %input;
	my($larg)  = length($input{$names[0]});

	for $name (keys %input){
	  $len = length($input{$name});
	  printf msf_WRITE (" Name: %-15s oo  Len: %-5s Check:  9999  Weight:  1.00\n", $name, $len);
	}
	print msf_WRITE "\n";
	print msf_WRITE "\/\/\n\n\n\n";

###################################################
####             MSF file form           ##########
###################################################
format msf_WRITE =
@<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$names,         $seq
.
  for ($k=0; $k < $larg; $k+=50){    # 50 residues interval
	 for($i=0; $i < @names; $i++){    # number of sequences
		$names = $names[$i];
		$input{$names[$i]}=~ s/\n//g;
		$seq = substr($input{$names[$i]}, $k, 50);
		$seq = put_gaps_every_x_position_in_string($seq, 10, ' ');
		select (msf_WRITE); ## to print out to a FILE
		#$~='msf_WRITE';
		write msf_WRITE;
	 }
	 print "\n";                       # next block starts.
  }
}

#________________________________________________________________________
# Title     : pir_write
# Usage     : two argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
#             uses Perl5 pointers(references).
# Function  : writes multiple seqs. in fasta format (takes one or more than one seq.!!)
# Example   : &pir_write($hash_pointer, $out_file_name);
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pir_write{
   my($hash_ref1, $output_file)=@_;    # $name is the name of seq.
   my(%input)=%{$hash_ref1};
   open (pir_WRITE,">$output_file");       # $string is the seq string.
   for $name(keys %input){
	   my($ll)=0;                              # $output_file is the name of output.
	   $string=$input{$name};
	   $string=~s/[ .-]//g;        # replaces all non-chars to null.
	   $string=~tr/a-z/A-Z/;    # converts lower to upper cases.
	   print pir_WRITE ">P1;\n";           # this writes only one seq. in one file.
	   print pir_WRITE "$name\n";
	   $ls2=length($string);                   #
	   for($i=0; $i<$ls2; $i++){
		   $char=substr($string,$i,1);
		   print pir_WRITE $char;
		   $ll++;
		   if($ll == 60){                # wraps at 60 char position.
			  $ll=0;
			  print pir_WRITE "\n";
		   }
	   }
	   print pir_WRITE "\n";
   }
}
#__________________________________________________________________
# Title     : write_seq_files
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub write_seq_files{
  #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
  my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
  my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
  my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
  my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
  my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
  if($debug==1){print "\n\t\@hash=\"@hash\"
  \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
  \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($string, $na,$out_file_name_provided);
  my($output_file) ='default_out.seq'; ### when no output file name is given, this is used
  if(@file>0){
	$output_file = $file[0];
	$out_file_name_provided=1;
  }else{ $output_file='default_out.seq'; }

  for ($n=0 ; $n < @hash; $n ++){
	 my %hash=%{$hash[$n]};
	 my @keys=keys %hash;
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # When only one seq is given, use the seq name as output file
	 #________________________________________________________________
	 if(@hash==1 and @keys==1 and @file < 1){
	    $output_file="$keys[0]\.seq";
	 }elsif(@file < 1){
	    $output_file="default_fa_$n\.seq";
	 }

	 open (SEQ_WRITE, ">$output_file");      # $string is the seq string.

	 for ($i=0; $i < @keys; $i++){
		$na= $keys[$i];
		$string = "\U$hash{$na}";
		$string=~s/[\n \.-]//g;     # replaces all non-chars to null. '_' is used for stop codon
		$seq_leng=length($string);
		if($debug == 1){
			printf ("%-15s %-5s %s", $na, $seq_leng, $string);
			printf SEQ_WRITE ("%-15s %-5s %s", $na, $seq_leng, $string);
	    }elsif($char_opt=~/v/){
			printf ("%-15s %-5s %s", $na, $seq_leng, $string);
			printf SEQ_WRITE ("%-15s %-5s %s", $na, $seq_leng, $string);
		}else{
			printf SEQ_WRITE ("%-15s %-5s %s", $na, $seq_leng, $string);
		}
		print SEQ_WRITE "\n";
	 }
  }
  close SEQ_WRITE;
  if( $out_file_name_provided != 1){
	  print "\n\n# You didnt give out file name, $output_file  used\n";
  }
  if( -s $output_file){
	 print "\n# Sequences were written in  $output_file ";
  }else{
	 print "\n# The size of written outfile \"$output_file\" is 0, error \n\n" }
}

#__________________________________________________________________________
# Title     : make_singlet_list_from_pdb_entries
# Usage     : &write_pdbs_files(\@files);
# Function  : gets the classificaiton of scop in pdb40d.fa like file and
#             produces pdb40d.pdbs file.
#
#             1.1.1.1.4  means: Class.Fold.Superfamily.Family.Protein
#
#             Compare with make_groups_from_pdb_entries
# Example   : &make_singlet_list_from_pdb_entries(\@files);
# Input>
#   >d2sn3__ 7.3.6.1.1 scorpion toxin [Centruroides sculpturatus ewing, variant 3]
#   KEGYLVKKSDGCKYGCLKLGENEGCDTECKAKNQGGSYGYCYAFACWCEGLPESTPTYPL
#
# OUTPUT>
#   >d2cmd_1 3.18.1.5.2 (1-145) Malate dehydrogenase [Escherichia coli]
#   >d2naca2 3.18.1.4.1 (148-335) Formate dehydrogenase [Pseudomonas sp. 101]
#
# Keywords  : make_singlet_list_from_pdb40d, make_singlet_list_from_scop, make_superfamilies
#             write_pdbs_files, make_pdbs_files, make_pdb_group_files,
#             write_pdbs, make_singlet_list_from_pdb_entries
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub make_singlet_list_from_pdb_entries{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	for($i=0; $i< @file; $i++){
	   my(%superfamily_singlets, $seq, $classification, $class, $fold, $superfamily,
	      $family, $protein, $description, $group_num_counter, @keys, @values,
		  $all_seq_in_input_classification, $num_of_singlets);

	   $base=${&get_base_names($file[$i])};
	   $out_pdbs_file="$base\.pdbs";
	   open(PDB_SEQ_FILE, "$file[$i]");
	   while(<PDB_SEQ_FILE>){  # matching  1.1.1.1.4  <-- classification,
		  if(/\> {0,3}(\S+) +((\d+)\.(\d+)\.(\d+)\.(\d+)\.(\d+)) +(.+)/){
			 $seq=$1;
			 $classification=$2;
			 $class = $3;
			 $fold  = $4;
			 $superfamily=$5;
			 $family=$6;
			 $protein=$7;
			 $description=$8;
			 $superfamily_singlets{"$class\.$fold\.$superfamily"}.="\>$seq $classification $description\n";
			 $superfamily_member_count{"$class\.$fold\.$superfamily"}++;
			 $all_seq_in_input_classification++;
		  }
	   }
	   close PDB_SEQ_FILE;
	   open(PDBS, ">$out_pdbs_file");
	   @keys= keys %superfamily_singlets;
	   for($j=0; $j < @keys; $j++){
	      if($superfamily_member_count{$keys[$j]} > 1){
			  $num_of_groups ++;
	          next; ## skipping singlets
	      }else{
		      print PDBS $superfamily_singlets{$keys[$j]}, "\n";
			  $singlet_num_counter++;
		  }
	   }
	   my $num_of_seq_in_group=$all_seq_in_input_classification-$singlet_num_counter;

	   print "\n# No of Groups                : $num_of_seq_in_group";
	   print "\n# No of All seq in the input  : $all_seq_in_input_classification";
	   print "\n# No of singlets              : $singlet_num_counter";
	   print "\n# No of seq in the $num_of_groups groups : $num_of_seq_in_group";
	   print PDBS "\n# No of Groups: $num_of_groups";
	   print PDBS "\n# No of All seq in the input: $all_seq_in_input_classification";
	   print PDBS "\n# No of singlets: $singlet_num_counter";
	   print PDBS "\n# No of seq in the $num_of_groups groups: $num_of_seq_in_group";
	   close PDBS;
	   print "\n# $out_pdbs_file has been written \n\n";
	}
}


#__________________________________________________________________________
# Title     : write_pdbg_files
# Usage     : &write_pdbg_files(\@files);
# Function  : gets the classificaiton of scop in pdb40d.fa like file and
#             produces pdb40d.pdbg file.
#
#             1.1.1.1.4  means: Class.Fold.Superfamily.Family.Protein
#
# Example   :
# Input>
#   >d2sn3__ 7.3.6.1.1 scorpion toxin [Centruroides sculpturatus ewing, variant 3]
#   KEGYLVKKSDGCKYGCLKLGENEGCDTECKAKNQGGSYGYCYAFACWCEGLPESTPTYPL
#
# OUTPUT>
#   >d2cmd_1 3.18.1.5.2 (1-145) Malate dehydrogenase [Escherichia coli]
#   >d2naca2 3.18.1.4.1 (148-335) Formate dehydrogenase [Pseudomonas sp. 101]
#
# Keywords  : make_groups_from_pdb40d, make_groups_from_scop, make_superfamilies
#             write_pdbg_files, make_pdbg_files, make_pdb_group_files,
#             write_pdbg, make_groups_from_pdb_entries
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub write_pdbg_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	for($i=0; $i< @file; $i++){
	   my(%superfamily_groups, $seq, $classification, $class, $fold, $superfamily,
	      $family, $protein, $description, $group_num_counter, @keys, @values,
		  $all_seq_in_input_classification, $num_of_singlets);

	   $base=${&get_base_names($file[$i])};
	   $out_pdbg_file="$base\.pdbg";
	   open(PDB_SEQ_FILE, "$file[$i]");
	   while(<PDB_SEQ_FILE>){  # matching  1.1.1.1.4  <-- classification,
		  if(/\> {0,3}(\S+) +((\d+)\.(\d+)\.(\d+)\.(\d+)\.(\d+)) +(.+)/){
			 $seq=$1;
			 $classification=$2;
			 $class = $3;
			 $fold  = $4;
			 $superfamily=$5;
			 $family=$6;
			 $protein=$7;
			 $description=$8;
			 $superfamily_groups{"$class\.$fold\.$superfamily"}.="\>$seq $classification $description\n";
			 $superfamily_member_count{"$class\.$fold\.$superfamily"}++;
			 $all_seq_in_input_classification++;
		  }
	   }
	   close PDB_SEQ_FILE;
	   open(PDBG, ">$out_pdbg_file");
	   @keys= keys %superfamily_groups;
	   for($j=0; $j < @keys; $j++){
	      if($superfamily_member_count{$keys[$j]} < 2){
			  $num_of_singlets ++;
	          next; ## skipping singlets
	      }else{
		      print PDBG $superfamily_groups{$keys[$j]}, "\n";
			  $group_num_counter++;
		  }
	   }
	   my $num_of_seq_in_group=$all_seq_in_input_classification-$num_of_singlets;

	   print "\n# No of Groups                : $group_num_counter";
	   print "\n# No of All seq in the input  : $all_seq_in_input_classification";
	   print "\n# No of singlets              : $num_of_singlets";
	   print "\n# No of seq in the $group_num_counter groups : $num_of_seq_in_group";
	   print PDBG "\n# No of Groups: $group_num_counter";
	   print PDBG "\n# No of All seq in the input: $all_seq_in_input_classification";
	   print PDBG "\n# No of singlets: $num_of_singlets";
	   print PDBG "\n# No of seq in the $group_num_counter groups: $num_of_seq_in_group";
	   close PDBG;
	   print "\n# $out_pdbg_file has been written by $0\n\n";
	}
}

#__________________________________________________________________________
# Title     : write_msp3_files
# Usage     : &write_msp3_files(\@files);
# Function  : opens two files. Gx.msp_1 and Gx.msp_2 to create Gx.msp3 file
#              you can set the msp3 file extension by e= option,
#              for example, e=interm will make  G1.interm instead of G1.msp3
#
# Example   : &write_msp3_files(\@files);  # while @files has G*.pdbg
# Keywords  :
# Options   :
#  $upper_expect_limit2= by u2=  # u2 is for msp_2 files (eg, 0.0006)
#  $upper_expect_limit1= by u1=  # u1 is for msp_1 files (eg, 0.081 )
#  $lower_expect_limit1= by l1=
#  $lower_expect_limit2= by l2=
#  R for NOT adding ranges in seq names.
#  e= for  extension name
#  n  for  no sort by columns in output
#
# Returns   : returns the names of msp3 files
# Version   : 1.7
#----------------------------------------------------------------------------
sub write_msp3_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my $upper_expect_limit1=5  ;
	my $upper_expect_limit2=5;   # default
	my $lower_expect_limit1=0  ;
	my $lower_expect_limit2=0;   # default
	my $msp3_extension = 'msp3'; # default
	my $no_sort='n' if $char_opt=~/n/;

	if($vars{'u2'}=~/\S+/){ $upper_expect_limit2 = $vars{'u2'}      };
	if($vars{'u1'}=~/\S+/){ $upper_expect_limit1 = $vars{'u1'}      };
	if($vars{'l2'}=~/\S+/){ $lower_expect_limit2 = $vars{'l2'}      };
	if($vars{'l1'}=~/\S+/){ $lower_expect_limit1 = $vars{'l1'}      };
	if($vars{'e'} =~/\S+/){ $msp3_extension      = $vars{'e'}       };
	if($char_opt=~/R/){     $Dont_add_range      = 'R'              }

	for($i=0; $i< @file; $i++){
	   my ($base, $msp1,  $msp2, %msp2_hash, %final_hash, $j, $k, @msp_2,
		  $msp2_match, $msp3, $enq, $value1, @values, $evalue, $score,
		  $size_of_msp3_file);

	   $base=${&get_base_names($file[$i])};

	   if(-s "$base\.msp_1"){
		  $msp1="$base\.msp_1";
	   }elsif( -s "./MSP_1/$base\.msp_1"){
		  $msp1="./MSP_1/$base\.msp_1";
	   }elsif( -s "../$base\.msp_1"){ ## when xxxx.msp_1 files are in up dir
		  $msp1="../$base\.msp_1";
	   }
	   if(-s "$base\.msp_2"){
		  $msp2="$base\.msp_2";
	   }elsif( -s "./MSP_2/$base\.msp_2"){
		  $msp2="./MSP_2/$base\.msp_2";
	   }elsif( -s "../$base\.msp_2"){   ## when xxxx.msp_2 files are in up dir
	      $msp2="../$base\.msp_2";
	   }

	   $msp3="$base\.$msp3_extension"; #<--- Exension addition

	   print "\n# $msp1 and $msp2 and $msp3\n";
	   open(MSP1, "$msp1") || open(MSP1, "./MSP_1/$msp1");
	   open(MSP2, "$msp2") || open(MSP2, "./MSP_2/$msp2");

	   print "\n# I am opening $msp2\n";
	   while(<MSP2>){
		  if(/^ *(\S+) +(\S+) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
			  if($2 > $upper_expect_limit2 or
			     $2 < $lower_expect_limit2){ next ; }

			  $enq="$5";
			  $evalue=$2;
			  $score=$1;
			  if($Dont_add_range=~/R/){     $msp2_match="$8";
			  }else{                       $msp2_match="$8\_$6\-$7";
			  }
			  $value1=sprintf("%-25s %-4s %-10s %-25s\n", $enq, $score, $evalue, $msp2_match);
			  $msp2_hash{$enq} .="$value1";
		  }
	   }

	   print scalar keys %msp2_hash,
	         "<- Num of interm. in msp2.  Evalue cut \$upper_expect_limit2: $upper_expect_limit2 \n";

	   print "\n# I am opening $msp1\n";
	   while(<MSP1>){
		  if(/^ *(\S+) +(\S+) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
			  if($2 >  $upper_expect_limit1  or
			     $2 <  $lower_expect_limit1){ next ; }
			  $range_start1=$3;
			  $range_stop1 =$4;
			  $range_start2=$6;
			  $range_stop2 =$7;
			  $match=$8;
			  $enq=$5;
			  $evalue=$2;
			  $score=$1;
			  if($enq=~/\S+\_\d+\-\d+/){
			  }else{
				  $enq="$enq\_$range_start1\-$range_stop1";
			  }
			  if($Dont_add_range=~/R/){     $msp2_match="$match";
			  }else{
				  unless($match =~/\S+\_\d+\-\d+/){
						$msp2_match="$match\_$range_start2\-$range_stop2";
				  }else{
				        $msp2_match=$match;
				  }
			  }

			  @msp_2=split(/\n/, $msp2_hash{$msp2_match});

			  for($j=0; $j < @msp_2; $j++){
				  $value1=sprintf("%-25s %-4s %-10s %-25s", $enq, $score, $evalue, $msp_2[$j]);
				  $final_hash{"$msp_2[$j]"}="$value1";
			  }
		  }
	   }

	   open(MSP3, ">$msp3");
	   @values= values %final_hash;
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   # following sorts by the first column name, 4th colomn name, 2 column E value etc
	   #___________________________________________________________________________
	   unless($no_sort){
	       @values= map {$_->[0]} sort { $a->[1] cmp $b->[1] or $a->[4] cmp $b->[4] or $a->[2] <=> $b->[2] or $a->[3] <=> $b->[3]}
		             map { ($_=~/^(\S+)_\d+\-\d+ +\d+ +(\S+) +\S+ +\d+ +(\S+) +(\S+)_\d+\-\d+/); [$_,$1,$2,$3,$4] } @values;
	   }

	   for($k=0; $k< @values; $k++){
		  #print "\n$values[$k]";
		  print MSP3  "\n$values[$k]";
	   }
	   print MSP3 "\n";
	   close MSP3;
	   $size_of_msp3_file=-s $msp3;
	   if($size_of_msp3_file > 50){ print "\n# size of $msp3 is: $size_of_msp3_file, SUCCESS?\n"; }
	   push(@msp3_file_names, $msp3);
	}
	if(@msp3_file_names > 1){
	   return(\@msp3_file_names);
	}else{
	   return($msp3_file_names[0]);
	}
}







#________________________________________________________________________
# Title     : write_fasta
# Usage     : many argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
# Function  : writes multiple seqs. in fasta format (takes one or more seq.!!)
#             This needs hash which have 'name' 'actual sequence as value'
#
#             To print out each fasta seq into each single file, use write_fasta_seq_by_seq
#             This can rename seq names
#
# Example   : &write_fasta(\%in1, \$out_file_name, \%in2, \%in3,..., );
#             << The order of the hash and scalar ref. doesn't matter. >>
# Warning   : The default output file name is 'default_out.fa' if you do not
#             specify output file name.
#             OUTput file should have xxxxx.fa or xxxx.any_ext NOT just 'xxxxx'
# Keywords  : write_fasta_file, print_fasta_file, write fasta file, fasta_write
#             show_fasta
# Options   : v for STD out.
#             r for rename the sequences so that Clustalw would not complain with 10 char limit
#               so result wuld be:  0 ->ASDFASDF, 1->ASDFASFASF, 2->ADSFASDFA
# Returns   :
# Argument  :
# Version   : 2.3
#--------------------------------------------------------------------
sub write_fasta{
  #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
  my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
  my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
  my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
  my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
  my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
  if($debug==1){print "\n\t\@hash=\"@hash\"
  \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
  \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  $| = 1;

  my($string, $string_leng, $na,$out_file_name_provided);
  my($output_file) ='default_out.fa'; ### when no output file name is given, this is used
  if(@file>0){
	$output_file = $file[0];
	$out_file_name_provided=1;
  }else{ $output_file='default_out.fa'; }

  for ($n=0 ; $n < @hash; $n ++){
	 my %hash=%{$hash[$n]};
	 my @keys=keys %hash;
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # When only one seq is given, use the seq name as output file
	 #________________________________________________________________
	 if(@hash==1 and @keys==1 and @file < 1){
	    $output_file="$keys[0]\.fa";
	 }elsif(@file < 1){
	    $output_file="default_fa_$n\.fa";
	 }

	 open (FASTAS_WRITE,">$output_file");      # $string is the seq string.

	 for ($i=0; $i < @keys; $i++){
		$na= $keys[$i];
		$string = "\U$hash{$na}";
		$string=~s/[\n \.-]//g;	    # replaces all non-chars to null. '_' is used for stop codon
		if($char_opt=~/r/){  # rename the seqeunces with '0, 1, 2, 3," etc for  clustalw
		   $na=$i;
		}

		if($debug == 1){
			print ">$na\n";
			print FASTAS_WRITE ">$na\n";
	    }elsif($char_opt=~/v/){
		    print ">$na\n";
		    print FASTAS_WRITE ">$na\n";
		}else{
		    print  FASTAS_WRITE ">$na\n";
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		#  Main algorithm of writing in 60 char leng line
		#_____________________________________________________
		$string_leng=length($string);
		for($j=0; $j< $string_leng; $j+=60){
			if($debug == 1){
				printf "%.60s\n", substr($string,$j,60);
				printf FASTAS_WRITE "%.60s\n", substr($string,$j,60);
			}elsif($char_opt=~/v/i){
				printf "%.60s\n", substr($string,$j,60);
				printf FASTAS_WRITE "%.60s\n", substr($string,$j,60);
			}else{
			   printf FASTAS_WRITE "%.60s\n", substr($string,$j,60);
			}
		}
	 }
	 close FASTAS_WRITE;
  }
  if( $out_file_name_provided != 1){
	  print "\n\n# You didnt give out file name, $output_file  used\n";
  }
  if( -s $output_file ){
	 if($verbose=~/\S/){ ## if v option is given, mesg is omitted to prevent comments to a redirected output
	    print "\n# Sequences were written in  $output_file ";
	 }
  }else{
	 print "\n# The size of written outfile \"$output_file\" is 0, error \n\n"
  }
}

#________________________________________________________________________
# Title     : write_fasta_seq_by_seq
# Usage     : &write_fasta_seq_by_seq(\%hash, [$extension], [\$output_filename]);
# Function  : accepts one hash of multiple sequences and writes many files
#             of single sequences by using the names as file names.
#             If $extension is provided, it writes an output as in
#             the below example (seq1_sc.fasta). If not, it just attach
#             'fa' to files.
#             This needs, hash of 'name', 'actual sequence as value'
# Example   : with >xxxx
#                  ASDFASDFASDFASDFASDFASDFASDF
#                  >yyyy
#                  ASDFASDFASDFASDFASDFASDFSDAFSD
#
#             You will get two files (xxxx.fa, yyyy.fa)
# Keywords  : write_each_fasta, write_single_fasta, write_fasta_single
#             single_fasta_write, write_fasta_files_seq_by_seq,
#             write_single_fasta_files,
# Options   : can specify extension name.
#             e  for checking fasta file exists or not and skipps if so
#             r for rename the sequences so that Clustalw would not complain with 10 char limit
#               so result wuld be:  0 ->ASDFASDF, 1->ASDFASFASF, 2->ADSFASDFA
# Returns   : nothing. default OUTPUT file name is '$key.fa' !!
# Version   : 1.9
#--------------------------------------------------------------------
sub write_fasta_seq_by_seq{
	 my ($i, $exists_opt, $rename_seq_opt, $out_file_name_given);
	 for($i=0; $i< @_; $i++){
		if($_[$i]=~/e$/){
		   $exists_opt=1;
		   splice(@_, $i, 1);
		   $i--;
		}elsif($_[$i]=~/r$/){
		   $rename_seq_opt='r';
		   splice(@_, $i, 1);
		   $i--;
		}elsif( $_[$i] =~/\.fa/ or -e $_[$i] ){
		   $out_file_name_given=1;
		   $out_file_name = $_[$i];
		   splice(@_, $i, 1);
		   $i--;
		}elsif( ref ($_[$i]) eq 'SCALAR'){
		   if( ${$_[$i]} =~/\.fa/ or -e ${$_[$i]} ){
		      $out_file_name_given=1;
		      $output_file=${$_[$i]};
		      splice(@_, $i, 1);
		      $i--;
		   }
		}
	 }
	 my(%temp_hash, $key, $output_file);
	 my(%input)     =%{$_[0]};
	 my($extension) =${$_[1]} || $_[1];
	 for $key (keys %input){
		my %temp_hash=();
		$temp_hash{$key}=$input{$key};
		if (($key=~ /\_$extension$/)||($#_ == 0)){
			$output_file = "$key\.fa";
		}else{
			$output_file = "$key\_$extension\.fa";
		}
		if( ($exists_opt==1)&&(-e $output_file)){
		   print "\n# write_fasta_seq_by_seq: File $output_file exists, NO write\n";
		}elsif( $out_file_name_given == 1){
		   &write_fasta(\%temp_hash, \$output_file, $rename_seq_opt);
		}else{
		   &write_fasta(\%temp_hash, \$output_file, $rename_seq_opt);
		}
	 }
}




#________________________________________________________________________
# Title     : show_in_fasta
# Usage     : &show_hash_in_fasta(\%in1, \%in2, \%in3, .... );
#             takes a hash which has got names keys and sequences values.
#             uses Perl5 pointers(references).
# Function  : shows multiple seqs. in fasta format (takes one or more seq.!!)
# Example   : &show_in_fasta(\%hash);
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub show_in_fasta{ my($k, $name, $string);
  for ($k=0; $k<=$#_; $k++){
	 my(%input) = %{$_[$k]};
	 for $name(keys %input){
		my($ll)=0;								# $output_file is the name of output.
		$string=$input{$name};
		$string=~s/\n//g;
		$string=~tr/a-z/A-Z/;   # converts lower to upper cases.
		print ">$name\n";	      # this writes only one seq. in one file.
		for($i=0; $i<length($string); $i+=60){
		  printf "%.60s\n", substr($string,$i,60);
		}
		print "\n";
	 }
  }
}



#________________________________________________________________________
# Title     : One_To_Three_Letter  ( amino acid code change)
# Usage     : %one_letter  = %{&One_To_Three_Letter};   # takes no arguments (void).
# Function  : a hash of one letter to 3 letter amino acid code , returns a hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub One_To_Three_Letter{  my(%one_letter) =
  ( "A", "Ala",  "C", "Cys",  "D", "Asp",  "E", "Glu",  "F", "Phe",
	 "G", "Gly",  "H", "His",  "I", "Ile",  "K", "Lys",  "L", "Leu",
	 "M", "Met",  "N", "Asn",  "P", "Pro",  "Q", "Gln",  "R", "Arg",
	 "S", "Ser",  "T", "Thr",  "V", "Val",  "W", "Trp",  "Y", "Tyr"   );
  return(\%one_letter);
}
#________________________________________________________________________
# Title     : ONE_TO_THREE_LETTER  ( amino acid code change)
# Usage     : %one_letter  = %{&ONE_TO_THREE_LETTER };   # takes no arguments (void).
# Function  : a hash of one letter to 3 letter amino acid code , returns a hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub ONE_TO_THREE_LETTER{  my(%one_letter) = (
	 "A", "ALA",  "C", "CYS",  "D", "ASP",  "E", "GLU",  "F", "PHE",
	 "G", "GLY",  "H", "HIS",  "I", "ILE",  "K", "LYS",  "L", "LEU",
	 "M", "MET",  "N", "ASN",  "P", "PRO",  "Q", "GLN",  "R", "ARG",
	 "S", "SER",  "T", "THR",  "V", "VAL",  "W", "TRP",  "Y", "TYR"   );
  return(\%one_letter);
}
#________________________________________________________________________
# Title     : one_to_three_letter  ( amino acid code change)
# Usage     : %one_letter  = %{&one_to_three_letter};   # takes no arguments (void).
# Function  : a hash of one letter to 3 letter amino acid code , returns a hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub one_to_three_letter{  my(%one_letter) = (
	 "a", "ala",  "c", "cys",  "d", "asp",  "e", "glu",  "f", "phe",
	 "g", "gly",  "h", "his",  "i", "ile",  "k", "lys",  "l", "leu",
	 "m", "met",  "n", "asn",  "p", "pro",  "q", "gln",  "r", "arg",
	 "s", "ser",  "t", "thr",  "v", "val",  "w", "trp",  "y", "tyr"   );
  return(\%one_letter);
}
#________________________________________________________________________
# Title     : THREE_TO_ONE_LETTER  ( amino acid code change)
# Usage     : %one_letter  = %{&THREE_TO_ONE_LETTER};   # takes no arguments (void).
# Function  : a hash of one letter to 3 letter amino acid code , returns a hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub THREE_TO_ONE_LETTER{  my(%AA);
	 $AA{"ALA"} = "A";  $AA{"MET"} = "M";  $AA{"ASP"} = "D";  $AA{"PRO"} = "P";
	 $AA{"CYS"} = "C";  $AA{"ASN"} = "N";  $AA{"GLU"} = "E";  $AA{"GLN"} = "Q";
	 $AA{"PHE"} = "F";  $AA{"ARG"} = "R";  $AA{"GLY"} = "G";  $AA{"SER"} = "S";
	 $AA{"HIS"} = "H";  $AA{"THR"} = "T";  $AA{"ILE"} = "I";  $AA{"VAL"} = "V";
	 $AA{"LYS"} = "K";  $AA{"TRP"} = "W";  $AA{"LEU"} = "L";  $AA{"TYR"} = "Y";
	return(%AA);
}
#________________________________________________________________________
# Title     : three_to_one_letter  ( amino acid code change)
# Usage     : %three_letter  = &three_to_one_letter ;   # takes no arguments (void).
# Function  : a hash of one letter to 3 letter amino acid code , returns a hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub three_to_one_letter{  my(%aa);
	 $aa{"ala"} = "a";  $aa{"met"} = "m";  $aa{"asp"} = "d";  $aa{"pro"} = "p";
	 $aa{"cys"} = "c";  $aa{"asn"} = "n";  $aa{"glu"} = "e";  $aa{"gln"} = "q";
	 $aa{"phe"} = "f";  $aa{"arg"} = "r";  $aa{"gly"} = "g";  $aa{"ser"} = "s";
	 $aa{"his"} = "h";  $aa{"thr"} = "t";  $aa{"ile"} = "i";  $aa{"val"} = "v";
	 $aa{"lys"} = "k";  $aa{"trp"} = "w";  $aa{"leu"} = "l";  $aa{"tyr"} = "y";
  return(\%aa);
}
#________________________________________________________________________
# Title     : Three_To_One_Letter  ( amino acid code change)
# Usage     : %Aa  = &Three_To_One_Letter ;   # takes no arguments (void).
#              $Aa('Ala');  will return 'A'
# Function  : a hash of one letter to 3 letter amino acid code , returns a hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub Three_To_One_Letter{ my(%Aa);
  $aa{"Ala"} = "A";  $aa{"Met"} = "M";  $aa{"Cys"} = "C";  $aa{"Asn"} = "N";
  $aa{"Asp"} = "C";  $aa{"Pro"} = "P";  $aa{"Glu"} = "E";  $aa{"Gln"} = "Q";
  $aa{"Phe"} = "F";  $aa{"Arg"} = "R";  $aa{"Gly"} = "G";  $aa{"Ser"} = "S";
  $aa{"His"} = "H";  $aa{"Thr"} = "T";  $aa{"Ile"} = "I";  $aa{"Val"} = "V";
  $aa{"Lys"} = "K";  $aa{"Trp"} = "W";  $aa{"Leu"} = "L";  $aa{"Tyr"} = "Y";
  return(\%Aa);
}


#________________________________________________________________________
# Title     : amino_acid_compos_id_percent
# Usage     : $percent = &amino_acid_compos_id_percent (%any_hash_with_sequences);
#             The way identity(composition) is derived is;
#
# Function  : gets amino acid composition identity of any given
#             number of sequences(at least 2).
# Example   :
# Warning   :
# Keywords  : get_amino_acid_composition, get_protein_composition, composition
# Options   :
# Argument  : hash of at least 2 sequences.
# Version   : 1.1
#--------------------------------------------------------------------
sub amino_acid_compos_id_percent{
  my(%input)= %{$_[0]};
  my(@names)=keys (%input);
  my(@temp, $i, $j, $iden, @all_pairs_id, $iden_sum);
  my(%compos_table1, %compos_table2, $final_iden, $larger);
  for ($i=0; $i < @names; $i ++){  # putting seqs in arrays.
	 $input{$names[$i]}=~ tr/a-z/A-Z/;
	 $input{$names[$i]}=~ s/\W//g;
	 @{"string$i"}= split('', $input{$names[$i]});
	 $larger = @{"string$i"} if @{"string$i"} > $larger;
  }
  for ($i=0; $i < @names; $i++){   # to make permutated pairs.
	 for ($j=$i; $j < @names; $j ++){
		if ($j == $i){ next; }
		for ($k=0; $k < $larger; $k ++){  # getting composition tables for two seqs.
		  $compos_table1{${"string$i"}[$k]}++ if (${"string$i"}[$k] =~ /\w/);
		  $compos_table2{${"string$j"}[$k]}++ if (${"string$j"}[$k] =~ /\w/);
		}
		$iden = ${&common_compos_id_hash(\%compos_table1, \%compos_table2)};
		%compos_table1=();  %compos_table2=();
		push (@all_pairs_id,  $iden );
	 }
  }
  for $iden (@all_pairs_id){  $iden_sum+= $iden;  }
  if(@all_pairs_id == 0){ @all_pairs_id =1; }
  $final_iden=$iden_sum/@all_pairs_id;
  \$final_iden;
}

#________________________________________________________________________
# Title     : seq_id_percent_array  (more than 2 elements array)
# Usage     : $percent = &seq_id_percent_array(@any_array_sequences);
#             The way identity(pairwise) is derived is;
#
# Function  : produces amino acid composition identity of any given number of sequences.
# Example   :
# Warning   : This can handle 'common gaps' in the sequences
# Keywords  : get_percent_composition_identity, seq_composition_identity,
#             percent_sequence_composition_id
#
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub seq_id_percent_array{
	my(@input, $denominator,@all_pairs_id, $percent_id);
	my($largest,$p,$i,$j,$k,$iden_residue_num,$iden,@temp,$iden_sum,$gap_num,$final_iden);
	for($d=0; $d<@_; $d++){
		if(ref($_[$d]) eq 'ARRAY'){ @input=@{$_[$d]}; }
		elsif( (ref($_[$d]) eq 'SCALAR') &&( ${$_[$d]}=~/^[aA]/) ){ $average_len_opt =1 }
		elsif( !(ref($_[$d])) && ( $_[$d] =~/^[aA]/) ){ $average_len_opt =1 } }
	if ((@input== 1)||( @input== 0)){
		print "\n\n \" $0 \"  requires at least 2 sequences\n\n";
		print "\n Abnormally dying at seq_id_percent_array in $0 \n\n";
		print chr(7); exit;}
	$shortest=length($input[0]);
	my($sans_gap_seq, $length_sum, $average_seq_len);
	for($p=0; $p < @input; $p++){
		$input[$p]=~ tr/a-z/A-Z/;
		$sans_gap_seq=$input[$p];
		$sans_gap_seq=~s/\W//g;
		$input[$p]=~ s/\W/./g;
		(@{"string$p"})=split('', $input[$p]);
		$largest = length($input[$p]) if length($input[$p]) > $largest;
		$shortest = length($sans_gap_seq) if length($sans_gap_seq) < $shortest;
		$length_sum += length($sans_gap_seq);
	}
	$average_seq_len = $length_sum/@input;
	for($i=0; $i< @input; $i++){
		for($j=$i+1; $j< @input; $j++){
			for ($k=0; $k <  $largest; $k ++){  # getting composition tables for two seqs.
				if ((${"string$i"}[$k] !~ /\W/)&&(${"string$i"}[$k] eq ${"string$j"}[$k])){
					$iden_residue_num++; }
				elsif((${"string$i"}[$k] =~ /\W/)&&(${"string$i"}[$k]=~ /\W/)){ $gap_num++; }}
			if( $average_len_opt == 1){ $denominator = $average_seq_len; }
			else{ $denominator = $shortest; }
			if($denominator == 0){ $denominator=1; }  # in the above it is 50% rather than 0.07%
			$percent_id=($iden_residue_num/($denominator))*100;
			push(@all_pairs_id, $percent_id);
			undef ($iden_residue_num, $gap_num);
		}
	}
	for (@all_pairs_id){  $iden_sum+=$_;    }
	$final_iden=$iden_sum/($#all_pairs_id+1);
	return( \$final_iden );
}

#________________________________________________________________________
# Title     : compos_id_percent_array  (more than 2 elements array)
# Usage     : $percent = &compos_id_percent_array(@any_array_sequences);
#             The way identity(composition) is derived is;
# Function  : produces amino acid composition identity of any given number of sequences.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub compos_id_percent_array{
   my(@input)=@{$_[0]};
   my($largest,$iden,@temp,$iden_sum,$final_iden, @all_pairs_id);
	for($i=0; $i<=$#input; $i++){  $input[$i]=~ tr/a-z/A-Z/;  $input[$i]=~ s/[\.\-\s]//g;
		@temp = split('', $input[$i]);  (@{"string$i"})= @temp;
		$largest = @{"string$i"} if @{"string$i"} > $largest;    }
	for($i=0; $i< @input; $i++){ #_________ permutating ___________
		for($j=$i+1; $j<=$#input; $j++){
			for ($k=0; $k <= $largest; $k ++){  # getting composition tables for two seqs.
				$compos_table1{${"string$i"}[$k]}++ if (${"string$i"}[$k] =~ /\w/);
				$compos_table2{${"string$j"}[$k]}++ if (${"string$j"}[$k] =~ /\w/);   }
			$iden =${&calc_compos_id_hash(\%compos_table1, \%compos_table2)};
			push(@all_pairs_id, $iden);  %compos_table1=();  %compos_table2=();   }   }
	for $iden (@all_pairs_id){  $iden_sum+=$iden;  }
	$final_iden=$iden_sum/(@all_pairs_id);
	#-----------------------------------------------------
	#  Input here is like :  %hash1= (A,3,B,3,C,4,D,4), %hash2= (A,4,B,1,C,4)
	sub  calc_compos_id_hash{  # input is like this;
	  my(%hash1)=%{$_[0]};
	  my(%hash2)=%{$_[1]};
	  my(%common_of_the_2)=();
	  my($common, $compos_id, $sum_residues, $sum_of_the_common_residue_no);
	  my(@values1) = values (%hash1);
	  my(@values2) = values (%hash2);
	  my(@combined_values)=(@values1,@values2);
	  for $elem (@combined_values){  $sum_residues += $elem;   }
	  if($sum_residues == 0){ $compos_id =0; } # to prevent Illegal division error.
	  else{ for $key1(keys %hash1){
				 $common=&smaller_one($hash1{$key1}, $hash2{$key1});
					 sub smaller_one{if ($_[0] > $_[1]){ $_[1];}else{$_[0];}}
				 $sum_of_the_common_residue_no += $common;     }
			 $compos_id = $sum_of_the_common_residue_no/($sum_residues/2)*100;   }
	  \$compos_id;
	}
	#-----------------------------------------------------
	return ( \$final_iden ); # final identity for any given set of strings(seq).
}

#________________________________________________________________________________
# Title     : compos_id_percent_hash  (synonym of amino_acid_compos_id_percent)
# Usage     : $percent = &compos_id_percent_hash(%any_hash_with_sequences);
#             The way identity(composition) is derived is;
#
# Function  : gets amino acid composition identity of any given number of sequences.
# Example   :
# Warning   :
# Keywords  : get_amino_acid_composiiton
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub compos_id_percent_hash{ my(%input, @names);
  if(ref($_[0]) eq 'HASH'){  %input= %{$_[0]}; @names= keys  %input;  }
  else{ print "\n hash ref was not passed to compos_id_percent_hash\n"; exit; }
  my(@temp, $iden, @all_pairs_id, $i, $j, $k,$iden_sum);
  my(%compos_table1, %compos_table2, $final_iden, $larger);
  for ($i=0; $i < @names; $i ++){  $input{$names[$i]}=~ tr/a-z/A-Z/;
	 $input{$names[$i]}=~ s/\W//g;    @temp = split('', $input{$names[$i]});
	 (@{"string$i"})=@temp; $larger = @{"string$i"} if @{"string$i"}>$larger;}
  for ($i=0; $i < @names; $i++){
	 for ($j=$i; $j < @names; $j ++){
		if ($j == $i){ next; }
		for ($k=0; $k < $larger; $k ++){
		  $compos_table1{${"string$i"}[$k]}++ if (${"string$i"}[$k] =~ /\w/);
		  $compos_table2{${"string$j"}[$k]}++ if (${"string$j"}[$k] =~ /\w/); }
		$iden = ${&common_compos_id_hash(\%compos_table1, \%compos_table2)};
		%compos_table1=(); %compos_table2=(); push (@all_pairs_id, $iden); }}
  for $iden (@all_pairs_id){ $iden_sum+=$iden; }
  $final_iden=$iden_sum/(@all_pairs_id);
  return(\$final_iden);
}
#________________________________________________________________________
# Title     : common_compos_id_hash (BUG free)
# Usage     : %hash = &common_compos_hash(\%any_hash1, \%any_hash1);
# Function  : actual calculation of identity
# Example   : ('A', 200, 'C', 191, D, 99)
#                  ('A', 290, 'C', 199, D, 100)
#             uses only two sequences.
# Warning   :
# Keywords  :
# Options   :
# Returns   : ref. of a scaler (in percent)  eg)  95
# Argument  : two references of hash of seqeunces.
# Version   : 1.0
#--------------------------------------------------------------------
sub  common_compos_id_hash{
  my(%hash1)=%{$_[0]};
  my(%hash2)=%{$_[1]};
  my(%common_of_the_2)=();  my($common, $compos_id, $sum_of_the_common_residue_no);
  my(@values1) = values (%hash1);  my(@values2) = values (%hash2);
  my(@combined_values)=(@values1, @values2);
  my($sum_residues) = ${&sum_array(\@combined_values)};
  for $key1(keys %hash1){
	 $common=&smaller_one($hash1{$key1}, $hash2{$key1});
	 sub smaller_one{if ($_[0] > $_[1]){ $_[1];}else{$_[0];}}
	 $sum_of_the_common_residue_no += $common;
  }
  if( $sum_residues == 0){ $sum_residues =1 }
  $compos_id = $sum_of_the_common_residue_no/($sum_residues/2)*100;
  \$compos_id;
}


#________________________________________________________________________
# Title     : calc_compos_id_hash (the same as 'common_compos_hash')
# Usage     : %hash = &calc_compos_hash(\%any_hash1, \%any_hash1);
# Function  : actual calculation of identity
# Example   : ('A', 200, 'C', 191, D, 99)
#                  ('A', 290, 'C', 199, D, 100)
#             uses only two sequences.
# Warning   :
# Keywords  :
# Options   :
# Returns   : ref. of a scaler (in percent)  eg)  95
# Argument  : two references of hash of seqeunces.
# Version   : 1.0
#--------------------------------------------------------------------
sub  calc_compos_id_hash{ my(%hash1)=%{$_[0]}; my(%hash2)=%{$_[1]}; my(%common_of_the_2)=();
	  my($common, $compos_id, $sum_residues, $sum_of_the_common_residue_no);
	  my(@values1) = values (%hash1);   my(@values2) = values (%hash2);
	  my(@combined_values)=(@values1,@values2);
	  for $elem (@combined_values){
			$sum_residues += $elem;   }
	  if($sum_residues == 0){ $compos_id =0; } # to prevent Illegal division error.
	  else{
		  for $key1(keys %hash1){
			  $common=&smaller_one($hash1{$key1}, $hash2{$key1});
				  sub smaller_one{if ($_[0] > $_[1]){ $_[1];}else{$_[0];}}
			  $sum_of_the_common_residue_no += $common;     }
		  $compos_id = $sum_of_the_common_residue_no/($sum_residues/2)*100;   }
	  \$compos_id;
}
#________________________________________________________________________
# Title     : get_percentage
# Usage     : %out= %{&get_percentage(\%result, '1')};
# Function  : calculates the percentage content of any single char over the whole
#             length of strings in it.
# Example   : if the string is  'seq  ABCDEEEEEFFEFE' given in a hash
#             if you put 'A' as one argument, it counts the occurances of 'A'
#             and gets the percentage of it.
# Warning   : This converts array and string input as ref. into arbitrary hash and
#             returns hash
#             programmed by A Biomatic
# Keywords  : get_percentage_of_char
# Options   : None yet.
# Returns   : Numerical Percentage
# Argument  : ref. for Scalar string or Array of chars or Hash  AND 'the target char'
# Version   : 1.0
#--------------------------------------------------------------------
sub get_percentage{
  my(@in, $k, $sort, $numerator, $residue, @out_hash_ref, %hash_out );
  for($k=0; $k< @_ ;$k++){
	  if( !ref($_[$k])&& (length($_[$k]) == 1 )){
		 $numerator = $_[$k];
	  }
	  elsif( (ref($_[$k]) eq 'SCALAR') && (length(${$_[$k]}) == 1 )){
		 $numerator = ${$_[$k]};
	  }
	  elsif(ref($_[$k]) eq "HASH")  { push(@in, $_[$k]); }
	  elsif(ref($_[$k]) eq "ARRAY") { push(@in, &convert_arr_and_str_2_hash($_[$k], $k));} #<-- conv array to hash.
	  elsif(ref($_[$k]) eq "SCALAR"){ push(@in, &convert_arr_and_str_2_hash($_[$k], $k));} #<-- conv array to hash.
  }
  ####### final output is => @in of hash ref elements #######
  for (@in){   my(%H) = %{$_}; my(@keys)= sort keys %H;
	 for $name(@keys){
		 my($numerator_count);
		 my($seq_len) = length($H{$name}); print  "\n $name Sequence length: ", $seq_len, "\n";
		 my(@string) = split(//, $H{$name});
		 for $residue (@string){  if($residue =~/^$numerator$/){ $numerator_count ++; }}
		 my($percent) = $numerator_count/$seq_len *100;
		 $hash_out{$name}=$percent;    }
	 push(@out_hash_ref, \%hash_out);  }
  if(@out_hash_ref ==1){ return($out_hash_ref[0]); }
  elsif( @out_hash_ref > 1){ return(@out_hash_ref); }
}


#________________________________________________________________________
# Title     : pairwise_percent_id  (pairwise sequence identity in percentage)
# Usage     : $identity = ${&pairwise_percent_id(%arrayinput)};
#
# Function  : takes a ref. of a hash of names and sequences, returns
#             percent identity.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pairwise_percent_id{
	my($i,$j,$k, @iden_array_ref);
	for($i=0; $i< @_; $i++){  my %input= %{$_[$i]};  my @names= sort keys %input;
	  my(@temp, $iden, @all_pairs_id, $whole_seq_len, $residue_sum1,$residue_sum2);
	  my($final_av_iden, $larger, $percent_for_pair,@percent_for_pair, $iden_sum);
	  for ($i=0; $i < @names; $i ++){ $input{$names[$i]}=~ tr/a-z/A-Z/;
		 @temp = split('', $input{$names[$i]});  (@{"string$i"})=@temp;
		 $larger = @{"string$i"} if @{"string$i"} > $larger; }
			 for ($i=0; $i < @names; $i++){       # to make permutated pairs.
				for ($j=$i+1; $j < @names; $j ++){
					for ($k=0; $k < $larger; $k ++){  # getting composition tables for two seqs.
					  $iden+=2 if ((${"string$i"}[$k] eq ${"string$j"}[$k])&&(${"string$i"}[$k] =~ /\w/));
					  $residue_sum1++ if (${"string$i"}[$k] =~ /\w/);
					  $residue_sum2++ if (${"string$j"}[$k] =~ /\w/);  }
					$whole_seq_len =($residue_sum1+$residue_sum2);
					$percent_for_pair = $iden/$whole_seq_len*100;
					push(@percent_for_pair,$percent_for_pair);
					$residue_sum1=0; $residue_sum2=0; $iden=0; } }
			 for $iden (@percent_for_pair){ $iden_sum+=$iden;}
	  $final_av_iden=$iden_sum/( @percent_for_pair );
	  push(@iden_array_ref, \$final_av_iden);  }
	if(@iden_array_ref ==1){ return($iden_array_ref[0]);}else{ return(@iden_array_ref);}
}


#________________________________________________________________________
# Title     : get_seq_identity
# Usage     : $identity = ${&get_seq_identity(%arrayinput)};
#
# Function  : takes a ref. of a hash of names and sequences, returns
#             percent identity. NOT composition identity.

# Example   :
# Warning   :
# Keywords  : get_sequence_identity
# Options   :
# Returns   :
# Argument  : hash(es) of sequences.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_seq_identity{
	my($i,$j,$k, $c, @iden_array_ref);
	for($c=0; $c< @_; $c++){
	  my %input= %{$_[$c]};
	  my @names= sort keys %input;
	  my(@temp, $iden, @all_pairs_id, $whole_seq_len, $residue_sum1,$residue_sum2);
	  my($final_av_iden, $larger, $percent_for_pair,@percent_for_pair, $iden_sum);
	  for ($i=0; $i < @names; $i ++){
		 $input{$names[$i]}=~ tr/a-z/A-Z/;
		 @temp = split(//, $input{$names[$i]});
		 @{"string$i"}=@temp;
		 $larger = @{"string$i"} if @{"string$i"} > $larger; }
		 for ($i=0; $i < @names; $i++){       # to make permutated pairs.
			for ($j=$i+1; $j < @names; $j ++){
				for ($k=0; $k < $larger; $k ++){
				  if ( ${"string$i"}[$k] eq ${"string$j"}[$k] and ${"string$i"}[$k] =~ /\w/){
					 $iden+=2 ;
				  }
				  $residue_sum1++ if (${"string$i"}[$k] =~ /\w/);
				  $residue_sum2++ if (${"string$j"}[$k] =~ /\w/);
				}
				$whole_seq_len =($residue_sum1+$residue_sum2);
				$percent_for_pair = $iden/$whole_seq_len*100;
				push(@percent_for_pair,$percent_for_pair);
				$residue_sum1=0;
				$residue_sum2=0;
				$iden=0;
			}
		 }
	  for $iden (@percent_for_pair){
		  $iden_sum+=$iden;
	  }
	  if(@percent_for_pair <1){ @percent_for_pair=(1); }
	  $final_av_iden=$iden_sum/( @percent_for_pair );
	  push(@iden_array_ref, \$final_av_iden);
	}
	if(@iden_array_ref ==1){
	   return($iden_array_ref[0]);
	}else{
	   return(@iden_array_ref);
	}
}




#________________________________________________________________________
# Title     : get_correct_percent_alignment_rate  (made for Bissan)
# Usage     : &get_correct_percent_alignment_rate(\$file1, \$file2);
# Function  : accepts two files and prints out the sequence identities of the alignment.
# Example   :
# Warning   : Alpha version,  A Biomatic , made for Bissan
# Keywords  :
# Options   : h  # for help
#             v  # for verbose printouts(prints actual sequences)
# Returns   : reference of Scalar for percentage correct alignment(for already
#             aligned sequences)
# Argument  : two sequence files which have identical sequence names.
# Version   :
#--------------------------------------------------------------------
sub get_correct_percent_alignment_rate{
	 my($i, $j, $k, $verbose, @string1, @string2, $larger, $seq_pair_id, @seq_pair_ids );
	 my(%inputhash1) = %{&read_any_seq_files($_[0])};
	 my(%inputhash2) = %{&read_any_seq_files($_[1])};
	 my(@names)= sort keys %inputhash1;
	 ######################################
	 ####### Sub argument handling ########
	 ######################################
	 for($k=0; $k< @_ ;$k++){
		if( !ref($_[$k])&& (length(${$_[$k]}) < 5)){  # when inputs are not ref.
		  if($_[$k]=~ /^[\-vV]$/){ $verbose = 1; next;}
		}
		elsif((ref($_[$k]) eq "SCALAR")&&(length(${$_[$k]})<5)){  #  when inputs are  ref.
		  if(${$_[$k]}=~ /^[\-vV]$/){$verbose = 1;next;}          # should shorter than 5
		}
	 }
	 for($i =0; $i < @names; $i++){
		print "\n\n==== Processing structural $names[$i] against artificial $names[$i]\n";
		$inputhash1{$names[$i]} =~ tr/a-z/A-Z/;
		$inputhash2{$names[$i]} =~ tr/a-z/A-Z/;
		@string1=split(//, $inputhash1{$names[$i]});
		@string2=split(//, $inputhash2{$names[$i]});
		print "\n The string1 is  ",@string1,"\n" if $verbose ==1;
		print "\n The string2 is  ",@string2,"\n" if $verbose ==1;
		(@string2 > @string1) ? ($larger=@string2, $smaller=@string1) : ($larger=@string1, $smaller=@string2);
		$true_seq=$inputhash1{$names[$i]};
		$true_seq=~s/\W//g;
		$true_len=length($true_seq);
		print "\n True seq length:   $true_len  , Whole length inc gap: $larger\n";
		for($j = 0; $j < $larger; $j++){
		  $iden_sum++ if ($string1[$j] eq $string2[$j])&& !($string1[$j]=~/\W/); }
		$seq_pair_id =($iden_sum/$true_len) * 100;
		print "\nID between structural and artifical alignment is  $seq_pair_id \%" , "\n";
		push(@seq_pair_ids, $seq_pair_id);
		undef( $iden_sum, $seq_pair_id );
	 }
	 print "\n", "_"x88, "\n";
	 my($whole_average_of_the_id)=${&array_average(\@seq_pair_ids)};
	 print "The whole average is; $whole_average_of_the_id\n";
	 if(@seq_pair_ids == 1){ return( \$seq_pair_ids[0] ); }
	 elsif(@seq_pair_ids > 1){ return( \@seq_pair_ids ); }
}


#________________________________________________________________________
# Title     : amino_acid_compos_id_percent_trend
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub amino_acid_compos_id_percent_trend{
  my(%input) = %{$_[0]};
  my(@common, @string,@accumu_percent_iden)=(); my(%common_so_far, %compos_table);
  my($percent_id_so_far, $length_of_one_seq,$length_of_all_seq, $seq_no)=0;
  for $key(keys %input){
			$input{$key}=~s/[. \d-]//g;
			@string= split(//, $input{$key});
			print @string; print "\n";
			$length_of_one_seq = $#string+1;
			$length_of_all_seq +=$length_of_one_seq;
			$seq_no += 1;
			%compos_table  = &composition_table(@string);
			@check = keys (%common_so_far);
			if  ($#check < 0){ %common_so_far = %compos_table; }
			else{ %common_so_far= %{&common_compos_2_hash(\%common_so_far,\%compos_table)};}
			for $value(values %common_so_far){ $common_residue_sum +=$value; }
			$final_percent_id = $common_residue_sum/($length_of_all_seq/$seq_no)*100;
			$common_residue_sum =0;  }
  for $value(values %common_so_far){ $common_residue_sum +=$value; }
  $final_percent_id = $common_residue_sum/($length_of_all_seq/$seq_no)*100;
  return(\$final_percent_id);
}

#________________________________________________________________________
# Title     : composition_table   (can handle both nucleic and protein seq)
# Usage     : %output = %{&compos_table(@input_array1, @input_array2,,,,)};
#             example input
#
# Function  : returns a table of alphabet with occurances.
#             can handle any char, this converts char to upper case.
# Example   :
# Warning   : converts all SMALL letters to Capital letters before counting!!
# Keywords  :
# Options   :
# Returns   : %hash1 = ('A',3, 'C',2, 'D',1, 'Q',2, 'S',1), %hash2,,,
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub composition_table{
  my($i, @input,%input,$input,$j,@ref_out);
  for($i=0;$i<@_; $i++){
	 my(%alphabet)=();
	 if( ref($_[$i]) eq 'ARRAY'){ @input=@{$_[$i]}; undef(%alphabet);
		 for ($j=0; $j<=$#input; $j++){ $input[$j] =~ tr/a-y/A-Y/;
			 $alphabet{$input[$j]}+=1 if ($input[$j] =~/[A-Y]/) }
		 push(@ref_out, \%alphabet); }
	 elsif( ref($_[$i]) eq 'HASH'){ %input=%{$_[$i]};@input=keys %input;undef(%alphabet);
		 for ($j=0; $j< @input; $j++){ $input[$j] =~ tr/a-y/A-Y/;
			 $alphabet{$input[$j]}+=1 if ($input[$j] =~/[A-Y]/) }
		 push(@ref_out, \%alphabet); }
	 elsif( ref($_[$i]) eq 'SCALAR'){ $input=${$_[$i]}; $input=s/\,//g if $input=~/\,/;
		 @input=split('', $input); undef(%alphabet);
		 for ($j=0; $j<@input; $j++){ $input[$j] =~ tr/a-y/A-Y/;
			 $alphabet{$input[$j]}+=1; }
		 push(@ref_out, \%alphabet); }    }
  if(@ref_out ==1){
	 return($ref_out[0]);
  }else{ return(@ref_out); }
}

#________________________________________________________________________
# Title     : common_compos_2_hash
# Usage     : %hash = &common_compos_hash(\%any_hash1, \%any_hash1);
# Function  :
# Example   : common gaps means only '.' (dots, not alphabets!!)
#             AAA....BBCB
#             AAAB..B.BCC  --> A.A.....BC. (as in an array)
#             A.AAA...BCA
# Warning   :
# Keywords  :
# Options   :
# Returns   : a hash (string1, number1, string2, number2, string3, number3, ...)
# Argument  : two references of hash of seqeunces.
# Version   :
#--------------------------------------------------------------------
sub  common_compos_2_hash{ my(%hash1)=%{$_[0]}; my(%hash2)=%{$_[1]};
  my(%common_of_the_2)=(); my($common)=0;
  for $key1(keys %hash1){
	 $common=&smaller_one($hash1{$key1}, $hash2{$key1});
	 if ($common =~ /\d+/){
		$common_of_the_2{$key1}=$common; } }
  \%common_of_the_2;
}


#________________________________________________________________________
# Title     : pair_percent_id_trend
# Usage     : @array = &pair_percent_id_trend (%arrayinput);
# Function  :
# Example   : common gaps means only '.' (dots, not alphabets!!)
#             AAA....BBCB
#             AAAB..B.BCC  --> A.A.....BC. (as in an array)
#             A.AAA...BCA
#             The resulting array XXXXX..XXXX is literally like so.
#             This is to detect absurd gaps in the above.
#
#
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub pair_percent_id_trend{
  my(%input) = %{$_[0]};
  my(@common, @string,@accumu_percent_iden)=();
  my($percent_id_so_far)=0;
  for $key(keys %input){
	  my($len) = &smaller_one($#common, $#string) unless $#common < 0;
	  $input{$key}=~s/ //g;
	  @string= split(//, $input{$key});
	  $length_of_one_seq = $#string+1;
	  $length_of_all_seq +=$length_of_one_seq;
	  $seq_no += 1;
	  for ($k=0; $k <= $len;$k++){
		  if($#common == -1){
			 @common = @string;
			 last;
		  }
		  elsif (($string[$k] =~ /^(\W)/)&&($1 ne $previous_non_char)){
			 $non_char_count +=1;
			 $previous_non_char=$1;
		  }
		  elsif ( $string[$k] eq $common[$k] ){
			 $common[$k] = $string[$k];
			 $identical_count +=1;
		  }elsif( $string[$k] ne $common[$k]){
			 $common[$k]='.';
		  }
	  }
	  $num_of_iden_char = &count_num_of_char(\@common);
	  $av_seq_no = $length_of_all_seq/$seq_no;
	  $percent_id_so_far = $num_of_iden_char/$av_seq_no*100;

	  print "\n percent_id so far = $percent_id_so_far \n";
	  push(@accumu_percent_iden,$percent_id_so_far);

  } # end of for (after all sequences have been run).
  $num_of_iden_char = &count_num_of_char(@common);
  $av_seq_no = $length_of_all_seq/$seq_no;
  $percent_id_so_far = $num_of_iden_char/$av_seq_no*100;
  print "\n percent_id so far = $percent_id_so_far \n";
  \@accumu_percent_iden; # final ids array.
}
#________________________________________________________________________
# Title     : smaller_one
# Usage     : $smaller = & smaller_one($var, $var2);
#
# Function  : gets smaller value of the two inputs
# Example   : will return   5   with  &smaller_one(5, 50);
# Warning   : gets only digits!!
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub smaller_one{
  if (($_[0], $_[1]=~/\d+/)||($_[0] > $_[1])){
	 return $_[1];
  }elsif(($_[0], $_[1]=~/\d+/)||($_[0] <= $_[1])){
	 return $_[0];
  }else{
	 print "\n I am sub 'smaller_one', the input were not digits \n";
  }
}

#________________________________________________________________________
# Title     : count_num_of_char
# Usage     : $num_char = &count_num_of_char(@input_array_of_single_char);
# Function  : takes only ARRAY and counts the number of char. Each elem should be
#             a single char.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub count_num_of_char{
	 my(@input)={$_[0]};
	 my($num_of_char)=0;
	 for $elem(@input){  # this is for the percentage of TWO seqs.
		 if ($elem =~ /\w/){
					 $num_of_char +=1;
		 }
	 }
	 $num_of_char;
}
#________________________________________________________________________
# Title     : remov_com_column2  (this is the older and slower version)
# Usage     : %new_string = %{&remov_com_column2(\%input_hash)};
# Function  :
# Example   : seq1  ABCDE------DDD         seq1  ABCDE--DDD
#             seq2  ABCDEE-----DD-  ==>    seq2  ABCDEE-DD-
#             seq3  ---DEE----DDE-         seq3  ---DEEDDE-
#                         ^^^^
#             from above the 4 columns of gap will be removed
#             To remove absurd gaps in multiple sequence alignment
# Warning   :
# Keywords  :
# Options   :
# Returns   : a ref. of a hash.
#
#               <input hash>                   <out hash>
#
# Argument  : accepts reference for a hash.
# Version   : 1.0
#--------------------------------------------------------------------
sub remov_com_column2{
  my(%input) = %{$_[0]};
  my(@common)=();
  my($len)=0;
  my(@string)=();
  my(@new_string)=();
  my(@string2)=();
  my(%new_string);

  ########## Finds common gaps ###########
  for $key(keys %input){
	  $len  = &smaller_one($#common, $#string) unless $#common < 0;
		  sub smaller_one{if ($_[0] > $_[1]){ $_[1];}else{$_[0];}}
	  @string = split(/|| /, $input{$key});
	  for ($k=0; $k <= $len;$k++){
		  if($#common == -1){
			  @common = (@string);
			  last;
		  }
		  if (($string[$k] eq '.')&&($common[$k] eq '.')){
			  $common[$k]='.';
		  }else{
			  $common[$k]='X';
		  }
	  }
  }

  ########## removes gaps ###########
  for $key2 (keys %input){
		@string2 = split(//, $input{$key2});
		for ($i=0; $i <= $#string2; $i++){
		  if ($common[$i] eq $string2[$i]){
			  print;
		  }else{
			  push(@new_string, $string2[$i]);
		  }
		}
		$new_string{$key2}= join("", @new_string);
		@new_string = ();
  }
  \%new_string;
}
#________________________________________________________________________
# Title     : get_common_column   (similar to overlay_seq_for_identical_chars )
# Usage     : %out =%{&get_common_column(\%hash1, \%hash2, '-')};
# Function  : (name1         --EHH--HHEE-- )
#             (name2         --HHH--EEEE-- ) ==> result is;
#
#             (name1_name2   -- HH--  EE-- )
#             to get the identical chars in hash strings of sequences.
#
# Example   : %out =%{&get_common_column(\%hash1, \%hash2, '-')};
#             output> with 'E' option >>> "name1     --HHH--1232-"
#   Following input will give;
#   %hash1 = ('s1', '--EHH-CHHEE----EHH--HHEE----EHH--HHEE----EHH-CHHEE--');
#   %hash2 = ('s2', '--EEH-CHHEE----EEH-CHHEE----EEH-CHHEE----EEH-CHHEE--');
#   %hash3 = ('s3', '-KEEH-CHHEE-XX-EEH-CHHEE----EEH-CHHEE----EEH-CHHEE--');
#   %hash4 = ('s4', '-TESH-CHEEE-XX-EEH-CHHEE----EEH-CHHEE----EEH-CHHEE--');
#
#     s1_s2_s3_s4    --E-H-CH-EE----E-H--HHEE----E-H--HHEE----E-H-CHHEE--
#
# Warning   : This gets more than 2 hashes. Not more than that!
#
#             get common column in sequence, superpose_secondary_structure,
#             get_common_secondary_structure,
#             for secondary structure only representation.
# Keywords  : Overlap, superpose hash, overlay identical chars, superpose_seq_hash
#             get_common_column, get_com_column, get_common_sequence,
#             get_common_seq_region, multiply_seq_hash,
# Options   :
# Returns   : one hash ref. of the combined key name (i.e., name1_name2). Combined by '_'
# Argument  : 2 or more ref for hash of identical keys and value length.
#             One optional arg for replacing space char to the given one.
# Version   : 1.5
#--------------------------------------------------------------------
sub get_common_column{
  my($i, $k,$j, $name1, $name2, @in, %out, @out_chars, $gap_chr, @str1, @str2);
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  Sub argument handling     $gap_chr here can be 'HE' etc.
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  for($k=0; $k< @_ ;$k++){
	  if( ( !ref($_[$k]) )&&($_[$k]=~ /^(.)$/) ){
		  $gap_chr  .= $1;    }
	  elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(.)$/) ){
		  $gap_chr  .= $1;    }
	  elsif(ref($_[$k]) eq "HASH") { push(@in,  $_[$k]); }    }

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #"  Checking if the input hashes were right
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( (@in < 2) && ( ref($in[0]) eq 'HASH') ){
	  print "\n", __LINE__, " # get_common_column usually needs 2 hashes. Error \n";
	  print "\n", __LINE__, " # get_common_column : Anyway, I will just return the single input hash:  @in. \n";
	  %out=%{$in[0]}; # <--- This is essential to return the single input hash!!
	  goto FINAL;
  }

  %out = %{$in[0]};  ## Initializing %out
  print "\n",__LINE__, " # get_common_column hashes given are: @in \n" if $debug eq 1;

  for( $k=1; $k < @in; $k++){
		my(@out_chars);   ## <-- Necessary to prevent concatenation.
		my(%hash1)=%out;
		my(%hash2)=%{$in[$k]};
		my(@names1)= sort keys %hash1;
		my(@names2)= sort keys %hash2;
		$name1 = $names1[0];
		$name2 = $names2[0];
		@str1=split(/||\,/, $hash1{$names1[0]});
		@str2=split(/||\,/, $hash2{$names2[0]});
		for($i=0; $i < @str1; $i++){
			if($str1[$i] eq $str2[$i] ){
				push(@out_chars, $str1[$i]); }
			elsif( defined($gap_chr) ){ push(@out_chars, $gap_chr); }
			else{ push(@out_chars, ' '); }
		}
		if( $name2 < $name1){      ## To make an ordered name output eg.  seq1_seq2, than  seq2_seq1
			%out='';
			$out{"$name2\_$name1"}= join("", @out_chars); }
		else{
			%out='';
			$out{"$name1\_$name2"}= join("", @out_chars); }
  }
  FINAL:
  if ($debug eq 1){
	  print "\n",__LINE__, " # get_common_column Final res. \%out :\n",
	  &show_hash(%out);
  }
  \%out;
}
#________________________________________________________________________
# Title     : overlay_seq_for_identical_chars
# Usage     : %out =%{&overlay_seq_for_identical_chars(\%hash1, \%hash2, '-')};
# Function  : (name1         --EHH--HHEE-- )
#             (name2         --HHH--EEEE-- ) ==> result is;
#
#             (name1_name2   -- HH--  EE-- )
#             to get the identical chars in hash strings of sequences.
#
# Example   : %out =%{&overlay_seq_for_identical_chars(\%hash1, \%hash2, '-')};
#             output> with 'E' option >>> "name1     --HHH--1232-"
# Warning   : Works only for 2 sequence hashes.
# Keywords  : Overlap, superpose hash, overlay identical chars, superpose_seq_hash
# Options   :
# Returns   : one hash ref. of the combined key name (i.e., name1_name2). Combined by '_'
# Argument  : 2 ref for hash of identical keys and value length. One optional arg for
#             replacing space char to the given one.
# Version   : 1.0
#--------------------------------------------------------------------
sub overlay_seq_for_identical_chars{
  my($i, $k,$j, $name1, $name2, @in, %out, @out_chars, $gap_chr, @str1, @str2);
  ######################################
  ####### Sub argument handling ########  $gap_chr here can be 'HE' etc.
  ######################################
  for($k=0; $k< @_ ;$k++){
	  if( ( !ref($_[$k]) )&&($_[$k]=~ /^(.)$/) ){
		  $gap_chr  .= $1;    }
	  elsif((ref($_[$k]) eq "SCALAR")&&(${$_[$k]}=~ /^(.)$/) ){
		  $gap_chr  .= $1;    }
	  elsif(ref($_[$k]) eq "HASH") { push(@in,  $_[$k]); }    }

  if(@in < 2){ print "\n overlay_seq_for_identical_chars needs 2 hashes. Error \n"; exit; }
  my(%hash1)=%{$in[0]}; my(%hash2)=%{$in[1]};
  my(@names1)=sort keys %hash1; my(@names2)= sort keys %hash2;
  $name1 = $names1[0]; $name2 = $names2[0];
  @str1=split(/|\,/, $hash1{$names1[0]}); @str2=split(/|\,/, $hash2{$names2[0]});
  for($i=0; $i < @str1; $i++){
	  if($str1[$i] eq $str2[$i] ){
		  push(@out_chars, $str1[$i]); }
	  elsif( defined($gap_chr) ){ push(@out_chars, $gap_chr); }
	  else{ push(@out_chars, ' '); }
  }
  if( $name2 gt $name1){
	  $out{"$name1\_$name2"}= join(",", @out_chars); }
  else{  $out{"$name2\_$name1"}= join(",", @out_chars); }
  \%out;
}

#________________________________________________________________________
# Title     : remov_com_column
# Usage     : %new_string = %{&remov_com_column(\%hashinput)};
#             @out=@{&remov_com_column(\@array3)};
# Function  : removes common gap column in seq.
# Example   :
# Warning   :
# Keywords  : remove_com_column, remove_common_column,
#             remove_common_gap_column, remov_common_gap_column,
#             remove com column
# Options   :
# Returns   : a ref. of  hash(es) and array(s).
#
#             name1   ABCDE....DDD       name1  ABCDE..DDD
#             name2   ABCDEE..DD..  -->  name2  ABCDEEDD..
#             name3   ...DEE..DDE.       name3  ...DEEDDE.
#
#             (ABC....CD, ABCD...EE) --> (ABC.CD, ABCDEE)
#             from above the two column of dot will be removed
#             To remove absurd gaps in multiple sequence alignment. for nt6-hmm.pl
# Argument  : accepts reference for hash(es) and array(s).
# Version   :
#--------------------------------------------------------------------
sub remov_com_column{
  my(@hash_ref_out, $d, $gap_char);
  for($d=0; $d < @_; $d++){
	  if(ref($_[$d]) eq 'HASH'){
	      my($len,@string,@new_string,@string2);
		  my(%input)=%{$_[$d]};
		  my(@common);
		  for (keys %input){
			  @string = split('', $input{$_});
			  if(!(defined(@common))){ @common = (@string);  }
			  else{ for ($k=0; $k < @string; $k++){
				 if (($string[$k] =~ /\W/ )&&($common[$k] =~ /(\W)/)){ $common[$k]= $1;}
				 elsif(($string[$k] =~ /(\W)/)&&(!(defined($common[$k])))){ $common[$k]=$1;}
				 else{ $common[$k]='X';} } } }
		  for (keys %input){ @string2 = split(//, $input{$_});
			  for ($i=0; $i < @string2; $i++){
				 if ($common[$i] ne $string2[$i]){ push(@new_string, $string2[$i]); } }
			  $new_string{$_}= join('', @new_string); @new_string = ();      }
		  push(@hash_ref_out, \%new_string);
	  }
	  elsif(ref($_[$d]) eq 'ARRAY'){
	      my( $k, $y, $x,@string_array, @string);
		  my(@input)=@{$_[$d]};  @common=();
		  for($y=0; $y< @input; $y++){
			  @string = split('', $input[$y]);
			  if(!(defined(@common))){  @common = @string;  }
			  else{
				 for ($k=0; $k < @string; $k++){
					 if (($string[$k]  =~ /(\W)/)&&($common[$k]  =~ /(\W)/)){ $common[$k]=$1;}
					 elsif(($string[$k] =~ /(\W)/)&&(!(defined($common[$k])))){ $common[$k]=$1;}
					 else{ $common[$k]='X';}
				 }
			  }
		  }
		  for($x=0; $x < @input; $x++){
		      my($new_string, @string2);
			  @string2 = split(//, $input[$x]);
			  for ($i=0; $i < @string2; $i++){
				  if ($common[$i] ne $string2[$i]){ $new_string.= "$string2[$i]"; }
			  }
			  push(@string_array, $new_string);
		  }
		  push(@hash_ref_out, \@string_array);
	  }
  }
  if(@hash_ref_out ==1) { return( $hash_ref_out[0] ); }
  elsif(@hash_ref_out>1){ return( @hash_ref_out ) }
}

#________________________________________________________________________
# Title     : remov_common_gap
# Usage     : %result = &remov_common_gap (*common_pos_arr, *target_hash_of_sequence);
# Function  : XXX...XXX, and an hash input. removes all the common gap(dots) in targets.
# Example   : XXX...XXX with AAA.....BBBB, The common positions of 3,4,5 deleted
#             XXX...XXX will be removed in AAA.....BBBB --> AAA..BBBB
#             XXX...XXX is an @array, while AAA.....BBBB is a value of the input hash
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub remov_common_gap{
	my(@common)=@{$_[0]}; my(%input)=%{$_[1]};
	for $key2 (keys %input){
		my(@string2)  = split(//, $input{$key2});
		for ($i=0; $i <= $#string2; $i++){
		  if ($common[$i] eq $string2[$i]){shift;}else{ push(@new_string, $string2[$i]);}}
		$new_string{$key2}= join("", @new_string);
		@new_string = (); }
	\%new_string;
}


#________________________________________________________________________
# Title     : com_gap_pos_hash
# Usage     : @array = @{&com_elem_pos_hash(%arrayinput)};
# Function  : returns X...XXXX, as an array. '.' means common elements.
# Example   : common gaps means only '.' (dots, not alphabets!!)
#             AAA....BBBB
#             AABB....BBC  --> XXXXX..XXXX (as in an array)
#             ..AAA...BCA
#             This is to detect absurd gaps in the above.
# Warning   :
# Keywords  : common_gap_pos_hash
# Options   :
# Returns   :
# Argument  : gets a ref. of a hash of sequences
# Version   : 1.0
#--------------------------------------------------------------------
sub com_gap_pos_hash{
  my(%input) = %{$_[0]};
  for $key(keys %input){
	my($len) = &smaller_one($#common, $#string) unless $#common < 0;
	  sub smaller_one{if ($_[0] > $_[1]){ $_[1];}else{$_[0];}}
	@string= split(//, $input{$key});
	for ($k=0; $k <= $len;$k++){
	  if($#common == -1){
	     my(@common) = @string;
		 last;
	  }
	  if (($string[$k] ne '.')||($common[$k] ne '.')){
	     $common[$k]='X';
	  }
	}
  }
  \@common;
}
#________________________________________________________________________
# Title     : pairwise_iden_pos
# Usage     : @array = &pairwise_iden_pos(%arrayinput);
# Function  :
# Example   : common gaps means only '.' (dots, not alphabets!!)
#             AAA....BBCB
#             AAAB..B.BCC  --> A.A.....BC. (as in an array)
#             A.AAA...BCA
#             The resulting array XXXXX..XXXX is literally like so.
#             This is to detect absurd gaps in the above.
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pairwise_iden_pos{
  my(%input) = %{$_[0]};
  my(@common, @string)=();
  for $key(keys %input){
	  my($len) = &smaller_one($#common, $#string) unless $#common < 0;
	  sub smaller_one{if ($_[0] > $_[1]){ $_[1];}else{$_[0];}}
	  @string= split(//, $input{$key});
	  for ($k=0; $k <= $len;$k++){
					  if($#common == -1){
								 @common = @string;
								 last;
					  }
					  elsif (($string[$k] =~ /^(\W)/)&&($1 ne $previous_non_char)){
								 $non_char_count +=1;
								 $previous_non_char=$1;
					  }
					  elsif ( $string[$k] eq $common[$k] ){
						$common[$k] = $string[$k];
					  }elsif( $string[$k] ne $common[$k]){
								 $common[$k]='.';
					  }
	  }
	}
	\@common;	# returns an array.
}
#________________________________________________________________________
# Title     : open_pdb_files  (read the sequences only)
# Usage     : %out = %{&open_pdb_files(\$VAR)};
# Function  : Convert a PDB structure file to FASTA format sequences.
# Example   :
# Warning   : (read the sequences only)
# Keywords  : read_pdb_files{, read pdb files, open pdb files
# Options   :
# Returns   : One ref. for a hash of sequences(DNA, RNA, PROTEIN (IN diff chains)
#             If the two chains are identical, it rids of one of them and returns
#             a name with out chain note-->  2tma, not 2tmaA and 2tmaB
# Argument  : one ref. for an inputfile (absolute
#             >>> PDB example >>>
#             SEQRES   1 A  284  MET ASP ALA ILE LYS LYS LYS MET GLN MET LEU LYS LEU  2TMA  51
#             SEQRES   2 A  284  ASP LYS GLU ASN ALA LEU ASP ARG ALA GLU GLN ALA GLU  2TMA  52
#
# Version   : 1.6
#--------------------------------------------------------------------
sub open_pdb_files{
  my($input)=${$_[0]} if(ref($_[0]) eq 'SCALAR');
  my($input)=$_[0];
  if($input=~/\.fa/){ print "\n You have put fasta file to PDB prog\n" }
  my $input=${&find_seq_files(\$input)};
  my($i, $j, $keys, $temp,$outseq, $SEQRES, $res_numb, $structure, %outhash, @fields,
	  %AA, @residues);
	 $AA{"ALA"} = "A";  $AA{"MET"} = "M";  $AA{"ASP"} = "D";  $AA{"PRO"} = "P";
	 $AA{"CYS"} = "C";  $AA{"ASN"} = "N";  $AA{"GLU"} = "E";  $AA{"GLN"} = "Q";
	 $AA{"PHE"} = "F";  $AA{"ARG"} = "R";  $AA{"GLY"} = "G";  $AA{"SER"} = "S";
	 $AA{"HIS"} = "H";  $AA{"THR"} = "T";  $AA{"ILE"} = "I";  $AA{"VAL"} = "V";
	 $AA{"LYS"} = "K";  $AA{"TRP"} = "W";  $AA{"LEU"} = "L";  $AA{"TYR"} = "Y";
  open(INPUT_PDB_FILE, "$input");
  while (<INPUT_PDB_FILE>){
	 my($chain, $seq_size, $residues, $pdb_name);
	 if(/^SEQRES +\d+ +(.*) +(\d+) +(.{51,53}) +(\w\w\w\w)[\w]* +\d+/){
		$SEQRES = 1;
		$chain =$1;
		$seq_size = $2;
		$residues = $3;
		$pdb_name=$4;
		@residues=split(' ', $residues);
		if($residues[0]=~/^[A-U]$/){
			for(@residues){$outhash{"\L$pdb_name\U$chain\E"}.=$_;}
		}else{
			for(@residues){$outhash{"\L$pdb_name\U$chain\E"}.=$AA{$_};}  ## use %AA matrix
		}
	 }elsif( ($SEQRES != 1)&&(/^ATOM +\d+ +\w+ +(\w\w\w) (.) +(\d+).+ +(\w\w\w\w) *\d+/) ){
		if($res_numb == $3){ next;
		}else{
		  $chain = $2;
		  $res_numb = $3;
		  $pdb_name=$4;
		  $outhash{"\L$pdb_name\U$chain\E"}.= $AA{"$1"};
		}
	 }
  }
  @keys=keys %outhash;
  if ($chain=~/[A-Z]/){
	 for ($i=0; $i < @keys; $i++){
		for ($j=$i+1; $j < @keys; $j++){
		  if ($outhash{$keys[$i]} eq $outhash{$keys[$j]}){
			 delete($outhash{$keys[$j]});
			 $temp=$keys[$i];
			 chop($temp);
			 $outhash{$temp}=$outhash{$keys[$i]};
			 delete($outhash{$keys[$i]});
		  }
		}
	 }
  }
  return( \%outhash );
}

#________________________________________________________________________
# Title     : open_brk_files (read the sequences only)(the same as open_pdb_files
# Usage     : %out = %{&open_brk_files(\$VAR)};
# Function  : Convert a PDB structure file to FASTA format sequences.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : One ref. for a hash of sequences(DNA, RNA, PROTEIN (IN diff chains)
#             If the two chains are identical, it rids of one of them and returns
#             a name with out chain note-->  2tma, not 2tmaA and 2tmaB
# Argument  : one ref. for an inputfile (absolute
#             >>> PDB example >>>
#
#             SEQRES   1 A  284  MET ASP ALA ILE LYS LYS LYS MET GLN MET LEU LYS LEU  2TMA  51
#             SEQRES   2 A  284  ASP LYS GLU ASN ALA LEU ASP ARG ALA GLU GLN ALA GLU  2TMA  52
#             SEQRES   3 A  284  ALA ASP LYS LYS ALA ALA GLU ASP ARG SER LYS GLN LEU  2TMA  53
#
# Version   :
#--------------------------------------------------------------------
sub open_brk_files{    my($input)=${$_[0]};  $input=${&find_seq_files(\$input)};
  my($outseq, $structure, %outhash, @fields, %AA, $chain, $residues);
	 $AA{"ALA"} = "A";  $AA{"MET"} = "M";  $AA{"ASP"} = "D";  $AA{"PRO"} = "P";
	 $AA{"CYS"} = "C";  $AA{"ASN"} = "N";  $AA{"GLU"} = "E";  $AA{"GLN"} = "Q";
	 $AA{"PHE"} = "F";  $AA{"ARG"} = "R";  $AA{"GLY"} = "G";  $AA{"SER"} = "S";
	 $AA{"HIS"} = "H";  $AA{"THR"} = "T";  $AA{"ILE"} = "I";  $AA{"VAL"} = "V";
	 $AA{"LYS"} = "K";  $AA{"TRP"} = "W";  $AA{"LEU"} = "L";  $AA{"TYR"} = "Y";
  open(INPUT_PDB_FILE, "$input");
  while (<INPUT_PDB_FILE>){
	 if(/^HELIX/ || /^ATOM/ || /^FTNOTE/ || /^HET/){ last; };
	 if(/^SEQRES +\d+ +(.) +(\d+) +(.+)\s+(\w+)\s+\d+.+$/){
		$chain =$1; $seq_size = $2; $residues = $3; $pdb_name=$4;
		@residues=split(' ', $residues);
		if($residues[0]=~/[A-U]/){ # <-- Check if it is DNA/RNA seq.
		  for(@residues){  $outhash{"\L$pdb_name\U$chain\E"}.=$_;  }
		}else{
		  for (@residues){  $outhash{"\L$pdb_name\U$chain\E"}.=$AA{$_}; } } }
  }
  @keys=keys %outhash;
  if ($chain=~/[A-Z]/){
	 for ($i=0; $i < @keys; $i++){
		for ($j=$i+1; $j < @keys; $j++){
		  if ($outhash{$keys[$i]} eq $outhash{$keys[$j]}){
			 delete($outhash{$keys[$j]});  $temp=$keys[$i];
			 chop($temp);  $outhash{$temp}=$outhash{$keys[$i]};
			 delete($outhash{$keys[$i]});}}}}  return( \%outhash  );
}

#________________________________________________________________________
# Title     : open_msf_jp_files
# Usage     : &open_msf_jp_files($file1, $file2);
# Function  : makes two hashes from  ...msf and ..jp files. %array1 is for msf
# Example   :
# Warning   : !!! not very general bettter not use.
#             msf file is meant to be seq
#             jp file is meant to be structural alignment (correct seq
#
#             msf format is
#
#             cofi_human  ATFVKM
#             ici2_horvu  RVRLFVDKLD NIA
#             ici3_horvu  RVRLFVDRLD NIA
#
#             jp format is;
#
#             ycah_ecoli  RNVEIV----VID-GVRRFGNIA
#             icis_vicfa  RVRLYVDESNKVV-RAAPIGNIA
#             ier1_lyces  RVRLFVNLLDIVV-QTPKVGNIA
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub open_msf_jp_files{
	my($file1, $file2) = @{$_[0]};  my(@return);
	open(F1,"$file1");
	while(<F1>){
	  if(/^([\w_]+)[\t]* +(.+)$/){	# matching patterns
		 if(!$array1{$1}){      		# if hash %array1 is not defined,
			$names1[++$nf1]=$1;
		 }
		 $array1{$1}.=$2;   }   }
	open(F2,"$file2");  			# reading in (JP) file
	while(<F2>){
		if(/^([\w]+)[\t]* +([\w-]+[ ]+)$/){
			substr($_,11)=~ s/ /-/g;      }
		if(/^([\w]+)  +([\w-]+)$/){
			if(!$array2{$1}){
				$names2[++$nf2]=$1;
			}
			$array2{$1}.=$2; }  }
	@return =(\%array1, \%array2);
	\@return;
}



#________________________________________________________________________
# Title     : scoring
# Usage     :
# Function  :
# Example   :
# Warning   : not general, !!!
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub scoring{
  $hash_subt{$subt}++; 	# this is for individual seq.
																		  # if $subt is 1, $hash_subt{1} will increase
																		  # its value(not key) by one, etc.
  $hash_subt_all{$subt}++;  # for the wholesome of seqs.
}

#______________________________________________________________
# Title     : sort_files_by_time
# Usage     : @files = @{&sort_files_by_time(\@files)};
# Function  : sorts files by creation time. Oldest the first
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : sort_by_time, sort_files_chronically
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub sort_files_by_time{  # by "Shawn Wagner" <shawnw@wpi.edu>
   my @files;
   if(ref($_[0]) eq 'ARRAY'){
	  @files=@{$_[0]};
   }else{
	  @files=@_;
   }
   @files= reverse sort { -M $a <=> -M $b} @files;
   return(\@files);
}


#______________________________________________________________________
# Title     : sort_hash_by_value_and_make_array
# Usage     : @values_sorted =@{&sort_hash_by_value_and_make_array(\%assoc)};
# Function  : sorts any hash by its values and returns ref. of sorted hash values
#             with keys attached. So, if the input key value were
#             key1 value1, the result will be an element 'value1 key1' as
#             a string
# Example   :
# Warning   : The same values will be overwritten.
# Keywords  : sort_hash_by_value, sort_hash, sort_by_values,
# Options   : -n  for numerical sort(not working yet)
# Returns   :
# Argument  :
# Version   : 1.1
#------------------------------------------------------------------
sub sort_hash_by_value_and_make_array{
  my ($i, $j, %hash2, @array, $num_sort, @array_sorted);
  my %hash = %{$_[0]};
  my @keys = keys %hash;
  sub numerically{  $a <=> $b;  }
  for($i=0; $i<@ARGV; $i++){ # if numerically option is set.
	  if($ARGV[$i]=~/^\-n$/){
		  $num_sort =1;
		  splice(@ARGV, 0, 1);
	  }
  }
  for($i=0; $i < @keys; $i ++){
	  ${"long_string$i"} = "$hash{$keys[$i]} $keys[$i]";
	  #push(@array, ${"long_string$i"});
  }
  for($i=0; $i < @keys; $i++){
	  for($j=1; $j < @_; $j ++){
		  %hash2 = %{$_[$j]};
		  ${"long_string$i"} .= "  $hash2{$keys[$i]}";
	  }
	  push(@array, ${"long_string$i"});
  }
  if($num_sort==1){
	  @array_sorted = sort numerically @array;
  }else{ @array_sorted = sort  @array; }
  return(\@array_sorted);
}

#________________________________________________________________________
# Title     :  sort_by_hash_values
# Usage     : @values_sorted =@{sort_by_by_values(\%assoc)};
# Function  : sorts any hash by its values and returns ref. of sorted hash values
# Example   :
# Warning   : The same values will be overwritten.
# Keywords  : sort_hash_by_value, sort_hash, sort_by_values, sort_by_value
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub sort_by_hash_values{
  my(%hash) = %{$_[0]};
  @values = sort values %hash;
  \@values
}

#________________________________________________________________________
# Title     :  sort_by_keys
# Usage     : @values_sorted =@{sort_by_by_values(\%assoc)};
# Function  : sorts any hash by its values and returns ref. of sorted hash values
# Example   :
# Warning   :
# Keywords  : sort_hash_by_keys, sort_hash, key_sort
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub sort_by_keys{
  my(%hash) = %{$_[0]};
  @values = sort keys %hash;
  \@values
}

#________________________________________________________________________
# Title     :  sort_hash_by_keys
# Usage     : @values_sorted =@{sort_by_values(\%assoc)};
# Function  : sorts any hash by its values and returns ref. of sorted hash values
# Example   :
# Warning   :
# Keywords  : sort_hash_by_keys, sort_hash, key_sort
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub sort_hash_by_keys{
  my(%hash) = %{$_[0]};
  @values = sort keys %hash;
  \@values
}


#________________________________________________________________________
# Title     : sort_hash_by_value
# Usage     : @values_sorted =@{sort_hash_by_values(\%assoc)};
# Function  : sorts any hash by its values and returns ref. of sorted hash values
# Example   :
# Keywords  : sort_hash_by_value, sort_hash, value_sort,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub sort_hash_by_value{
  my(%hash) = %{$_[0]};
  @values = sort values %hash;
  return(\@values);
}


#________________________________________________________________________
# Title     : by_values
# Usage     : for $key(sort by_values(values %assoc)){print $assoc{$key},"\n";}
# Function  :
# Example   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub by_values{
  $hash{$a} cmp  $hash{$b}
}

#______________________________________________________________________
# Title     : sort_string_by_length (synonym of sort_str_by_length  )
# Usage     : @output = @{&sort_string_by_length(@any_input_strings, [-r], @more)};
# Function  : sorts strings in array according to their sizes
#             bigger comes first.
# Example   :
# Warning   :
# Keywords  : sort_array_by_length, sort_str_by_length, sort_array_string_by
#             sort_string_by_leng, sort_by_length, sort_by_leng,
#             sort_array_by_string_length, sort_array_elements_by_string_length
# Options   : -r  reverse the order
# Version   : 1.2
#-------------------------------------------------------------------
sub sort_string_by_length{
	my(@input, $i, $small_first, @output);
	for($i=0; $i<@_; $i++){
		if( $_[$i]=~/^\-?r$/i){
			$small_first =1;
			splice(@_, $i, 1);
		}elsif(ref($_[$i]) eq 'ARRAY'){
		    push(@input, @{$_[$i]});
		}elsif(ref($_[$i]) eq 'SCALAR'){
			if(${$_[$i]}=~/^\-?r$/i){
			   $small_first=1;
			   splice(@_, $i, 1);
			}else{
			   push(@input, ${$_[$i]});
			}
		}elsif( !ref($_[$i]) ){
		    push(@input, $_[$i]);
		}
	}
	if($small_first ==1){
	    @output = sort {length($a) <=> length($a) || ($b cmp $a)} @input;
	}else{
	    @output = sort {length($b) <=> length($a) || ($a cmp $b)} @input;
	}
	return (\@output);
}


#________________________________________________________________________
# Title     : get_host_by_addr
# Usage     : ($name,$aliases,$addrtype,$length,@addrs)=&get_host_by_addr('131.111.137.11'); or
# Function  :
# Example   : ($name,$aliases,$addrtype,$length,@addrs)=&get_host_by_addr($var); while $var = "13.13.12.12";
# Warning   :
# Keywords  : get_host_by_address, get_hostname_by_address
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_host_by_addr{
   my $addr, @output;
   if(ref($_[0]) eq 'SCALAR'){
	   $addr= ${$_[0]};
   }elsif(!ref($_[0])){
	  $addr=$_[0];
   }
	my(@addr) = split(/\./, $addr);
	my($address) = pack("C4",@addr);
   my($name,$alias,$addrtype,$leng,@addrs)= gethostbyaddr($address,2);
   push(@output, $name,$alias,$addrtype,$leng,@addrs);
   return(\@output);
}

#________________________________________________________________________
# Title     : get_host_by_name
# Usage     : ($name,$aliases,$addrtype,$length,@addrs)=&get_host_by_name('ind4'); or
# Function  :
# Example   : ($name,$aliases,$addrtype,$length,@addrs)=&get_host_by_name($var);
#             while $var = "ind4";
# Warning   : ! not working yet.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_host_by_name{
	my($name)=$_[0];
	my($name,$alias,$addrtype,$leng,@addrs)= gethostbyname($name);
}

#________________________________________________________________________
# Title     : word_wrap
# Usage     : &word_wrap($line_to_format)
# Function  :
# Example   :
# Warning   :
#             The following subroutine does word wrapping on a text string
# Keywords  :
# Options   :
# Returns   :
#             The string with newlines replacing spaces in appropriate places.
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub word_wrap {
	 local ($_) = @_;            # Access the argument.
	 s/(.{60}\S+)\s+/$1\n/g;     # Wrap on first space after 60th char.
										  # (This isn't the best algorithm, but it's
										  # simple.)
	 $_ && "$_\n";               # Append a newline if there's any text.
}

#________________________________________________________________________
# Title     : show_array
# Usage     : &show_array(\@input_array);
# Function  : for debugging purpose. Shows any array elem line by line.
# Example   : Output:      item1
#             Output:      item2
#             Output:      item3
# Warning   : can handle scalar ref, too.
# Keywords  :
# Options   : -h  for horizontal display of elements
#             c   for compact (do not put new line between array chunk)
#             s   for putting new line between arrays
# Returns   :
# Argument  :
# Version   : 2.4
#--------------------------------------------------------------------
sub show_array{
  my($k, $i, $t,  @in2, $in, $space, $show_horizontally, $compact);
  my(@in)=@_;

  ## This is to get the option of 'horizontal' to make horizontal output.
  for($t=0; $t < @in ; $t++){
	 if($in[$t] =~/\-?[hH][orizontal]*$/){   ### No ref.
		 $show_horizontally = "h";
		 splice(@in, $t, 1);  $t--;
	 }elsif(${in[$t]} =~/-?[hH][orizontal]*$/){  ### ref.
		 $show_horizontally = "h";
		 splice(@in, $t, 1);  $t--;
	 }elsif(${in[$t]} =~/^s$/i){  ### ref.
		 $space = "s";
		 $compact='';
		 splice(@in, $t, 1);  $t--;
	 }elsif(${in[$t]} =~/^c$/i){  ### ref.
		 $compact = "c";
		 $space='';
		 splice(@in, $t, 1);  $t--;
	 }
  }

  for($k=0; $k < @in; $k++){
	 if(ref($in[$k]) eq 'ARRAY'){
		 &show_array(@{$in[$k]}, "$show_horizontally", "$compact", "$space" );
	 }elsif(ref($in[$k]) eq 'SCALAR'){
		 if($show_horizontally eq "h"){
			 print ${$in[$k]}, ",  ";
		 }elsif(  $show_horizontally ne "h"){
			 print ${$in[$k]}, "\n";
		 }
	 }elsif( !ref($in[$k]) ){
		 if($show_horizontally eq 'h'){
			 print  $in[$k] , ",  ";
		 }elsif(  $show_horizontally ne "h"){
			 print  $in[$k] , "\n";
		 }
	 }
  }
  if($compact !~/^c$/i){
	print "\n"; #### This is necessary to distinguish different arrays.
  }
}

#________________________________________________________________________
# Title     : array_most_occur
# Usage     : $median = ${&array_most_occur(\@array)};
# Function  :
# Example   :
# Warning   :
# Keywords  :  median_array, get_median_array, get_array_median, array_median
# Options   :
# Returns   : \$median
# Argument  : \@array
# Version   : 1.0
#--------------------------------------------------------------------
sub array_most_occur{
  my(%hash, @keys, $i, $most, $most_key);
  my(@array)=@{$_[0]};
  for($i=0; $i<@array; $i++){
		$hash{$array[$i]}++;
  }
  @keys = keys %hash;
  #&show_array(@keys);
  $most = $hash{$keys[0]};
  for($i=1; $i<@keys; $i++){
		if($hash{$keys[$i]} > $most){
	  $most_key=$keys[$i];
			 $most    =$hash{$keys[$i]};
		}
  }
  \$most_key;
}

#________________________________________________________________________
# Title     : array_least_occur
# Usage     : $median = ${&array_least_occur(\@array)};
# Function  :
# Example   :
# Warning   :
# Keywords  :  median_array, get_median_array, get_array_median, array_median
# Options   :
# Returns   : \$median
# Argument  : \@array
# Version   : 1.0
#--------------------------------------------------------------------
sub array_least_occur{
  my(%hash, @keys, $i, $least, $least_key);
  my(@array)=@{$_[0]};
  for($i=0; $i<@array; $i++){
		$hash{$array[$i]}++;
  }
  @keys = keys %hash;
  #&show_array(@keys);
  $least = $hash{$keys[0]};
  for($i=1; $i<@keys; $i++){
		if($hash{$keys[$i]} < $least){
	  $least_key=$keys[$i];
			 $least    =$hash{$keys[$i]};
		}
  }
  \$least_key;
}


#________________________________________________________________________
# Title     : show_hash
# Usage     : &show_hash(\@input_array);
# Function  : for debugging purpose. Shows any array elem line by line.
#             the line is 60 elements long (uses recursion)
# Example   : Output:      item1
#             Output:      item2
#             Output:      item3
# Warning   : There is a global variable:  $show_hash_option
#             It tries to detect any given sting which is joined by ','
# Keywords  :
# Options   : -s or -S or s or S for spaced output. Eg)
#             seq1       1 1 1 1 1 1 1 1 1 1 1 1
#
#             instead of
#             seq1       111111111111
#
#             -h or -H or h or H for horizontal line of '---------...'
#
# Returns   :
# Argument  :
# Version   : 1.7
#--------------------------------------------------------------------
sub show_hash{
  my($k, $i, $t, @in2, $in, $LEN, %TEM ); ## You should not put $show_hash_option
  my(@in)=@_;                     ## and $horizontal_line  in my !!!
  my($KL)=2; # default keys string length;
  my($VL)=80; # default values string length;
  my($GAP)=2;  # default space between keys and values
  my($horizontal_line, $show_hash_optionXX, $Hash_counter, @line);

  ## This is to get the option of 'space' to make spaced output.
  for($t=0; $t < @in; $t++){
	 if($in[$t] =~/^[-]+[sS][pace]*$/){
		 $show_hash_optionXX = 1;
		 splice(@in, $t, 1);
	 }elsif(${in[$t]} =~/^[-]+[sS][pace]*$/){
		 $show_hash_optionXX = 1;
		 splice(@in, $t, 1);
	 }elsif($in[$t] =~/^[-]+[hH][rR]*$/){
		 $horizontal_line = 1;
		 splice(@in, $t, 1);
	 }elsif(${in[$t]} =~/^[-]+[hH][rR]*$/){
		 $horizontal_line = 1;
		 splice(@in, $t, 1);
	 }
  }

  ######## Main loop #################
  if($horizontal_line ==1){  ## This puts the delimiter '--------------(  )'
	  $Hash_counter ++;
	  print "\n","-"x78,"(${Hash_counter}th hash)", "\n";
  }

  for($k=0; $k < @in; $k++){
	 if(ref($in[$k]) eq 'ARRAY'){  ### When the hashes were given in array ref.
		 &show_hash(@{$in[$k]}, $show_hash_optionXX, $horizontal_line);
		 print "\n";
	 }
	 elsif(ref($in[$k]) eq 'HASH'){  ### recursion
		 &show_hash(%{$in[$k]});
		 print "\n";
	 }
	 elsif(ref($in[$k+1]) eq 'HASH'){  ### recursion
		 &show_hash(%{$in[$k+1]}); print "\n";
	 }
	 elsif(ref($in[$k]) eq 'SCALAR'){  print ${$_[$k]}, "\n";  }
	 elsif( !ref($in[$k]) ){
		 if( !ref($in[$k+1]) && defined($in[$k+1])  ){
			 if($show_hash_optionXX == 1){  #### space option checking.
				#if($in[$k+1] =~ /\,.+\,/){  #### if the string is joined with ','
				#	 @line = split(/\,/, $_[$k+1]);
				# }else{
				#	 @line = split(//, $_[$k+1]);
				# }
				%TEM = @in;
				$LEN = ${&max_elem_string_array_show_hash(keys %TEM)};
				 if($LEN > $KL){ $KL = $LEN + $GAP +2};
				 printf ("%-${KL}s ", $in[$k]);  $k++;
				 printf ("%-${VL}s\n","@line");
			 }else{                        ### If not option is set, just write
				%TEM = @in;
				$LEN = ${&max_elem_string_array_show_hash( keys %TEM)};
				 if($LEN > $KL){ $KL = $LEN + $GAP +2};
				 printf ("%-${KL}s ", $in[$k]);  $k++; # print $in[$k], "\t";  $k++;
				 printf ("%-${VL}s\n",$in[$k]);        # print $in[$k], "\n";
			 }
		 }
		  #________________________________________________________
		  # Title    : max_elem_string_array_show_hash
		  # Keywords : largest string length of array
		  # Function : gets the largest string length of element of any array of numbers.
		  # Usage    : ($out1, $out2)=@{&max_elem_array(\@array1, \@array2)};
		  #            ($out1)       =${&max_elem_array(\@array1)          };
		  # Argument : numerical arrays
		  # returns  : one or more ref. for scalar numbers.
		  # Version  : 1.1
		  #-------------------------------------------------------
		  sub max_elem_string_array_show_hash{
			 my(@input, $i, $max_elem);
			 @input = @{$_[0]} || @_;
			 for($i=0; $i< @input ; $i++){
					$max_elem = length($input[0]);
					if (length($input[$i]) > $max_elem){
						$max_elem = length($input[$i]);
					}
			 }
			 \$max_elem;
		  }
		  #####################################insert_gaps_in_seq_hash
	 }
  }
}


#_______________________________________________________________
# Title     :  open_predator_files
# Usage     :
# Function  : gets sec. str. prediction of predator and puts in hash
#             If 's' option is given, it also gives sequence hash ref
#             as the second output ref.
# Example   :
# Warning   :
# Keywords  : open_prd_files, predator, secondary structure prediction file
# Options   : 's' for sequence output as well (\%sec_str, \%seq)
#             'p' for percentage of the sec. str.
#             'a' for accumulated percentage. This will
#                  set 'p' automatically
#             'n' for NO name when outputing Percentage of chars with
#                 HASH input to get_occurances_of_char sub.
# Returns   :
# Argument  :
# Version   : 1.3
#-----------------------------------------------------------
sub open_predator_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my( @out_ref, $seq_out, %sec_str, %seq, $percent_out, $NO_name_out,$accumulate);
  if($char_opt=~/s/i){ $seq_out=1 }
  if($char_opt=~/a/i){ $accumulate=1  }
  if($char_opt=~/p/i){ $percent_out=1 }
  if($char_opt=~/n/i){ $NO_name_out='n' }
  for($i=0; $i< @file; $i++){
	 my (%sec_str, %seq) if($accumulate !=1);

	 open(IN, "$file[$i]");
	 while(<IN>){
		if(/\> *(\S+)/){
			$name=$1;
		}elsif(/^[\t ]{1,88}(\w+)$/){
		   $sec_str{$name}.=$1;
		}elsif( ($seq_out==1) &&(/[ \t]*\d+[ \t]*(\w+)[\t ]*\d+/)){
		   $seq{$name}.=$1;
		}
	 }
	 print "\n \%sec_str is: ", %sec_str, "\n" if ($debug == 1);
	 if($seq_out==1){ push(@out_ref, \%sec_str, \%seq);
	 }elsif($percent_out==1 ){
		push(@out_ref, [%{&get_occurances_of_char(\%sec_str, $NO_name_out, 'p')}] );
	 }elsif($percent_out !=1){ push(@out_ref, \%sec_str) }
  }
  if(@out_ref==1){
	 return($out_ref[0]);
  }elsif(@out_ref>1){
	 return(@out_ref);
  }
}

#_______________________________________________________________________________
# Title     : open_phd_files
# Usage     : &open_phd_files(\$file_name, $options,,,,,);
#             :
# Function  : open phd files and put sequences in a hash(s) (run open_phd_files.pl to
#             get some ideas on how this works. type  'open_phd_files.pl xxx.phdo s',
#             it will produce 5 different hashes of secondary structure pred.
# Example   :
# Warning   : All the spaces are converted to '_'
# Keywords  :
# Options   : $secondary, $access, $PHD_sec, $Rel_sec, $prH_sec, $prE_sec, $prL_sec,
#                  $prL_sec, $SUB_sec, $P_3_acc, $PHD_acc, $Rel_acc, $SUB_acc);
# Returns   : one or more hashes(ref.) secondary structure prediction of PHD server
#             --- The PHD secondary server output which are read by open_phd_files -----
#             1 =>       PHD sec |         HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH     HHHHHHH|
#             2 =>       Rel sec |987544342178899999999987678999998478999999999995679771688999|
#             3 =>       prH sec |001222323478899999999987778999998678999999999986110115788999
#             4 =>       prE sec |000010000101000000000000010000000000000000000000000000010000
#             5 =>       prL sec |987666565410000000000001110000001211000000000002789774100000
#             6 =>       SUB sec |LLLL
#             7 =>       P_3 acc |eeeeeeeeee bbeeebbbebbbbebeeee b bbebbebb eebeebe eee eebbeb|
#             8 =>       PHD acc |988787787630066600060000606667515007007005760671847885760160
#             9 =>       Rel acc |979685546222352421667053233245604127749164753790316552446141
#             0 =>       SUB acc |eeeeeeeee
#             types of PHD output, like 1 for 'PHD sec', 2 for 'Rel sec' etc.
# Argument  : one or more file names and options. Files should be PHD server's result.
# Version   : 1.3
#-------------------------------------------------------------------------------
sub open_phd_files{
  my(@names, $i, $j,$n, $s, @in, @out_hash_ref_list, $base, @option);
  my($secondary, $access, $PHD_sec, $Rel_sec, $prH_sec, $prE_sec, $prL_sec, $prL_sec,
	  $SUB_sec, $P_3_acc, $PHD_acc, $Rel_acc, $SUB_acc);

  for($i=0; $i < @_; $i ++){
	 if  ( (ref($_[$i]) eq 'SCALAR' )&&(-f ${$_[$i]}) ){ push(@in, ${$_[$i]}) }
	 elsif( ref($_[$i]) eq 'ARRAY' ){
		 for($j=0; $j< @{$_[$i]}; $j ++){
			 if( -f ${$_[$i]}[$j]){ push (@in, ${$_[$i]}[$j]) }
		 }
	 }
	 elsif((ref($_[$i]) eq 'SCALAR' ) && ($_[$i]=~/^[\-]*\w+/) ){ push(@option, ${$_[$i]}); }
	 elsif(!ref($_[$i]) && (-e $_[$i]) ){  push(@in, $_[$i]) }
	 elsif(!ref($_[$i]) && ($_[$i]=~/^[\-]*\w+/) ){ push(@option, $_[$i]); }
  }
  ## option handling ##
  if(@option == 0){ $PHD_sec_on = 1; } # if no option is given. default is single PHD_sec output.

  elsif(@option >= 1){ $PHD_sec_on = 0;
		for($i=0; $i < @option; $i ++){
		  if( $option[$i] =~ /^[sS]$/){
			  $PHD_sec_on = 1;  $Rel_sec_on = 1; $prH_sec_on = 1;
			  $prE_sec_on = 1;  $prL_sec_on = 1; $SUB_sec_on = 1; 	}
		  elsif( $option[$i] =~ /^[aA]$/){
			  $P_3_acc_on = 1; $PHD_acc_on = 1; $Rel_acc_on = 1;  $SUB_acc_on = 1; 	}
		  elsif( $option[$i] =~ /^PHD_sec$/){ $PHD_sec_on = 1}
		  elsif( $option[$i] =~ /^Rel_sec$/){ $Rel_sec_on = 1}
		  elsif( $option[$i] =~ /^prH_sec$/){ $prH_sec_on = 1}
		  elsif( $option[$i] =~ /^prE_sec$/){ $prE_sec_on = 1}
		  elsif( $option[$i] =~ /^prL_sec$/){ $prL_sec_on = 1}
		  elsif( $option[$i] =~ /^SUB_sec$/){ $SUB_sec_on = 1}
		  elsif( $option[$i] =~ /^P_3_acc$/){ $P_3_acc_on = 1}
		  elsif( $option[$i] =~ /^PHD_acc$/){ $PHD_acc_on = 1}
		  elsif( $option[$i] =~ /^Rel_acc$/){ $Rel_acc_on = 1}
		  elsif( $option[$i] =~ /^SUB_acc$/){ $SUB_acc_on = 1}
		}
  }## option handling END ##

  for($i=0; $i < @in; $i++){
	  $base=${&get_base_names($in[$i])};
	  $name=$base; ## default name when 'protein: xxxx' line is not in there.
	  open(FILE_1, "$in[$i]");    ########  This pattern matching is most buggy !! ####
		  while(<FILE_1>){
			 if   (      /^# +(\w+) *$/ or /protein ?\: +(\S+) +length +\d+/i  ){
			     if($1 eq 'predict'){ $name     =$base }else{ $name=$1 }; ## when no name is given to phd, it puts 'predict' in xxx.phd, so to change it to a proper name.
			 }elsif( (/^[\t ]+PHD[\t ]+sec[\t ]\|([\t\w\. ]+)[\|]+ *$/)&&($PHD_sec_on==1) ){ $PHD_sec .=$1 }
			 elsif( (/^[\t ]+Rel +sec +\|([\w\. ]+)[\|]* *$/)&&($Rel_sec_on==1) ){ $Rel_sec .=$1 }
			 elsif( (/^[\t ]+prH +sec +\|([\w]+)[\|]* *$/) &&($prH_sec_on==1) ){ $prH_sec .=$1 }
			 elsif( (/^[\t ]+prE +sec +\|([\w]+)[\|]* *$/) &&($prE_sec_on==1) ){ $prE_sec .=$1 }
			 elsif( (/^[\t ]+prL +sec +\|([\w]+)[\|]* *$/) &&($prL_sec_on==1) ){ $prL_sec .=$1 }
			 elsif( (/[\t ]+SUB +sec +\|([\w\. ]+)[\|]+ *$/)&&($SUB_sec_on==1) ){ $SUB_sec .=$1 }
			 elsif( (/[\t ]P_3 +acc +\|([\w\. ]+)[\|]* *$/)&&($P_3_acc_on==1) ){ $P_3_acc .=$1 }
			 elsif( (/[\t ]PHD +acc +\|([\w\. ]+)[\|]* *$/)&&($PHD_acc_on==1) ){ $PHD_acc .=$1 }
			 elsif( (/[\t ]+Rel +acc +\|([\w\. ]+)[\|]* *$/)&&($Rel_acc_on==1) ){ $Rel_acc .=$1 }
			 elsif( (/[\t ]+SUB +acc +\|([\w\. ]+)[\|]* *$/)&&($SUB_acc_on==1) ){ $SUB_acc .=$1 }
		 }
		 if ($PHD_sec_on==1){ $PHD_sec =~s/ /_/g; $PHD_sec{"$name"._PHD_s} = $PHD_sec }
		 if ($Rel_sec_on==1){ $Rel_sec =~s/ /_/g; $Rel_sec{"$name"._Rel_s} = $Rel_sec }
		 if ($prH_sec_on==1){ $prH_sec =~s/ /_/g; $prH_sec{"$name"._prH_s} = $prH_sec }
		 if ($prE_sec_on==1){ $prE_sec =~s/ /_/g; $prE_sec{"$name"._prE_s} = $prE_sec }
		 if ($prL_sec_on==1){ $prL_sec =~s/ /_/g; $prL_sec{"$name"._prL_s} = $prL_sec }
		 if ($SUB_sec_on==1){ $SUB_sec =~s/ /_/g; $SUB_sec{"$name"._SUB_s} = $SUB_sec }
		 if ($P_3_acc_on==1){ $P_3_acc =~s/ /_/g; $P_3_acc{"$name"._P_3_a} = $P_3_acc }
		 if ($PHD_acc_on==1){ $PHD_acc =~s/ /_/g; $PHD_acc{"$name"._PHD_a} = $PHD_acc }
		 if ($Rel_acc_on==1){ $Rel_acc =~s/ /_/g; $Rel_acc{"$name"._Rel_a} = $Rel_acc }
		 if ($SUB_acc_on==1){ $SUB_acc =~s/ /_/g; $SUB_acc{"$name"._SUB_a} = $SUB_acc }
		 push(@out_hash_ref_list, \%PHD_sec) if $PHD_sec_on;
		 push(@out_hash_ref_list, \%Rel_sec) if $Rel_sec_on;
		 push(@out_hash_ref_list, \%prH_sec) if $prH_sec_on;
		 push(@out_hash_ref_list, \%prE_sec) if $prE_sec_on;
		 push(@out_hash_ref_list, \%prL_sec) if $prL_sec_on;
		 push(@out_hash_ref_list, \%SUB_sec) if $SUB_sec_on;
		 push(@out_hash_ref_list, \%P_3_acc) if $P_3_acc_on;
		 push(@out_hash_ref_list, \%PHD_acc) if $PHD_acc_on;
		 push(@out_hash_ref_list, \%Rel_acc) if $Rel_acc_on;
		 push(@out_hash_ref_list, \%SUB_acc) if $SUB_acc_on;
	 }
	 if(@out_hash_ref_list == 1){ $out_hash_ref_list[0] }
	 else{ return(@out_hash_ref_list) }
}




#________________________________________________________________________
# Title     : open_swissprot_seq_files
# Usage     :
# Function  : open swiss files and puts ONLY the sequences in a hash(s)
# Example   :
# Warning   : ONLY the seq.
# Keywords  : open_swiss_seq_files, open_swiss_seq, read_swissprot_seq_files,
#            read_swiss_seq, get_swissprot_seq, take_swissprot_seq,
# Options   : 'v' for STDOUT printout as well.
# Returns   :
# Argument  :
# Version   : 1.2
#--------------------------------------------------------------------
sub open_swissprot_seq_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  my(@names, $i, $name, $n, $seq, $s, %hash, @out_hash_ref_list);
  for($i=0; $i< @file; $i++){
	 open(FILE_1, "$file[$i]");
	 undef(%hash);
	 while(<FILE_1>){      # file1 needs to be xxxx.msf
		 if(/^ID +(\w+) */){
			 $name=$1;
			 if($char_opt =~ /v/i){ print "\>$name\n"; }
		 }elsif(/^ +([\w ]+)$/){
			 $seq.=$1;
		 }elsif(/^\/\//){
			 $seq=~s/ //g;
			 if($char_opt =~ /v/i){ print "$seq\n"; }
			 $hash{$name}=$seq;
			 push(@out_hash_ref_list, \%hash);
			 $seq='';
		 }
	 }
  }
  if(@out_hash_ref_list  == 1 ){ return(\%hash); }
  elsif(@out_hash_ref_list > 1){ return(@out_hash_ref_list); }
}


#______________________________________________________________
# Title     : open_clu_files
# Usage     : %clus=%{&open_clu_files(\$input)};
# Function  :
# Example   : Clu file eg)
#
#  Cluster 7360103
#    1  1 SLL1058         7-255       2   Origin: 3   736   Sub:3
#    1  1 MJ0422          17-283      2   Origin: 3   736   Sub:3
#    1  1 HI1308          3-245       2   Origin: 3   736   Sub:3
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
#              This automatically converts lower to upper letters
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#             b  for to get just names ($simple_clu_reading)
#             r  for adding ranges in the names
# Returns   : a ref of hash of $clus{"$clus_size\-$id"}.=$m."\n";
#             Actual content:
#             3-133 => 'HI00111 HI00222 MG1233 '
# Argument  :
# Version   : 1.8
#--------------------------------------------------------------
sub open_clu_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my($simple_clu_reading, $possible_range, $add_ranges,
	  $id, $name_range, %clus, $found);
   my $file=$file[0];
   if($char_opt=~/b/){ $simple_clu_reading= 'b' };

   my $clus_size=1;
   open(CLU, "$file");
   while(<CLU>){
	  if($simple_clu_reading=~/\S/){ ## to get just names
		  if(/^ *\d+ +\d+ +\d+ +\d+ +\d+/){  ## To skip the very first summary columns
			 next;
		  }elsif(/^ *#/ ){ next;
		  }elsif(/^ *\d+ +\d+ +(\S+) +(\S+)/){
			 $seq_name=$1;
			 $possible_range=$2;
			 if($2=~/\d+\-\d+/ and $char_opt=~/r/){
				$name_range="$seq_name\_$possible_range";
				$clus{$name_range} = $name_range;
			 }else{
			    $clus{$seq_name}=$seq_name;
			 }
		  }
	  }else{
		  if(/^ *\d+ +\d+ +\d+ +\d+ +\d+/){  ## To skip the very first summary columns
			 next;
		  }elsif(/^ *#/ ){
			 next;
		  }elsif(/^ *Cluster +size +(\d+)/i ){
			 $clus_size=$1;
			 $found=1;
		  }elsif(/^ *Cluster +([_\d]+) *size:? *(\d+)/i){  # to match 'Cluster 14313'  or  'Cluster 234_1234_1'
			 $id  =$1;
			 $found=1;
			 $clus_size=$2; # if defined($2);
		  }elsif(/^ *Cluster +([\w]+)/i){  # to match 'Cluster 14313'  or  'Cluster 234_1234_1'
			 $id  = $1;
			 $found=1;
		  }elsif(($found==1)&&(/^ *\S* *\S* *(\S+)\.prot\,? *.*/)){ ## this is to correct MP genome names
			 $m=$1;
			 $clus{"$clus_size\-$id"}.="\U$m ";
		  }elsif(($found==1)&&(/^ *(\d+) *\d* *(\S{2,32}) *(\S*)/)){          # general clu match
			 $clus_size=$1 unless ($clus_size);
			 $m=$2;
			 $possible_range=$3;
			 if($2=~/\d+\-\d+/ and $char_opt=~/r/){
				$name_range="$m\_$possible_range";
				$clus{"$clus_size\-$id"}.="\U$name_range ";
			 }else{
				$clus{"$clus_size\-$id"}.="\U$m ";
			 }
		  }
	  }
   }
   return(\%clus);
}



#________________________________________________________________________
# Title     : open_msf_files
# Usage     : (*out, *out2) = @{&open_msf_files(\$inputfile1, \$inputfile2)};
#             : %hash_seq = %{&open_msf_files(\$inputfile1)};
#             : (@out)        = @{&open_msf_files(\$inputfile1, \$inputfile2)};
#             ---------- Example of MSF ---
#             PileUp
#
#             MSF:   85  Type: P    Check:  5063   ..
#
# Function  : open msf files and put sequences in a hash(s)
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : (*out, *out2)  or (@out_array_of_refs)
# Argument  : (\$inputfile1, \$inputfile2, .... )};
# Version   : 1.1
#--------------------------------------------------------------------
sub open_msf_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  my(@names,  %hash, @out_hash_ref_list);
  for($i=0; $i< @file; $i++){
	 open(FILE_1, "$file[$i]");
	 undef(%hash);
	 while(<FILE_1>){      # file1 needs to be xxxx.msf
		 if((/^([\S]+)\t* +$/)||(/^\#/)||(/^\-+/)){ next; }
		 if(/^([\S]+)\t* +([\.\w ]+)[\n]$/){
			 $n=$1;
			 $s=$2;
			 $s=~s/ //g;
			 $hash{$n}.= $s;
		 }
	 }
	 push(@out_hash_ref_list, \%hash);
  }
  if(@out_hash_ref_list  == 1 ){ return(\%hash); }
  elsif(@out_hash_ref_list > 1){ return(@out_hash_ref_list); }
}

#__________________________________________________________________________
# Title     : open_hmmls_files
# Usage     : %out=%{&open_hmmls_files(\@file)};
# Function  : hmmls matches the full length model to target seq. while, hmmfs
#             does for fragments as well.
# Example   :
# Keywords  :
# Options   :
#   "t=$thresh"  for bits score threshold
#    r for adding ranges
#    m for making MSP file format output
#    E=Enguiry_name    for specifying enquiry seq name rather than 'HMM', the default
# Returns   :
# Argument  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub open_hmmls_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	$thresh = 15; # default threshold
	if($vars{'t'}=~/\S/){  $thresh       = $vars{'t'} }
	if($vars{'E'}=~/\S/){  $enquiry_name = $vars{'E'}
	}else{ $enquiry_name='HMM' }

	if(@file<1){
	   print "\n# open_hmmls_files needs one or more files !!\n";
	}else{
 	   print "\n# open_hmmls_files: threshold used = $thresh \n";
	}

	for($i=0; $i< @file; $i++){
	   $input_file=$file[$i];
	   open(HMMLS, "$input_file");
	   while(<HMMLS>){
		  if(/^ {0,2}(\S+) +\S+ +\S+ +(\d+) +\S+ +(\d+) +\S+ +(\S+)/){
			  if($1 > $thresh){
				  $score=$1;
				  $target=$4;
				  $from=$2;
				  $to=$3;
				  if($char_opt=~/r/){
					  if($char_opt=~/m/){ ## convert to msp file format
						  $hash{"$target\_$from\-$to"}=
							 "$score $score  $from $to $enquiry_name   $from $to  $target\n";
					  }else{
						  $hash{"$target\_$from\-$to"}=$score;
					  }
				  }else{
					  if($char_opt=~/m/){ ## convert to msp file format
						  $hash{"$target"}=
						     "$score $score  $from $to $enquiry_name   $from $to  $target\n";
					  }else{
					      $hash{"$target"}=$score;
					  }
				  }
			  }
		  }
	   }
	}
	return(\%hash);
}



#__________________________________________________________________________
# Title     : open_hmmfs_files
# Usage     : %out=%{&open_hmmfs_files(\@file, "t=$thresh", $attch_ranges)};
# Function  :
# Example   :
# Keywords  :
# Options   :
#   "t=$thresh"  for bits score threshold
# Returns   :
# Argument  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub open_hmmfs_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	$thresh = 15; # default threshold
	if($vars{'t'}=~/\S/){  $thresh = $vars{'t'} }
	print "\n# open_hmmfs_files: threshold used = $thresh \n";

	for($i=0; $i< @file; $i++){
	   $input_file=$file[$i];
	   open(HMMLS, "$input_file");
	   while(<HMMLS>){
		  if(/^ {0,2}(\S+) +(\d+) +(\d+) +\d+ +\d+ +(\S+)/){
			  if($1 > $thresh){
				  $score=$1;
				  $target=$4;
				  $from=$2;
				  $to=$3;
				  if($char_opt=~/r/){
					  $hash{"$target\_$from\-$to"}=$score;
				  }else{
					  $hash{"$target"}=$score;
				  }
			  }
		  }
	   }
	}
	return(\%hash);
}



#________________________________________________________________________
# Title     : open_seq_files
# Usage     : %seq=%{&open_seq_files($tim_seq_file, ['MJ0084'], [15] )};
#             if you put additional seq name as MJ0084 it will
#             fetch that sequence only in the database file.
#             Any digit will be used as minimum seq size to be fetched.
# Function  : open seq files and put sequences in a hash
#             seq sequence file format is like this;
#
# 1l94   162 MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRTFRTGTWDAYK
# 1lye   162 MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRTFRTGTWDAYK
# 1lyj   162 MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRTFRTGTWDAYK
# 1mngA  203 PYPFKLPDLGYPYEALEPHIDAKTMEIHHQKHHGAYVTNLNAALEKYPYLHGVLNWDVAEEFFKKA
#
#             This can also return the sizes of sequences rather than seqs.
# Example   : %out = %{&open_seq_files(@ARGV)};
#                    while @ARGV at prompt was: 'pdb_40.seq'
#             %seq=%{&open_seq_files(@ARGV, '1cgpa_140-197')};
#                    to fetch 1cgbA but in range of 140-197 only
# Warning   :
# Keywords  : open_pdbs_files
# Options   : any digit for the minimum seq length
# Returns   :
# Argument  :
# Version   : 1.6
#--------------------------------------------------------------------
sub open_seq_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

   my (%sequence, @lines, $S_start, $S_end, @seq_Names, %Sizes, $name_stem,
	   $seq_start, $seq_leng, $min_size);
   $min_size=$num_opt[0];
   if(@file<1){ print "\n There is no fileinput for open_seq_files\n"; exit}

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  If the file is not found, it goes to @string
   #_________________________________________________
   for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];
	  unless (-e $input_file){
		  push(@string, $input_file);
		  splice(@file, $i, 1); $i--;
		  next;
	  }
   }


   for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];
	  if($debug eq 1){ print "\n open_seq_files: Inputfile is $input_file\n" };

	  open(FILE_1,"$input_file");
	  @lines=<FILE_1>;
	  close FILE_1;

	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  #  when hash is directly given for selected sequence names
	  #_____________________________________________________________
	  if(@hash >=1){
		 for($h=0; $h< @hash; $h++){
			@string=(@string, keys %{$hash[$h]});
		 }
	  }
	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  #  When there are asked seqs to fetch
	  #_______________________________________________________
	  if( (@_ > 1)&&(@string > 0) ){
		  for $lines (@lines){
			for($j=0; $j< @string; $j++){
			  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			  #  If the seq name has ranges -> 1xxx_10-20
			  #___________________________________________
			  if($string[$j]=~/^(\S+)_(\d+)\-(\d+)$/){
				 $name_stem=$1;
				 $seq_start=$2-1;
				 $seq_leng=$3-$2+1;
				 if($lines=~/^\>?($name_stem) +(\d+) +(\w+)$/i){
					$seq_in_range=substr($3, $seq_start, $seq_leng);
					$sequence{$string[$j]}=$seq_in_range;
					splice(@string, $j, 1); ## for optimization
					$j--;
				 }
			  }else{
				 if($lines=~/^\>?($string[$j]) +(\d+) +(\w+)/i){
					if($min_size < $2){
					   splice(@string, $j, 1);
					   $j--; ## for optimization
					   $sequence{$1} = $3;
					}
				 }
			  }
			}
		  }
		  return(\%sequence);

	  }else{ # getting all seq in the given file(s)
		  for(@lines){                # file1 needs to be xxxx.fasta for the moment, automatic later
			if(/^\>?(\S+) +(\d+) +(\w+)$/){
			  if($min_size < $2){
				 $sequence{$1} = $3;
			  }
			}else{ next }
		  }
	  }
	  close FILE_1;
   }
   if(defined(@range)){
	  %seq_fragments=%{&get_seq_fragments(\%sequence, \@range)};
	  return(\%seq_fragments);
   }elsif($char_opt=~/s/i){ # when SIZE return only option is set
	  @seq_Names=keys %sequence;
	  for($i=0; $i < @seq_Names; $i++){
		 $Sizes{$seq_Names[$i]}=length($sequence{$seq_Names[$i]});
	  }
	  return(\%Sizes);
   }elsif(@hash >=1){
	  for($h=0; $h< @hash; $h++){
	     my %hash=%{$hash[$h]};
		 my @Keys=keys %hash;
		 for($k=0; $k<@Keys; $k++){
			if(defined($hash{$Keys[$k]})){
			   ($S_start, $S_end)=$hash{$Keys[$k]}=~/(\d+)\-(\d+)/;
			   $sequence{$Keys[$k]}=substr($sequence{$Keys[$k]}, ($S_start-1), ($S_end-$S_start));
			}
		 }
	  }
	  return(\%sequence);
   }else{
	  return(\%sequence);
   }
}


#______________________________________________________________________________________
# Title     : open_sso_files
# Usage     :  @sso=@{&open_sso_files(@file, $add_range, $add_range2, "u=$upper_expect_limit",
#			                            "l=$lower_expect_limit", "m=$margin", $new_format)};
# Function  : This reads the parseable( -m 10 option)
#              and non-parseable form of ssearch program output
#             If you give 5 files, it produces 5 hashes as a ref of array.
#             This understands xxxx.gz files.
#             This reads FASTA -m 10 output, too.
# Example   :
#  717    0         0.343  16    373    EC1260_16-373              74    434    YBL6_YEAST_74-434
#  348    9e-16     0.500  113   233    EC1260_113-233             27    146    YDBG_ECOLI_27-146
#  472    2.9e-08   0.271  13    407    EC1260_13-407              148   567    YHJ9_YEAST_148-567
#  459    1.9e-22   0.260  1     407    EC1260_1-407               65    477    YLQ6_CAEEL_65-477
#  452    4.5e-14   0.275  1     407    EC1260_1-407               103   537    YSCPUT2_103-537
#  1131   0         0.433  1     407    EC1260_1-407               112   519    ZMU43082_112-519
#
# Warning   : By default, the SW score comes to the first
#             If expect value is not found, it becomes '0'
#             By default, the offset of seq match with a seq name like seq_30-40
#               will be 30 not 1.
#             It ignores special chars like , : .prot in the name (eg, AADF_FASDF: will be AADF_FASDF)
# Keywords  : open_ssearch_output_files, ssearch_output, ssearch, FASTA,
# Options   : _  for debugging.
#             #  for debugging.
#             u= for upper E value limit
#             l= for lower E value limit
#             r  for attaching ranges to out seq names (eg> HI0001_1-20 as a key)
#             U  for making the matched seqname to upppercase
#             L  for making the matched seqname to lowercase
#             R  for attaching ranges to out seq names for both TARGET and MATCH
#             n  for new format (msp2)
#             a  for getting alignments of the pair
#
# Version   : 4.2
# Enclosed  :
#
#   >>MG032 ATP-dependent nuclease (addA) {Bacillus subtilis  (666 aa)
#    Z-score: 88.3 expect()  1.9
#   Smith-Waterman score: 77;  27.143% identity in 70 aa overlap
#
#           30        40        50        60        70        80
#   MJ0497 RSAGSKGVDLIAGRKGEVLIFECKTSSKTKFYINKEDIEKLISFSEIFGGKPYLAIKFNG
#                                        : .. ...  . .:.:::. :: : ..:
#   MG032  HDKVRYAFEVKFNIALVLSINKSNVDFDFDFILKTDNFSDIENFNEIFNRKPALQFRFYT
#        200       210       220       230       240       250
#
#           90       100             110       120       130
#   MJ0497 EMLFINPFLLSTNGK------NYVIDERIKAIAIDFYEVIGRGKQLKIDDLI
#          .   ::   :: ::.      : ....... . ::. . :
#   MG032  K---INVHKLSFNGSDSTYIANILLQDQFNLLEIDLNKSIYALDLENAKERFDKEFVQPL
#        260          270       280       290       300       310
#
# Parseable form -m 10 option =========================================
#   >>>MJ0497.fa, 133 aa vs GMG.fa library
#   ; pg_name: Smith-Waterman (PGopt)
#   ; pg_ver: 3.0 June, 1996
#   ; pg_matrix: BL50
#   ; pg_gap-pen: -12 -2
#   >>MG032 ATP-dependent nuclease (addA) {Bacillus subtilis
#   ; sw_score:  77
#   ; sw_z-score: 88.3
#   ; sw_expect    1.9
#   ; sw_ident: 0.271
#   ; sw_overlap: 70
#   >MJ0497 ..
#   ; sq_len: 133
#   ; sq_type: p
#   ; al_start: 58
#   ; al_stop: 121
#   ; al_display_start: 28
#----------------------------------------------------------------------------
sub open_sso_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@out_refs, @SSO, $create_sso, $parseable, @OUT, @temp_sso_lines,
		%match, $attach_range_in_names, $margin, $uppercase_seq_name,
		$lowercase_seq_name, $target_seq, $new_format, $get_alignment,
		$pvm_version_fasta_out, $original_target_seq);

	my ($upper_expect_limit, $lower_expect_limit)=(50,0);

	if($char_opt=~/R/){  $attach_range_in_names2=1; };
	if($char_opt=~/r2/){ $attach_range_in_names =1; $attach_range_in_names2=1 };
	if($char_opt=~/r/){  $attach_range_in_names =1; };
	if($char_opt=~/c/){  $create_sso   ='c' };
	if($char_opt=~/n/){  $new_format   ='n' };
	if($char_opt=~/a/){  $get_alignment='a' };
	if($char_opt=~/U[pperPPER]*/){ $uppercase_seq_name='U' };
	if($char_opt=~/L[owerOWER]*/){ $lowercase_seq_name='L' };
	if($vars{'u'}=~/([\.\d]+)/){ $upper_expect_limit = $vars{'u'} };
	if($vars{'l'}=~/([\.\d]+)/){ $lower_expect_limit = $vars{'l'} };
	if($vars{'m'}=~/\d+/){ $margin = $vars{'m'} };

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# opening file input (can handle .gz  files)
	#_______________________________________________
	for($i=0; $i< @file; $i++){
		 if($file[$i]=~/\S+\.\gz$/ or -B $file[$i]){  ## if file has xxxx.gz extension
			 my (@sso);
			 @sso=`gunzip -c $file[$i]`;
			 if(@sso < 30){  @sso=`zcat $file[$i]`; }      # if zcat fails to produce output use gunzip -c
			 if(@sso > 3000){ # if @sso is very big, I remove the useless contents
				 print "\n# open_sso_files: size of \@sso for $file[$i] exceeds 3000 lines, ", scalar(@sso), " !!! \n";
			 }
	         push(@OUT, &read_sso_lines(\@sso, $create_sso, $attach_range_in_names, $attach_range_in_names2,
							 $new_format, $get_alignment) );
		 }else{
			 print "\n# openning text file format xxxx.sso $file[$i]";
			 open(SSO, "$file[$i]") or die "\n# open_sso_files: Failed to open $file[$i]\n";
			 my @sso=<SSO>;
			 if(@sso < 30){  @sso=`zcat $file[$i]`; }      # if zcat fails to produce output use gunzip -c
			 if(@sso > 3000){ # if @sso is very big, I remove the useless contents
				 print "\n# open_sso_files: size of \@sso is for $file[$i] exceeds 3000 lines, ", scalar(@sso), " !!! \n";
			 }
			 push(@OUT, &read_sso_lines([@sso], $create_sso, $attach_range_in_names, $attach_range_in_names2,
							 $new_format, $get_alignment) );
			 close SSO;
		 }
	}
	return(\@OUT); # @OUT has refs of hashes  (\%xxx, \%YYY, \%XXX,,,,)
}



#_____________________________________________________________________________
# Title     : open_msp_files
# Usage     : %seq=%{&open_msp_files(@file, $names_only)};
# Function  : opens Erik Sonhammer's MSPcrunch file output(default).
#             This looks up xxxxx.fa files in the pwd (with S opt) and see
#             if it can get the sequences as well.
#             With 'n' option you can just get the matched sequence
#              names with ranges.
# Example   : Example output(with 'n' opt):
#   d1bi6h1         d1bi6h1_1-24     IBR1_ANACO_20-42  IBR2_ANACO_19-42
#   e1bi6.1h1       IBR1_ANACO_38-52 e1bi6.1h1_1-18    IBR2_ANACO_38-52
#
# Keywords  : exchange_msp_file_columns,
# Options   :
#          s -s  for size return only
#          S -S  for the sequences are fetched if equivalent xxxx.fa files are in pwd
#          n -n  for matched seq NAMEs with ranges only (eg: HI0001_1-12,,), hash ref is out
#          R     for NO range attachment in Name only return option (n)
#          e=    for evalue threshhold, if e=1, ignores all which are over 1
#          t=    for score threshhold if t=100, ignores all which are less 100
#          l=    for match length threshold.
#          x     for exchange query with matched seqs. eg)      12 0.09 1 30 QUERY  1 29 MATCH
#                                                       becomes 12 0.09 1 30 MATCH  1 29 QUERY
#                This returns the same lines as input only with exchanged query and match seqs
#
# Returns   :
# Argument  :
# Version   : 2.8
#------------------------------------------------------------------------------
sub open_msp_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my (%matched_seq, @MSP, %Sizes, $input_file, %new_exchanged_hash,
	   %matched_seq_names, $new_line, %simple_default_output, $simple_default_out_flag );
   my  $Evalue_thresh = 50; ### default very high evalue
   my  $Score_thresh  = 30; # 30 is very very tiny
   my  $query_length_thresh  = 5; ## absurdly low 5.

   if($vars{'t'}=~/^\d+$/){ $Score_thresh= $vars{'t'} }
   if($vars{'e'}=~/^\S+$/){ $Evalue_thresh= $vars{'e'} }
   if($vars{'l'}=~/^\S+$/){ $query_length_thresh= $vars{'l'} }

   for($i=0; $i< @file; $i++){
	  $input_file = ${$file[$i]} || $file[$i];

	  if($debug eq 1){ print "\n ${i}th inputfile is input_file\n" };
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_msp_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
	  }
	  if($input_file=~/\S\.gz *$/){ # if input file is gzzipped
		  @MSP=`gunzip -c $input_file`;
	      print "\n# open_msp_files: Input file \"$input_file\" is gzipped !\n";
	  }elsif($input_file=~/\S\.bz2 *$/){ # if input file is bzzipped
		  @MSP=`bzip2 -c -d $input_file`;
	      print "\n# open_msp_files: Input file \"$input_file\" is bzipped !\n";
	  }else{
		  open(FILE,"$input_file") or die "\n# Error opening $input_file\n";
		  @MSP=<FILE>;		  close(FILE);
	  }

	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	  #  When x option for exchanging query and match is set
	  #___________________________________________________________
	  if($char_opt=~/x/){
		  my($name1);
		  for($j=0; $j< @MSP; $j++){
			 if($MSP[$j]=~/^ *(\d+) +(\S+) *\S* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) *(.*)/){
				 $query_length=$4-$3;
				 if($1 < $Score_thresh or $2 > $Evalue_thresh
					or $query_length < $query_length_thresh){ next }
				 unless($char_opt=~/R/){
						 $name1="$5\_$3\-$4";
				 }else{  $name1="$5";    }
				 $new_line=sprintf("%-4s %-10s %-5s %-5s %-32s %-5s %-5s %-32s",
									$1, $2, $6,  $7,  $8,   $3,  $4,  $name1);
				 $new_exchanged_hash{"$5\_$8"}=$new_line;
			 }
		  }
		  return(\%new_exchanged_hash);
	  }
	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  #  When matched sequence NAMEs only are wanted
	  #________________________________________________
	  elsif($char_opt=~/n/){
		 F2: for($j=0; $j<@MSP; $j++){
			if($MSP[$j]=~/^ *(\d+) +(\S+) *\S* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) *(.*)/){
				$query_length=$4-$3;
				if($2 > $Evalue_thresh or $1 < $Score_thresh
				   or $query_length < $query_length_thresh){  next F2; }
				$name1=$8; 				$r_start=$6;
				$r_end  =$7;			$target =$5;
				if($name1=~/^(\S+)_\d+\-\d+/){
				   $bare=$1;
				   if($char_opt=~/R/){  ## When NO range option is set
					   $matched_seq_names{$target} .="$bare "  # returns string
				   }else{
				       $matched_seq_names{$target} .="$name1 ";   # returns string
				   }
				}else{
				   if($char_opt=~/R/){  ## When NO range option is set
					   $matched_seq_names{$target} .="$name1 ";
				   }else{
					   $matched_seq_names{$target} .="$name1\_$r_start\-$r_end ";
				   }
				}
			}
		 }
		 #$matched_seq_names{$4}=\@matched_seq_names;
	  }
	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  #  When matched ACTUAL sequences are wanted (without n opt)
	  #____________________________________________________________
	  elsif($char_opt=~/S/){
		 F3: for($j=0; $j<@MSP; $j++){
			#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			#                   $1          $2     $3    $4      $5     $6    $7     $8
			#                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
			#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
			if($MSP[$j]=~/^ *(\d+) +(\S+) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) *(.*)/i){
				  $query_length=$4-$3;
				  if($2 > $Evalue_thresh or $1 < $Score_thresh
					 or $query_length < $query_length_thresh ){ next F3; } ##<-- filtering

				  if($5 eq $8){
					$query_name=$8;
					$query_leng=($4 - $3 + 1);
					my $matched_seq;
					for($k=0; $k<$query_leng; $k++){
					  $query[$k]=0;
					}
					$query_fasta="$query_name\.fa";
					open(QUERY_FASTA, "$query_fasta") || die "\n# open_msp_files, die: No $matched_fasta. Didn't you want \'n\' opt?\n";
					my @QUERY_FASTA=<QUERY_FASTA>;
					for(@QUERY_FASTA){
					   if(/\>$query_name/){
						  $query_found=1; next;
					   }elsif(($query_found==1)&&(/^(\w{10,})$/)){
						  $matched_seq .=$1;
					   }
					}
					$matched_seq{$query_name}=$matched_seq;
					close(QUERY_FASTA);
				 }elsif($5 ne $8){
					if($match_name ne $8){
					   $match_name=$8;
					   my $matched_seq;
					   push(@matched_members, $8);
					   $matched_fasta="$match_name\.fa";
					   open(MATCHED_FASTA, "$matched_fasta") || die "\n# open_msp_files, die: No $matched_fasta. Didn't you want \'n\' opt?\n";
					   my @MATCHED_FASTA=<MATCHED_FASTA>;
					   for(@MATCHED_FASTA){
						 if(/\>$match_name/){
						   $found=1; next;
						 }elsif(($found==1)&&(/^(\w{10,})$/)){
						   $matched_seq .=$1;
						 }
					   }
					   $matched_seq{$match_name}=$matched_seq;
					}
				  }
			 }
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~~~~~~~~~~~~~~~~~~~~~~`
			 #  New MSP format
			 #_____________________________________________________________
			 elsif($MSP[$j]=~/^ *(\d+) +(\S+) +(\S+) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) *(.*)/i){ # new MSP format
				 if($6 eq $9){
					  $query_name=$9;
					  $query_leng=($5 - $4 + 1);
					  if($query_leng < $query_length_thresh){ next }
					  my $matched_seq;
					  for($k=0; $k<$query_leng; $k++){
						   $query[$k]=0;
					  }
					  $query_fasta="$query_name\.fa";
					  open(QUERY_FASTA, "$query_fasta") || die "\n# open_msp_files, die: No $matched_fasta. Didn't you want \'n\' opt?\n";
					  my @QUERY_FASTA=<QUERY_FASTA>;
					  for(@QUERY_FASTA){
						   if(/\>$query_name/){
							  $query_found=1; next;
						   }elsif(($query_found==1)&&(/^(\w{10,})$/)){
							  $matched_seq .=$1;
						   }
					  }
					  $matched_seq{$query_name}=$matched_seq;
					  close(QUERY_FASTA);
				}elsif($6 ne $9){
					  if($match_name ne $9){
						   $match_name=$9;
						   $query_leng=($5 - $4 + 1);
						   if($query_leng < $query_length_thresh){ next }
						   my $matched_seq;
						   push(@matched_members, $9);
						   $matched_fasta="$match_name\.fa";
						   open(MATCHED_FASTA, "$matched_fasta") || die "\n# open_msp_files, die: No $matched_fasta. Didn't you want \'n\' opt?\n";
						   my @MATCHED_FASTA=<MATCHED_FASTA>;
						   for(@MATCHED_FASTA){
							 if(/\>$match_name/){
							   $found=1; next;
							 }elsif(($found==1)&&(/^(\w{10,})$/)){
							   $matched_seq .=$1;
							 }
						   }
						   $matched_seq{$match_name}=$matched_seq;
					  }
				 }
			}
		 }
	  }else{ # default output. This filters with given E and Score thresholds
		 $simple_default_out_flag++;
		 F4: for($j=0; $j<@MSP; $j++){
			#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			#                   $1          $2     $3    $4      $5     $6    $7     $8
			#                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
			#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
			if($MSP[$j]=~/^ *(\d+) +(\S+) +\S* *(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) *(.*)/i){
				my $query_length = $4-$3;
				if($2 > $Evalue_thresh or $1 < $Score_thresh
				    or $query_length < $query_length_thresh ){ next F4;
				}else{
					$simple_default_output{$input_file} .= $MSP[$j];
				}
			}
		 }
	  }
   } #<<<<<-- for($i=0; $i< @file; $i++){

   if($simple_default_out_flag >=1){ %matched_seq=%simple_default_output; }
   if($simple_default_out_flag > 1){ print "\n# You have asked me to open more than one file. I merge output into one hash\n"; }

   if(defined(@range) && ($char_opt !~ /n/)){ ## m opt is for matched seq names only
	   %seq_fragments=%{&get_seq_fragments(\%matched_seq, \@range)};
	   return(\%seq_fragments);
   }elsif($char_opt=~/s/){ # when SIZE return only option is set
	   @seq_Names=keys %matched_seq;
	   for($i=0; $i<@seq_Names; $i++){
		  $Sizes{$seq_Names[$i]}=length($matched_seq{$seq_Names[$i]});
	   }
	   return(\%Sizes);
   }else{
	   if($char_opt=~/n/){
		  return(\%matched_seq_names);
	   }else{
		  return(\%matched_seq);
	   }
   }
}





#________________________________________________________________________
# Title     : open_dssp_files
# Usage     : (*out, *out2) = @{&open_dssp_files(\$inputfile1, \$inputfile2, \$H, \$S,,,,)};
#             (@out)        = @{&open_dssp_files(\$inputfile1, \$inputfile2, \$H, \$S,,,,)};
# Function  : open dssp files and put sequences in a hash(s)
#              It can take options for specific secondary structure types. For example,
#              if you put an option $H in the args of the sub with the value of 'H'
#              open_dssp_files will only read secondary structure whenever it sees 'H'
#              in xxx.dssp file ignoring any other sec. str. types.
#              If you combine the options of 'H' and 'E', you can get only Helix and long
#              beta strand sections defined as segments. This is handy to get sec. str. segments
#              from any dssp files to compare with pdb files etc.
#             With 'simplify' option, you can convert only all the 'T', 'G' and 'I' sec. to
#              'H' and 'E'.
# Example   :
# Warning   : 6taa.dssp  and 6taa are regarded as the same.
# Keywords  :
# Options   : H, S, E, T, I, G, B, P, C, -help
# $H        =        'H' by   -H or -h or H or h  # to retrieve 4-helix (alpha helical)
# $S        becomes  'S' by   -S or -s or S or s  # to retrieve Extended strand, participates in B-ladder
# $E        becomes  'E' by   -E or -e or E or e  # to retrieve residue in isolated Beta-bridge
# $T        becomes  'T' by   -T or -t or T or t  # to retrieve H-bonded turn
# $I        becomes  'I' by   -I or -i or I or i  # to retrieve 5-helix (Pi helical) segment output
# $G        becomes  'G' by   -G or -g or G or g  # to retrieve 3-helix (3-10 helical)
# $B        becomes  'B' by   -B or -b or B or b  # to retrieve only B segment
# $simplify becomes   1  by   -p or P or -P, p
# $comm_col becomes  'c' by   -c or c or C or -C or common
# $HELP     becomes   1  by   -help   # for showing help
#
# Returns   : (*out, *out2)  or (@out_array_of_refs)
# Argument  : files names like (6taa, 6taa.dssp) If you put just '6taa' without extension, it
#             searches if there is a '6taa.dssp' in both PWD and $DSSP env. set directory.
#             ---------- Example of dssp ---
#             **** SECONDARY STRUCTURE DEFINITION BY THE PROGRAM DSSP, VERSION JUL
#             REFERENCE W
#             HEADER    RIBOSOME-INACTIVATING PROTEIN           01-JUL-94   1MRG
#             COMPND    ALPHA-MOMORCHARIN COMPLEXED WITH ADENINE
#             SOURCE    BITTER GOURD (CUCURBITACEAE MOMORDICA CHARANTIA) SEEDS
#             AUTHOR    Q
#             246  1  0  0  0 TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN)                .
#             112 95.0   ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2)                                                                         .
#             171 69.5   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J)  , SAME NUMBER PER 100 RESIDUES                              .
#             12   4.9   TOTAL NUMBER OF HYDROGEN BONDS IN     PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES                              .
#             36  14.6   TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES                              .
#             1    0.4   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-5), SAME NUMBER PER 100 RESIDUES                              .
#             1    0.4   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-4), SAME NUMBER PER 100 RESIDUES                              .
#             74  30.1   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+4), SAME NUMBER PER 100 RESIDUES                              .
#             5    2.0   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+5), SAME NUMBER PER 100 RESIDUES                              .
#             1    2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30     *** HISTOGRAMS OF ***           .
#             0    0  0  0  1  1  0  2  0  0  1  0  0  1  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0    RESIDUES PER ALPHA HELIX         .
#             1    0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0    PARALLEL BRIDGES PER LADDER      .
#             2    0  1  2  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0    ANTIPARALLEL BRIDGES PER LADDER  .
#             2    0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0    LADDERS PER SHEET                .
#             #   RESIDUE AA STRUCTURE BP1 BP2  ACC   N-H-->O  O-->H-N  N-H-->O  O-->H-N    TCO  KAPPA ALPHA  PHI   PSI    X-CA   Y-CA   Z-CA
#             1    1   D              0   0  132    0, 0.0   2,-0.3   0, 0.0  49,-0.2   0.000 360.0 360.0 360.0 153.4   44.0   96.9  -23.8
#             2    2   V  E     -a   50   0A  10   47,-1.5  49,-2.8   2, 0.0   2,-0.3  -0.889 360.0-163.3-115.9 151.4   43.1  100.4  -22.5
#             3    3   S  E     -a   51   0A  63   -2,-0.3   2,-0.3  47,-0.2  49,-0.2  -0.961  10.3-172.8-131.0 152.3   44.8  103.7  -23.4
#             4    4   F  E     -a   52   0A   8   47,-2.2  49,-2.3  -2,-0.3   2,-0.4  -0.985   6.9-161.2-143.2 139.5   45.0  107.2  -22.0
#             5    5   R  E     -a   53   0A 144   -2,-0.3   4,-0.2  47,-0.2  49,-0.2  -0.993   9.7-156.0-121.0 125.9   46.6  110.2  -23.6
#             6    6   L  S    S+     0   0    1   47,-2.3   2,-0.5  -2,-0.4   3,-0.4   0.644  73.2  90.9 -73.3 -22.4   47.5  113.2  -21.4
#             7    7   S  S    S+     0   0   81   47,-0.3   3,-0.1   1,-0.2  -2,-0.1  -0.695 106.0   5.2 -75.5 121.0   47.4  115.6  -24.4
#             8    8   G  S    S+     0   0   72   -2,-0.5  -1,-0.2   1,-0.3   5,-0.1   0.269  97.6 147.8  90.2 -10.7   43.9  117.0  -24.7
#             9    9   A        +     0   0   10   -3,-0.4  -1,-0.3  -4,-0.2  -3,-0.1  -0.256  16.8 166.8 -58.8 142.4   42.9  115.2  -21.5
#             (\$inputfile1, \$inputfile2, .... )};
# Version   : 2.9
#             $debug feature has been added to make it produce error messages with '#' option.
#--------------------------------------------------------------------
sub open_dssp_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my $gap_char = '_';

  if($char_opt !~ /[HEBGIST]/i){  ## This is default sec. str type setting. (full representation)
		$char_opt = 'HEBGIST';
  }
  if ($debug eq 1){
	  print __LINE__, " # open_dssp_files : \$simplify     is  $simplify\n" ;
	  print __LINE__, " # open_dssp_files : \@file given   is  @file \n" ;
	  print __LINE__, " # open_dssp_files : \@string given is  @string\n" ;
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### Big main loop for input argument handling   ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  This is to check if the given file is not in pwd but in ENV var $DSSP
  #  Or if the file name was given only by the base name of seq(eg. 1cdg rather
  #  than 1cdg.dssp
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  for ($i=0; $i < @string; $i ++){
		 print __LINE__, " ${i}th string input is  $string[$i] \n" if $debug eq 1;
		 $string[$i] = "$string[$i]\.dssp"; ## adding  .dssp extension
		 print __LINE__, " ${i}th string inputwith \.dssp is now, $string[$i] \n" if $debug eq 1;

		 if(-f $string[$i]){
			 print chr(7) if $debug eq 1;
			 print __LINE__, " Your input filename exist in this File: $string[$i]\n" if $debug eq 1;
			 unshift(@file, "$string[$i]");
		 }
		 elsif(-l $string[$i]){
			 print chr(7) if $debug eq 1;
			 print "\n Your input filename exist as a Link to : $string[$i]\n" if $debug eq 1;
			 unshift(@file, "$string[$i]");
		 }
		 elsif( -d $ENV{'DSSP'} ){
			 $string[$i] =~ s/(\w+)\.dssp$/$1/; ## stripping .dssp extension
			 if( -e "$ENV{'DSSP'}\/$string[$i]\.dssp" ){
				unshift(@file, "$ENV{'DSSP'}\/$string[$i]\.dssp");
				$BASE = $string[$i];
			 }else{
				 print chr(7);
				 print __LINE__, " !! Error your DSSP env setting seems wrong. \n";
				 print __LINE__, " !! Your DSSP env path is also a link. \n" if (-l $ENV{'DSSP'});
				 print __LINE__, " I can't find  $ENV{'DSSP'}\/$string[$i] \n\n";
			 }
		 }
		 elsif( -l $ENV{'DSSP'} ){ #"""""""  IF $DSSP was a link
			 print __LINE__, " !! Your DSSP env path is also a link. \n" if $debug eq 1;
			 if( -e "$ENV{'DSSP'}\/$string[$i]\.dssp" ){
				unshift(@file, "$ENV{'DSSP'}\/$string[$i]\.dssp");
				$BASE = $string[$i];
			 }
			 elsif( -e "$ENV{'DSSP'}\/$string[$i]" ){
				unshift(@file, "$ENV{'DSSP'}\/$string[$i]\.dssp");
			 }
		 }
  }

  @file=@{remove_dup_in_array(\@file)};

  if ($debug eq 1){
	  print __LINE__, " # open_dssp_files : ENV set for dssp is $ENV{'DSSP'} \n" ;
	  print __LINE__, " # open_dssp_files : Final \@file given are \" @file \"\n" ;
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  END of File and string input checking in searching for the right dssp file.
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""


  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #"""""""""""""""""""""""" MAIN """"""""""""""""""""""""""""""""""""""
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  for($i=0; $i< @file; $i++){  ## <<-- loops over the input files.
		 my($flag, %hash, $name, $s, $matched, $ori_name, $chain);
		 my($real_file) = $file[$i];
		 $file[$i] =~ s/(.*\/)(\w+)\.(\w+)$/$2/; ## stripping .dssp extension
		 $file[$i] =~ s/(\w+)\.(\w+)$/$1/;       ## stripping .dssp extension
		 $ori_name = $name = $file[$i];
		 print "\n",__LINE__, " VAR \$ori_name is  $ori_name , \$file\[\$i\] is $file[$i]\n" if $debug eq 1;
		 unless(-e $real_file){
			print "\n",__LINE__,"  !!! ERROR $real_file does not exists as the final filename\n" if $debug eq 1;
			splice(@file, $i, 1); $i--;
			print "\n",__LINE__,"  Skipping to the next file to open" if $debug eq 1;
			next;
		 }

		 open(FILE_1,"$real_file");
		 print "\n",__LINE__, " ${i}th file $real_file is being opened from \@file \n" if $debug eq 1;
		 print "_"x86,"\n", if $debug eq 1;

		 while(<FILE_1>){
			 if(/^[\s]*\#\s+RESIDUE/){
				 $flag =1;
				 print __LINE__," \"#  RESIDUE\"   string found at line $. in $real_file\n" if $debug eq 1;
				 next
			 }  ##   '#  RESIDUE' is the starting key

			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 #    Matching the column
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

			 if(  ($flag==1) && (/^[\s]*-*\d+\s+-*\d*\s+[\w]\s\s([\w ]) /)  ){
				 $matched = $1;
				 print __LINE__," \"$matched\" is matched\n" if $debug2 eq 1;

				 if( $char_opt =~ /$matched/){ ## Here OPTIONS affect the operation.
					 $s .= $matched;    ## $match_option is like 'HE'. If the
					 next;              ## single char $matched is H or E, it will be
				 }else{                ## annexed to $s as an output.
					 $s .= $gap_char;
					 next;
				 }  # <-- this is necessary to get the right length (not to ignore
			 }     #     not matched char by converting them to ' '.

			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 #    When there are chains like A, B, ,,,
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 elsif( ($flag==1) && (/^[\s]*-*\d+\s+-*(\d+)[\s\w]+(\w)\s+[\w]\s\s([\w ]) /) ){
				 $chain = $2;   ## $flag  is for the starting key
				 # ${"chain_start$name$2"} = $1 unless defined(${"chain_start$name$2"});
				 my($matched_chain) = $3;
				 if( $char_opt =~ /$matched_chain/){
					$s .= $matched_chain;   next; }
				 else{
					$s .= $gap_char; next; }
			 }elsif( (/^\s+\d+\s+\!/)&&($chain =~/\w/) ){
				 $name="$name$chain";
				 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
				 ##   IF simplify  option is set
				 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
				 if($simplify eq 1){
					 $s =~ tr/TGI/EHH/;   ### change the characters.
					 print __LINE__," Simplifying TGI to EHH by \"tr\"\n" if $debug eq 1;
				 }
				 if($debug eq 1){ print __LINE__, " Name of seq:  $name \n"; }
				 $hash{$name}=$s; $s='';
				 $name=$ori_name; next;
			 }
		 }
		 close(FILE_1);  ##<<---- Reading finished.

		 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 ##  Naming procedure
		 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 if($chain =~/^\w$/){     # when there are chains, put A,B, etc to seq. names.
			 $name="$name$chain";  ## <<-- This is for the last chain entry.
			 if($debug eq 1){ print __LINE__, " Name of seq:  $name, There were Chains !\n"; }
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 ##   IF simplify  option is set
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 if($simplify eq 1){
				 $s =~ tr/TGI/EHH/;   ### change the characters.
				 print __LINE__," Simplifying TGI to EHH by \"tr\"\n" if $debug eq 1;
			 }
			 $hash{$name}=$s;
			 $s='';   ##<<--- This is essential, a former bug!
			 $name=$ori_name;
		 }else{      # <<-- Without chains option.
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 ##   IF simplify  option is set
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 if($simplify eq 1){
				 $s =~ tr/TGI/EHH/;   ### change the characters.
				 print __LINE__," Simplifying TGI to EHH by \"tr\"\n" if $debug eq 1;
			 }
			 $hash{$name}=$s;
			 if($debug eq 1){ print __LINE__, " Name of seq:  $name \n"; }
			 $s='';   #<<--- This is essential, a former bug!
		 }

		 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 ### OUTput format determination according to options #####
		 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 if($debug eq 1){ print "\n", __LINE__, " The Hash out of \"$real_file\" is \n ";
			 &show_hash(%hash);
		 }
		 push(@out_hash_ref_list, \%hash) if ref(\%hash) eq 'HASH';
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #"""""""""""" END of Main """""""""""""""""""""""""""
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  if($comm_col =~ /c/i){ # $comm_col  is a global
	  if($debug eq 1){
		  print "\n", __LINE__;
		  print " # open_dssp_files : you have put 'c' option for common column only\n";
		  $temp = @out_hash_ref_list;
		  print __LINE__, " # open_dssp_files : No. of hashes passed to get_common_column is: $temp\n";
		  print __LINE__, " # open_dssp_files : The hash are(is) : @out_hash_ref_list\n";
	  }
	  $ref_hash_out = &get_common_column(@out_hash_ref_list);
	  return($ref_hash_out);
  }else{
	  if(@out_hash_ref_list == 1){ return($out_hash_ref_list[0]); }
	  elsif(@out_hash_ref_list > 1){ return(@out_hash_ref_list);  }
  }
}

#________________________________________________________________________
# Title     : open_dna_files  (genbank file opener)
# Usage     : ($out, $out2) = @{&open_dna_files(\$inputfile1, \$inputfile2)};
#             : (@out)        = @{&open_dna_files(\$inputfile1, \$inputfile2)};
#             ---------- Example of dna file --- dna files are genbank file format
#
#
#             1 ggatcttgct gaatacatgg tggcacaatt gaaattagat ccgcgaattt
#               tcatcaaaac
#             61 agcgggatta tggtcaacaa atccgtaaaa atgaaaagcc tgtcttgcga
#               caggcttttt
#             121 tatttgaatg taatcctcac tggtaaacgt ttaacgccaa agacaaaggg
#               actagggatc
#             181 gcttcaagct tttcatcatg agcagctttt tcgatacaag ctgacattga
#
# Function  : open dna files and put sequences in a hash(s)
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : (@out_array_of_refs)
# Argument  : (\$inputfile1, \$inputfile2, .... )};
# Version   :
#--------------------------------------------------------------------
sub open_dna_files{  my(@in)=@_; my(@names, $i,$n, $s, %hash,@out_hash_ref_list);
  for($i=0; $i<=$#in; $i++){
	 if(ref($in[$i])){ unless (-e ${$_[$i]}){ next; }
		 open(FILE_1,"${$_[$i]}");  undef(%hash);
		 while(<FILE_1>){      # file1 needs to be xxxx.msf for the moment, automatic later
			 if(/Name\:\s+(\S+)\s+/){ $n=$1; $n=~s/\,//g; }
			 if((/\s+\d+\s([acgt ]+)$/)||(/\s\s\s\s+([acgt ]+)$/)){
				$s=$1; $s=~s/ //g; $s=~tr/a-z/A-Z/; $hash{$n}.=$s; }     }
		 push(@out_hash_ref_list, \%hash); } }
  if(@out_hash_ref_list  == 1 ){ return(\%hash); }
  elsif(@out_hash_ref_list > 1){ return(@out_hash_ref_list); } # <-- contains (\%out_seq0, \%out_seq1, \%out_seq2, .... )
}
#________________________________________________________________________
# Title     : open_tem_files
# Usage     : ($r1, $r2, $r3, $r4, $r5)=&open_tem_files(\$infile1, \$inputfile2..)};
#             ---------- Example of xxxx
#             >P1;1cdg
#             sequence
#             APDTSVSNKQNFSTDVIYQIFTDRFSDGNPANNPTGAAFDGTCTN-LRLYCGGDWQGIINKINDGYLTGMGVTAI
#             >P1;1cdg
#             secondary structure and phi angle
#             CCCCCCCCCCCCCCCCEEECCHHHHCCCCHHHCCCPHHCCCCPCC-CCCCCPCCHHHHHHHHHCPHHHHHPCCEE
#             >P1;1cdg
#             solvent accessibility
#             TTTTTTTTTTTFFFFFFFFFFFFFFTTTTTTTTTTTTTTTTTFTT-TTTTFFFFFTFFTTTFTTTFFTTFTFTFF
#             >P1;1cdg
#             DSSP
#             CCCCCCCCCCCCCCCCEEECCHHHHCCCCGGGCCCGGGCCCCCCC-CCCCCCCCHHHHHHHHHCCHHHHHCCCEE
#             >P1;1cdg
#             percentage accessibility
#             67523272360000000000000002213792129b722248085-14110000030015105660028040200
#             2ltn           ----TETTSFLITKFSPDQQNLIFQGDGYTT-KEKLTLTK------AVKNTVGRALYSSP
#             1loe           ----TETTSFSITKFGPDQQNLIFQGDGYTT-KERLTLTK------AVRNTVGRALYSSP
#
#             2ltn           ----CEEEEEEECCCCCCCCCEEEEPCCEEP-PPCEEEEC------CCCPCEEEEEECCC
#             1loe           ----CEEEEEEECCCCCCCCCEEEEPCCEEE-PPEEEEEC------CCCPCEEEEEECCC
#
#             2ltn           ----TTTTTTTTTTFTTTTTTFTTTTTFTFT-TTTFTFFT------TTTTTTFFFFTTTT
#             1loe           ----TTTTTTTTTTFTTTTTTFTTTTTFTFT-TTTFFFFT------TTTTTTFFFFTTTT
#
#             2ltn           ----CEEEEEEECCCCCCCCCEEEEECCEEC-CCCEEEEC------CCCCCEEEEEECCC
#             1loe           ----CEEEEEEECCCCCCCCCEEEEECCEEE-CCEEEEEC------CCCCCEEEEEECCC
#
#             2ltn           ----543251b16504681c50422650502-75201006------35681200001453
#             1loe           ----6532e1508a07981b50422750404-8a200006------36672200001453
# Function  : opens JPO's xxxx.tem file, stores in 5 hashes. (usually one tem file)
# Example   :
# Warning   :
# Keywords  :
# Options   : -n, n, or N for removing any gaps in the sequences.
#             -s, s, or S for getting only the sequences.
# Returns   : ($r1, $r2, $r3, $r4, $r5) <= these are references for hashes.
# Argument  : (\$inputfile1, \$inputfile2, .... )};
# Version   :
#--------------------------------------------------------------------
sub open_tem_files{
  my($a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r,
	  $s, $t, $u, $v, $w, $x, $y, $z, $pwd, $file, $dir, $output, $in_dir,
	  %hash, @keys, @array, @hash, $option_string, $string, @in, $line,
	  $name, %out, $gap_chr, @str1, @str2, $num_opt, @file, @dir,
	  $char_opt, $char_opt_given, $num_opt_given,
	  @char_options, @file, $original_dir, @read_files, %array_msf, %array_jp,
	  $jp_file, $error_rate, $id_compos, @dir, @names, $name, $name_found,
	  @outref, %sequence, %secondary,%solvent_access, %DSSP, %percent_accessibility,
	  $name_found,$type_seq, $type_secon, $type_sol, $type_DSSP, $type_acc
  );
  ##################################################
  ##### Start of  general argument handling   ######
  ##################################################
  for($k=0; $k< @_ ;$k++){
	  if( !ref($_[$k]) ){
			  if($_[$k]=~ /^[\-]*(\w)$/){
				  $char_opt  .= "\,$1";
				  $char_opt_given =1;
			  }elsif($_[$k]=~ /^\-(\w\w+)$/){       ## When multiple option is given,
				  my(@char_options) = split(/|\,/, $1); ## '-' should be used. eg. '-HEGI'
				  $char_opt .= join("\,", @char_options);  ## as an option string.
				  $char_opt_given = 1;
			  }elsif($_[$k]=~ /^([\-]*\d)$/){
				  $num_opt   .= "\,$1";  ## delimiter is ','
				  $num_opt_given = 1;
			  }elsif(-f $_[$k]){     ## When file is given,
				  push(@file, \$_[$k] );
			  }elsif(-d $_[$k]){     ## When dir is given,
				  push(@dir, \$_[$k] );    }
	  }elsif( ref($_[$k]) ){
			if(ref($_[$k]) eq "SCALAR")
				{if(${$_[$k]} =~ /^[\-]*(\w)$/){  ## check if it has '-' before option char
					$char_opt  .= "\,$1";  ## delimiter for option char is ','
					$char_opt_given = 1;
				}elsif(${$_[$k]}=~ /^\-(\w\w+)$/){       ## When multiple option is given,
					my(@char_options) = split(/|\,/, $1); ## '-' should be used. for eg. '-HEGI'
					$char_opt  .= join("\,", @char_options);  ## as an option string.
					$char_opt_given =1;
				}elsif(${$_[$k]}=~ /^([\-]*\d)$/){
					$num_opt   .= "\,$1";  ## delimiter is ','
					$num_opt_given = 1;
				}elsif(-f ${$_[$k]}){     ## When file is given,
					push(@file, $_[$k] );
				}elsif(-d ${$_[$k]}){     ## When dir is given,
					push(@dir, $_[$k] );  }
			}elsif(ref($_[$k]) eq "ARRAY"){  ## When ARRAY is given,
					push(@array, $_[$k]);
			}elsif(ref($_[$k]) eq "HASH"){   ## When HASH is given,
					push(@hash, $_[$k]);
			}
	  }
	  ###################################################
	  ## The output of this option handling section is
	  ## one or combination of these:
	  ## $char_opt_given   ##<<-- Simple boolean '1' or none
	  ## $num_opt_given    ##<<-- Simple boolean '1' or none
	  ## $char_opt, as ('A,B,C')
	  ## $num_opt,  as ('1,-2,3')
	  ## @file          as (\file1, \file2,...)
	  ## @dir           as (\dir1, \dir2,...)
	  ## @array         as (\array1, \array2,,,)
	  ## @hash          as (\hash1, \hash2,,,,)
	  ###################################################
  }
  ################################################
  ##### END of  general argument handling   ######
  ################################################

  for($i=0; $i < @file; $i++){
	 if(ref($file[$i])){ unless(-T ${$file[$i]}){ next; }
		 open(FILE_1, "${$file[$i]}");
		 while(<FILE_1>){
			 if(/^\>P1\;([\w\-]+)/){ $name=$1; #=================== SEQUENCE
				($type_seq, $type_secon, $type_sol, $type_DSSP, $type_acc)=();
			 }elsif(/^sequence/){  $type_seq = 1;
			 }elsif(($type_seq ==1)&&(/^([\w\-]+)[\*]*$/)){
				my($line) = $1;
				if( $char_opt =~ /n/i){  ## to remove the gaps etc.
					$line=~s/\W//g;
					$sequence{$name}.=$line;
				}else{
					$sequence{$name}.=$line;
				} #from below ============== SECONDARY
			 }elsif(/^secondary structure and phi angle/){  $type_secon = 1;
			 }elsif(($type_secon ==1)&&(/^([\w\-]+)[\*]*$/)){
				$secondary{$name}.=$1;     #from below============= SOLVENT ACCESSIBILITY
			 }elsif(/^solvent accessibility/){  $type_sol = 1;
			 }elsif(($type_sol ==1)&&(/^([\w\-]+)[\*]*$/)){
				$solvent_access{$name}.=$1;     #from below========= DSSP
			 }elsif(/^DSSP/){  $type_DSSP = 1;
			 }elsif(($type_DSSP ==1)&&(/^([\w\-]+)[\*]*$/)){
				$DSSP{$name}.=$1;     #from below=================== PERCENTAGE ACCESSIBILITY
			 }elsif(/^percentage accessibility/){  $type_acc = 1;
			 }elsif(($type_acc ==1)&&(/^([\w\-]+)[\*]*$/)){
				$percent_accessibility{$name}.=$1;  } }
		  push(@outref,\%sequence,\%secondary,\%solvent_access,\%DSSP,\%percent_accessibility);
	  }  }
  if( ($char_opt =~ /s/i) || ( @outref  == 1 ) ){
	  return(\%sequence); }
  elsif( @outref > 1){ return(@outref); } # <-- contains (\%sequence,\%secondary,....)
}

#________________________________________________________________________
# Title     : open_hlx_files
# Usage     :
# Function  :
#             Example of hlx file (For Bo Nielson)
#             Residue Frame Score Probability
#             1 M   a  1.00563E+00 2.05479E-03
#             2 T   b  1.01814E+00 2.52053E-03
#             3 R   c  1.01814E+00 2.52053E-03
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : list of ref. for hash(es)
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub open_hlx_files{  my(@in)=@_; my(@names, $i,$n, $s, %hash,@out_hash_ref_list);
  for($i=0; $i< @in; $i++){
	 if(ref($in[$i])){ unless(-e ${$in[$i]}){ next; }}
	 open(FILE_1, "${$in[$i]}");
	 while(<FILE_1>){
		if(/^[\s]+([\d]+)\s+(\w+)\s+(\w+)\s+\S+\s+(\S+)$/){
		  $hash_residue{$1}=($2); # residue num is key, residue is value.
		  $hash_frame{$1}=($3);   # residue num is key, frame is  value.
		  $hash_prob{$1}=($4);    # residue num is key, probability is  value.
		}
	 }
	 push(@out_hash_ref_list, \%hash_residue, \%hash_frame, \%hash_prob);
  }
  if($#out_hash_ref_list  == 0 ){ return(\%hash_residue); }
  elsif($#out_hash_ref_list > 0){ return(@out_hash_ref_list); }
}


#________________________________________________________________________
# Title     : open_jp_files  (bug free!!)
# Usage     : %out_hash=%{&open_jp_files(\$file_name)};
# Function  : reads jp files and stores results in a hash.
# Example   :
# Warning   : All the spaces  '-' !!!
# Keywords  :
# Options   :
# Returns   : a reference of a hash for names and  their sequences.
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub open_jp_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%hash_out, $s1);
	open(FILE_JP, "$file[0]");
	while(<FILE_JP>){    if(/^CLUSTAL/){ next; }
		if((/^([\S]+)[\t]* +$/)||(/^\#/)){ next; }
		if(/^([\w\.\-\=\+]+) +\t*(\S+)[\n]$/){ $n=$1; $s=$2; $hash_out{$n}.= $s; }
	}
	#&show_hash(%hash_out);
	\%hash_out;
}

#________________________________________________________________________
# Title     : open_ali_files
# Usage     : %anyhash = %{&open_ali_files(\$filename)};
# Function  : open fasta files and put sequences in a hash
#             FASTA sequence file format is like this;
#
#             >P1;1abp
#             structureX:1abp:   1 : : 306 : :L-arabinose-binding protein:Escherichia coli: 2.40:-1.00
#             ENLKLGFLVKQPEEPWFQTEWKFADKAGKDLG-FEVIKIAV-PDGEKTLNAIDSLAASGAKGFVICTPDPKLGSA
#             TEGQGFKAADIIGIGINGVDAVSELSKAQATGFYGSLLPSPDVHGYKSSEMLYNWVAK--------DVEPPKFTE
#             VTDVVLITRDNFKEELEKKGLGGK*
#             >P1;2gbp
#             structureX:2gbp:   1 : : 309 : :D-galactose/D-glucose-bind:Escherichia coli: 1.90:14.60
#             ADTRIGVTIYKYDDNFMSVVRKAIEQDAKAAPDVQLLMNDSQNDQSKQNDQIDVLLAKGVKALAINLVDPAAAGT
#             LKAHNKS-SIP-VFGVDA--LPEALALVKSGALAGTVLNDANNQAKATFDLAKNLADGKGAADGTNWKIDNKVVR
#             VP-YVGVDKDNLAEFSKK------*
#
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub open_ali_files{    my($input_file)=${$_[0]}; my(@names, %sequence, $flag,$name,$temp2);
	unless (-e $input_file){     print chr(7);
	  print "\n\n\t This is sub open_ali_files in th_lib.pl \n\n";
	  print "\n\n\t Fatal: The input file $input_file is not in the directory \n"; exit; }
	open(FILE_1,"$input_file");
	while(<FILE_1>){         		# file1 needs to be xxxx.fasta for the moment, automatic later
	  if(/^\>\S\S\;([\w\-\.]+)$/){ $name=$1; next; }
	  elsif(/\:/){ next; }
	  elsif(/^([\w\-]+)[\*]*$/){ $sequence{$name}.= $1; }
	  else{  next; }    }
	return(\%sequence);
}
#________________________________________________________________________
# Title     : open_pir_files  (nearly the same as .ali file)
# Usage     : %anyhash = &open_pir_files($any_sequence_file_fasta_form);
# Function  : open fasta files and put sequences in a hash
#             FASTA sequence file format is like this;
#
#             >P1;1abp
#             structureX:1abp:   1 : : 306 : :L-arabinose-binding protein:Escherichia coli: 2.40:-1.00
#             ENLKLGFLVKQPEEPWFQTEWKFADKAGKDLG-FEVIKIAV-PDGEKTLNAIDSLAASGAKGFVICTPDPKLGSA
#             VTDVVLITRDNFKEELEKKGLGGK*
#             >P1;2gbp
#             structureX:2gbp:   1 : : 309 : :D-galactose/D-glucose-bind:Escherichia coli: 1.90:14.60
#             LKAHNKS-SIP-VFGVDA--LPEALALVKSGALAGTVLNDANNQAKATFDLAKNLADGKGAADGTNWKIDNKVVR
#             VP-YVGVDKDNLAEFSKK------*
#
# Example   :
# Warning   : well tested. It skips lines starting with blank, lines with '-' in them.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub open_pir_files{
	my($input_file)=${$_[0]};
	my(@names, %sequence, $flag,$name,$temp2);
	unless (-e $input_file){     print chr(7);
	  print "\n\n\t This is sub open_ali_files in th_lib.pl \n\n";
	  print "\n\n\t Fatal: The input file $input_file is not in the directory \n"; exit; }
	open(FILE_1,"$input_file");
	while(<FILE_1>){         		# file1 needs to be xxxx.fasta for the moment, automatic later
	  if(/^\>\S\S\;([\w\-\.]+)$/){ $name=$1; next; }
	  elsif((/\:/)||(/^\*/)){ next; }
	  elsif(/^([\w\-]+)[\*]*$/){ $sequence{$name}.= $1; }
	  else{  next; }    }
	%sequence=%{&remov_com_column(\%sequence)};
	return(\%sequence);
}

#________________________________________________________________________
# Title     : open_aln_files
# Usage     : %out_hash=%{&open_aln_files(\$file_name)};
# Function  : reads jp files and stores results in a hash.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : a reference of a hash for names and  their sequences.
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub open_aln_files{       my(%hash_out, $s, $n, $s1);
  open(FILE_JP, "${$_[0]}");  # reading in (JP) file
  while(<FILE_JP>){       if(/^CLUSTAL/){ next; }
	 if((/^([\S]+)[\t]* +$/)||(/\#/)){ next; }
	 if(/^([\w\.\-\=\+]+) +\t*(.+)[\n]$/){ $n=$1; $s=$2; $hash_out{$n}.= $s; }}
  \%hash_out;
}


#________________________________________________________________________
# Title     : open_sst_files (also look at "open_sst_files_with_gap")
# Usage     : %out_sst_hash =%{&open_sst_files(\$jp_file_dir_and_name)};
# Function  : gets the name of a file(jp file) with its absolute dir path
#             reads the sequence names in the jp file and looks up all
#             the sst files in the same directory. Puts sst sequences
#             in a hash with keys of sequence names.
#
# Example   : jp file  ==  seq1 ABDSF--DSFSDFS   <- true sequence
#                              seq2 lkdf-jlsjlsjf
#
#                 sst files == seq1.sst, seq2.sst
#
#                 output hash == seq1 hHHHHHHHttEEEEEEEE
#                                seq2 hHHHHHHHHHEEEEEEhh
#
# Warning   : $jp_file_dir_and_name should be absolute dir and file name
# Keywords  :
# Options   :
# Returns   : a ref. for a hash
# Argument  : a ref. for scaler of "jp file name"
# Version   :
#--------------------------------------------------------------------
sub open_sst_files{
  my($dir)=${$_[0]};
  my(%jp_file)=%{&open_jp_files($_[0])};
  my(%out_sst_seq_hash, $directory);
  if (${$_[0]} =~/^(.+\/)\w+\.jp$/){
	  $directory = $1;   }
  my(@keys)= (keys %jp_file);
  for $seq_name (@keys){
	  my($sst_file_name)="$directory$seq_name\.sst";
	  my($sst_seqs);
	  open(SST, "$sst_file_name");
	  while(<SST>){
		  if(/^  summary  (.*)  summary  $/){
			 $sst_seqs.=$1;  }   }
	  $out_sst_seq_hash{$seq_name}=$sst_seqs;   }
  \%out_sst_seq_hash;
}
#________________________________________________________________________
# Title     : read_sst_files  (synonym of open_sst_files )
# Usage     : %out_sst_hash =%{&read_sst_files(\$jp_file_dir_and_name)};
# Function  : gets the name of a file(jp file) with its absolute dir path
#             reads the sequence names in the jp file and looks up all
#             the sst files in the same directory. Puts sst sequences
#             in a hash with keys of sequence names.
#
# Example   : jp file  ==  seq1 ABDSF--DSFSDFS   <- true sequence
#                              seq2 lkdf-jlsjlsjf
#
#                 sst files == seq1.sst, seq2.sst
#
#                 output hash == seq1 hHHHHHHHttEEEEEEEE
#                                seq2 hHHHHHHHHHEEEEEEhh
#
# Warning   : $jp_file_dir_and_name should be absolute dir and file name
# Keywords  :
# Options   :
# Returns   : a ref. for a hash
# Argument  : a ref. for scaler of "jp file name"
# Version   :
#--------------------------------------------------------------------
sub read_sst_files{  my($dir)=${$_[0]};  my(%jp_file)=%{&open_jp_files($_[0])};
  my(%out_sst_seq_hash, $directory);
  if (${$_[0]} =~/^(.+\/)\w+\.jp$/){
	  $directory = $1;   }
  my(@keys)= (keys %jp_file);
  for $seq_name (@keys){
	  my($sst_file_name)="$directory$seq_name\.sst";
	  my($sst_seqs);
	  open(SST, "$sst_file_name");
	  while(<SST>){
		  if(/^  summary  (.*)  summary  $/){
			 $sst_seqs.=$1;  }   }
	  $out_sst_seq_hash{$seq_name}=$sst_seqs;   }
  \%out_sst_seq_hash;
}


#________________________________________________________________________
# Title     : open_slx_files
# Usage     : %anyarray = &open_slx_files(\$any_sequence_file_slx_form);
# Function  : open slx files and put sequences in a hash
# Example   : selex file (foo.slx) looks like this:
#
#         #=SQ GLB_TUBTU  5.9393 - - 0..0::0 -
#         #=SQ GGZLB      20.9706 - - 0..0::0 -
#         #=RF        x.....x.xxxx.xxx.xxxxxx....xxxxxxxxxxxxxxx.xxxx
#         HAHU        ......VLSPADKTNVKAAWGKVGA......HAGEYGAEALERMFLS
#         HBA3_PANTR  ......VLSPADKTNVKAAWGKVGA......HAGZYGAEALERMFLS
#
# Warning   : The slx FORMAT SHOULD BE AT LEAST 30 residue long
# Keywords  :
# Options   :
# Returns   : a ref. of a hash
# Argument  : takes one ref. for a file.
# Version   : 1.0
#--------------------------------------------------------------------
sub open_slx_files{
	my(@names, $n, $s, %hash);
	if ((-z ${$_[0]})|| (-B _) || (-x _)){   print chr(7);
		print "\n\t I am $0: Input file $file1 isn't in the dir \n"; exit;
	}
	open(FILE_1,"${$_[0]}");  	# reading in (slx)
	while(<FILE_1>){         	# file1 needs to be xxxx.slx for the moment, automatic later
	  if((/^([\S]+)[\t]* +$/)||(/^\#\=/)){ next; }
	  if(/^(\w+)\s+([\w\.\-]+)$/){
		  $n=$1; $s=$2; $hash{$n}.= $s;  }}
	return( \%hash );
}

#________________________________________________________________________
# Title     : open_out_files
# Usage     : %anyarray = &open_out_files(\$any_out_file);
# Function  : open out files and put their sequences in a hash
# Example   :
# Warning   : well tested. It skips lines starting with blank, lines with '-' in them.
# Keywords  :
# Options   :
# Returns   : a ref. of a hash
#             Output example in a hash(fills the space)
#
#             3aat       --mfe---aapadp----adlfraderpGk---gigvY--etgktpvltS
#             1ama       ---eamiaakkmdkeylpiaGladFtraSA----eAfksgryVTV
#
# Argument  : takes one ref. for a file.
#             >>Out file looks like this===>
#
#             3aat         mfe   aapadp----adlfraderpGk   gigvY--etgktpvltS
#             1ama       sswwshvemgppdp  krdtns--kkMnLG---YrddngkpyvLnC-
#
# Version   :
#--------------------------------------------------------------------
sub open_out_files{    			# opening msf files. input is a file name.
	my($flag, %hash, @names, $n, $s);
	unless (-e ${$_[0]}){
	  print chr(7);	# beep warning for error
	  print "\n\n\t This is sub open_out_files in th_lib.pl \n\n";
	  print "\n\n\t I am $0: The input file $file1 is not in the directory \n";
	  exit; # these subs are in th_lib.pl
	}
	open(FILE_1,"${$_[0]}");  	# reading in (MSF)
	while(<FILE_1>){         	# file1 needs to be xxxx.msf for the moment, automatic later
	  if (/^pairwise sequence distances/){  # <<---!! recognising que !!
		 $flag =1;}
	  if((/^(...........)([\s\w\-]+)[\n]$/) && ($flag == 1)){ 	# matching patterns (includin tab space)
		 $n=$1; $s=$2; $n=~ s/ //g; $s=~ s/ /-/g;
		 $hash{$n}.= $s;   		# may and concatenate second set to first set(name)
	  }
	}
	%hash=%{&fill_ending_space(\%hash)};  ### << filling the ending gaps !!
	return ( \%hash );
}


#________________________________________________________________________
# Title     : package Roman;
# Usage     :
# Function  : Roman.pm : Roman <-> Arabic conversion package
# Example   :
# Warning   : From: ozawa@prince.pe.u-tokyo.ac.jp (OZAWA Sakuro)
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
package Roman;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(isroman arabic Roman roman);
sub isroman{
	 my($arg) = @_;
	 $arg ne '' and $arg =~ /^(?: M{0,3})           (?: D?C{0,3} | C[DM])
									  (?: L?X{0,3} | X[LC]) (?: V?I{0,3} | I[VX])$/ix;
}
%roman2arabic = qw(I 1 V 5 X 10 L 50 C 100 D 500 M 1000);
sub arabic{
	 my($last_digit, $arg) = (1000, @_);
	 isroman $arg or return undef;
	 my($arabic);
	 foreach (split(//, uc $arg)) {
		  my($digit) = $roman2arabic{$_};
		  $arabic -= 2 * $last_digit if $last_digit < $digit;
		  $arabic += ($last_digit = $digit);
	 }
	 $arabic;
}
%roman_digit = qw(1 IV 10 XL 100 CD 1000 MMMMMM);
@figure = reverse sort keys %roman_digit;
grep($roman_digit{$_} = [split(//, $roman_digit{$_}, 2)], @figure);
sub Roman{
	 my($arg) = @_;
	 0 < $arg and $arg < 4000 or return undef;
	 my($x, $roman);
	 foreach (@figure){
		  my($digit, $i, $v) = (int($arg / $_), @{$roman_digit{$_}});
		  if (1 <= $digit and $digit <= 3) {
				$roman .= $i x $digit;
		  } elsif ($digit == 4) {
				$roman .= "$i$v";
		  } elsif ($digit == 5) {
				$roman .= $v;
		  } elsif (6 <= $digit and $digit <= 8) {
				$roman .= $v . $i x ($digit - 5);
		  } elsif ($digit == 9) {
				$roman .= "$i$x";
		  }
		  $arg -= $digit * $_;
		  $x = $i;
	 }
	 $roman;
}
sub roman{
	 lc Roman @_;
}
1;
package main;


#________________________________________________________________________
# Title     : time_date
# Usage     :
# Function  : returns current time & date as 05/15/95 23:22:41
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub time_date{
	 my($sec, $min, $hour, $day, $mon, $year) = localtime(time);
	 sprintf("%.2d/%.2d/%.2d %.2d:%.2d:%.2d", $mon+ 1, $day, $year, $hour, $min, $sec);
}

#________________________________________________________________________
# Title     : sep
# Usage     : &sep;
# Function  : separater. \n#________________________________\n
# Example   :
# Warning   :
# Keywords  : separating_line
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub sep{ print "\n#___________________________________________#\n"; }


#________________________________________________________________________
# Title     : diff_dates
# Usage     : $output = &diff_dates("05/15/1994", "05/15/1995")
# Function  : gets number of days between two dates ( "05/15/94" )
# Example   :
# Warning   : modified (originally from reb@serf.nsc.com (Edward Brown))
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub diff_dates{
		  print "\nNote: diff_dates subroutine accepts 00/00/0000 format \n";
		  print " If you put 11/15/94, diff_dates understands it as 11/15/0094 \n\n";
		  my($date1, $date2)  = @_;
		  my($absolute_days1) = &toJulian($date1);
		  my($absolute_days2) = &toJulian($date2);
		  my($diff_of_days)   = abs($absolute_days1 - $absolute_days2);
		  $diff_of_days;
}

#________________________________________________________________________
# Title     : fromJulian
# Usage     :
# Function  : taking the days between two dates.
# Example   : print &fromJulian(34469), "\n";
# Warning   : got from reb@serf.nsc.com (Edward Brown)
#             require "julian
#             $Value1 = &toJulian("05/15/1994");        # Assign $Value1 a Julian Day
#             print "$Value1\n";
#             $Value2 = &toJulian("05/20/1994");        # Assign Value2 a Julian Day
#             print "$Value2\n";
#             $Days = $Value2 - $Value1;              #Difference in Days
#             print "$Days\n";
#             print &fromJulian(34469), "\n";         # Give a Julian Day, give the date
#             print &fromJulian(34474), "\n";
#             What is the Date 25 Days from Today?  (You can get format from `date`)
#
#             $Value = &toJulian("05/16/1995");
#             $Value +=  25;
#             print &fromJulian($Value), "\n";
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub fromJulian{
		  print "\n Note: fromJulian subroutine accepts any numbers for days \n";
		  print " It counts from 0000 year. \n\n";
		  my ($value) = @_;
		  my ($yr, $mo, $dy, $julian, $leapflag, $holdjulian)=0;
		  $yr = 0; 	$mo = 1;	$dy = 1;	 $julian = 0;
		  while ($yr>=0){
			   $holdjulian = $julian;
			   $leapflag = "FALSE";
			   if ($yr % 400 == 0){
									$julian += 365;
			   } elsif ($yr % 4 == 0){
									$leapflag = "TRUE";
									$julian += 366;
			   } else {
									$julian += 365;
			   }
			   if ($value < $julian){
									$julian = $holdjulian;
									last;
			   };
			   ++$yr;
		  }
		  $julian = $value - $julian;
		  while ($mo){
			  $holdjulian = $julian;
			  if ($mo == 1 || $mo == 3 || $mo == 5 ||
								   $mo == 7 || $mo == 8 || $mo == 10 ||
								   $mo == 12) {
								   $julian -= 31;
			  } elsif ( $mo == 4 || $mo == 6 ||
								   $mo == 9 || $mo == 11) {
								   $julian -= 30;
			  } elsif ($leapflag eq "TRUE") {
								   $julian -= 29;
			  } else {
								   $julian -= 28;
			  }

			  if ($julian < 0 ){
								   $julian = $holdjulian;
								   last;
			  };
		  ++$mo;
		  }
		  $dy = $julian;
		  $julRetDate = sprintf("%02d/%02d/%04d", $mo, $dy, $yr);
}
#________________________________________________________________________
# Title     : toJulian
# Usage     :
# Function  : taking the days between two dates.
# Example   : $Value1 = &toJulian("05/15/94"); print "$Value1\n";
# Warning   : got from reb@serf.nsc.com (Edward Brown)
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub toJulian {
		  print "\n Note: toJulian subroutine accepts 00/00/0000 format \n";
		  print " If you put 11/15/94, 'toJulian' understands it as 11/15/0094 \n\n";
		  my ($value) = @_;
		  my ($yr, $mo, $dy, $julian, $leapflag, $holdjulian)=0;
		  ($mo, $dy, $yr) = split(/\//, $value);
		  # checking if $mo, $dy, $yr are numbers.
		  if (!($mo =~ /^[\d]+$/)||!($dy =~ /^[\d]+$/)||!($yr =~ /^[\d]+$/)){
					 print "\nInputs to sub toJulian and sub diff_dates must be numbers.\n";
					 print "\nExiting sub toJulian \n";
					 &caller_info;
					 exit;
		  }
		  if ($yr % 400 == 0){
								$leapflag = "FALSE";
		  } elsif ($yr % 4 == 0){
								$leapflag = "TRUE";
		  } else {
								$leapflag = "FALSE";
		  }
		  $julian = 0;
		  for ($i = 0; $i < $yr; ++$i){
								if ($i % 400 == 0){
													 $julian += 365;
								} elsif ($i % 4 == 0){
													 $julian += 366;
								} else {
													 $julian += 365;
								}
		  }
		  for ($i = 1; $i < $mo; ++$i){
								if ($i == 1 || $i == 3 || $i == 5 ||
													 $i == 7 || $i == 8 || $i == 10 ||
													 $i == 12) {
													 $julian += 31;
								} elsif ( $i == 4 || $i == 6 ||
													 $i == 9 || $i == 11) {
													 $julian += 30;
								} elsif ($leapflag eq "TRUE"){
													 $julian += 29;
								} else {
													 $julian += 28;
								}
		  }
		  $julian += $dy;
}

#________________________________________________________________________
# Title     : opendir_and_go
# Usage     : &opendir_and_go($input_dir); #$inputdir='/nfs/ind4/ccpe1/people/A Biomatic /jpo/align';
# Function  : open dir and process all files if you wish, and then go in any sub
#             dir of it. Using recursion. created by A Biomatic
#             if any file is linked, it skips that file.
# Example   : as in my 'indexing.pl' for perl file indexer.
# Warning   : Seems to work fine.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub opendir_and_go{
		  my($original_dir)=$_[0];
		  my(@read_files)=&read_any_dir($original_dir);
		  foreach $file(@read_files){
					 my($dir)=$split_path[$#split_path];
					 if (-l $realfile1){
								next;
					 }elsif (-d $realfile1){
						  &opendir_and_go($realfile1);
					 }elsif (-f $realfile1){ #<<------ This is where things match
								chdir($original_dir);
						 if($realfile1 =~/(\d+\-$no\.msf)$/){
											@dir=split(/\//, $realfile1);
											$dir=$dir[($#dir-1)];  # where am I ?
										  # $jp_file = $original_dir.'/'.$dir.'.jp';
										  # %array_msf =&open_msf_files($realfile1);
										  # %array_jp  =&open_jp_files ($jp_file);
										  # $array_ref_msf = \%array_msf;
										  # $array_ref_jp  = \%array_jp;
										  # $error_rate =&get_posi_shift_hash($array_ref_msf, $array_ref_jp);
										  # $id_compos  =&amino_acid_compos_id_percent($array_ref_jp);
										  # push(@rates_accumu,$error_rate);
										  # push(@compos_id,$id_compos);
						 }
					 }
					 else
					 {
								next;
					 }
		  }
}
#________________________________________________________________________
# Title     : occurances
# Usage     : sort occurances (@any_array_with_repeating_element);
# Function  : this is for sort, to sort things according to the higher num. of occu.
# Example   :
# Warning   : This is from 21 DAYS book, page 373.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub occurances{
	 $occurance_hash{$a} <=> $occurance_hash{$b};
}

#________________________________________________________________________
# Title     : extract_ori_seq
#             nt5
# Usage     : &extract_ori_seq($input_file, $output_file, $out_seq_no, *array2);
# Function  : extract seqs. which are from struc. alignment only. to be analysed.
#             after mul. alignment with added seq. you can extract original str.
#             sequ. by using this. The output always has ...msff  ext.
#             *array_ali is the JPO's or true alignment hash.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub extract_ori_seq{
	 local($input_file, $output_file, $out_seq_no, *array1, *array2) = @_; # something like $dir.$mul_factor.msf
		  local(%array_ext) = &open_msf_files("$input_file");
		  %array_ext = &hash_substract(*array_ext, *array2); # getting rid of added seq.
		  %array_ext = &hash_common(*array_ext, *array1);
		  open(OUTPUT, ">$output_file"); # this is different from $dir.msff
	 printf OUTPUT "PileUp\n\n\n";
	 printf OUTPUT "  MSF:%5d  Type: P                     Check:    0  ..\n",$ls;
	 print  OUTPUT "\n\n";
		  my(@keys3) = ( keys %array_ext );
		  $max = &max_str_value_hash(%array_ext);
	 $ls = $max;
	 $seq3 = ($#keys3+1);
	 for($j=0; $j < $seq3 ;$j++){
					 $name3=$keys3[$j];
					 printf OUTPUT " Name: %10s     Len: %5d  Check:    0  Weight:  1.00\n",$name3,$ls;
	 }
	 print OUTPUT "\n//\n\n";
	 for($seq_len = 0;$seq_len < $ls;$seq_len = $seq_len + 50){
				print OUTPUT "\n";
				foreach $name(keys %array_ext){
						  $string=$array_ext{$name};
						  if (length($name)<= 7){
									 $out=$name."\t    ";
						  }elsif(length($name)==8){
									 $out=$name."    ";
						  }elsif(length($name)== 9){
									 $out=$name."   ";
						  }elsif(length($name)== 10){
									 $out=$name."  ";
						  }elsif(length($name)== 11){
									 $out=$name." ";
						  }else{
									 $out=$name."\t";
						  }
						  # loop over blocks of 10
						  $is=$seq_len;
						  $ie=$seq_len+50;
						  $ie=$ls if $ie > $ls;
						  while($is < $ie){
										  $iee=$is+10;
										  $iee=$ls if $iee > $ls;
										  $out.=' ';
										  while($is < $iee){
													 $char=substr($string,$is,1);
													 if($char ne ' '){
													 $char =~ tr/a-z/A-Z/;
													 $out.=$char;
													 $char='.' if $char eq '-';
													 }
													 $is++;
										  }
						  }
				print OUTPUT "$out\n";
				}
				print OUTPUT "\n";	# open(OUTPUT, ">$dir.msff");
	 }
}	#  end of extract_ori_seq
#________________________________________________________________________
# Title     : get_pair_homol_array
# Usage     : $hom_out_count = ${&get_pair_homol_array(\@any_array_of_2_elem)};= @ar=(ABCDE..., CDEGA..)
# Function  : get pair wise seq. !! Number of pair identical residues.
# Example   :
# Warning   : reliable, but input seq. strings shouldn't contain spaces.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_pair_homol_array{
	 my(@input)=@{$_[0]};
	 $input[0] =~ tr/a-z/A-Z/; # capitalizing.
	 $input[1] =~ tr/a-z/A-Z/; # capitalizing.
	 my(@string1)= split(//,$input[0]);
	 my(@string2)= split(//,$input[1]);
	 if (($#string1 == -1) || ($#string2 == -1)){
		  print "\n One of the string is empty O.K. ? \n";
	 }
	 my($larger)= &max($#string1, $#string2);
	 my($id_counter, $gap_counter, $non_equal_counter, $sum)=0;
	 for ($i = 0; $i<=$larger; $i++){
		  if (($string1[$i] eq '.')|| ($string2[$i] eq '.')){
				$gap_counter+=1;
		  }elsif ($string1[$i] eq $string2[$i]){
				$id_counter +=1;
		  }else{
				$non_equal_counter += 1;
		  }
	 }
	 $sum = ($id_counter + $gap_counter + $non_equal_counter);
	 if ($sum != ($larger+1)){
		  print "\n There is something wrong in getting homology in get_pair_homol \n";
		  &caller_info;
	 }
	 \$id_counter; # $id_counter is the homology counter;
}
#________________________________________________________________________
# Title     : get_percent_homol_arr
# Usage     : $homology_out = ${&get_pair_homol(\@any_array_of_2_elem)};= @ar=(ABCDE..., CDEGA..)
# Function  : get pair wise seq. identity of any two strings, outputs a scalar (%)
# Example   :
# Warning   : reliable, but input seq. strings shouldn't contain spaces.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_percent_homol_arr{
	 my(@input)=@{$_[0]};
	 $input[0] =~ tr/a-z/A-Z/; # capitalizing.
	 $input[1] =~ tr/a-z/A-Z/; # capitalizing.
	 my(@string1)= split(//,$input[0]);
	 my(@string2)= split(//,$input[1]);
	 if (($#string1 == -1) || ($#string2 == -1)){
		  print "\n One of the string is empty O.K. ? \n";
	 }
	 my($larger)= &max($#string1, $#string2);
	 my($id_counter, $gap_counter, $non_equal_counter, $sum,$percent_homol)=0;
	 for ($i = 0; $i<=$larger; $i++){
		  if (($string1[$i] eq '.')|| ($string2[$i] eq '.')){
				$gap_counter+=1;
		  }elsif ($string1[$i] eq $string2[$i]){
				$id_counter +=1;
		  }else{
				$non_equal_counter += 1;
		  }
	 }
	 $sum = ($id_counter + $gap_counter + $non_equal_counter);
	 if ($sum != ($larger+1)){
		  print "\n There is something wrong in getting homology in get_pair_homol \n";
		  &caller_info;
	 }else{
		  $percent_homol=($id_counter/$sum)*100;
	 }
	 \$percent_homol; # $id_counter is the homology counter;
}

#________________________________________________________________________
# Title     : get_pair_homol_hash
# Usage     : $homology_out = & get_pair_homol (%any_hash); , eg) %hash = (name1, ABCDE..., name2, CDEGA..)
# Function  : get pair wise seq. identity as a scalar count
# Example   :
# Warning   : reliable, but input seq. strings shouldn't contain spaces.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_pair_homol_hash{
	 my(%input)=@_;
	 &hash_chk(\%input);
	 my(@keys_input)= keys (%input);
	 my(@values_input) = values (%input);
	 $values_input[0] =~ tr/a-z/A-Z/; # capitalizing.
	 $values_input[1] =~ tr/a-z/A-Z/; # capitalizing.
	 my(@string1)= split(//,$values_input[0]);
	 my(@string2)= split(//,$values_input[1]);
	 if (($#string1 == -1) || ($#string2 == -1)){
					 print "\n One of the string is empty O.K. ? \n";
	 }
	 my($larger)= &max($#string1, $#string2);
	 my($id_counter, $gap_counter, $non_equal_counter)=0;
	 for ($i = 0; $i<=$larger; $i++){
		  if (($string1[$i] eq '.')|| ($string2[$i] eq '.')){
				$gap_counter+=1;
		  }elsif ($string1[$i] eq $string2[$i]){
				$id_counter +=1;
		  }else{
				$non_equal_counter += 1;
		  }
	 }
	 my($sum) = ($id_counter + $gap_counter + $non_equal_counter);
	 if ($sum != ($larger+1)){
		 print "\n There is something wrong in getting homology in get_pair_homol \n";
		 &caller_info;
			}
	 return ($id_counter); # $id_counter is the homology counter;
}
#________________________________________________________________________
# Title     : get_percent_homo_hash
# Usage     : $homology_out = &get_pair_homol_hash(%any_hash); , eg) %hash = (name1, ABCDE..., name2, CDEGA..)
# Function  : get pair wise seq. identity(%) of any two strings put in as a hash
# Example   :
# Warning   : reliable, but input seq. strings shouldn't contain spaces.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_percent_homo_hash{
	 my(%input)=@_;
	 &hash_chk(\%input);
	 my(@keys_input)= keys (%input);
	 my(@values_input) = values (%input);
	 $values_input[0] =~ tr/a-z/A-Z/; # capitalizing.
	 $values_input[1] =~ tr/a-z/A-Z/; # capitalizing.
	 my(@string1)= split(//,$values_input[0]);
	 my(@string2)= split(//,$values_input[1]);
	 if (($#string1 == -1) || ($#string2 == -1)){
					 print "\n One of the string is empty O.K. ? \n";
	 }
	 my($larger)= &max($#string1, $#string2);
	 my($id_counter, $gap_counter, $non_equal_counter,$percent_homol,)=0;
	 for ($i = 0; $i<=$larger; $i++){
		  if (($string1[$i] eq '.')|| ($string2[$i] eq '.')){
				$gap_counter+=1;
		  }elsif ($string1[$i] eq $string2[$i]){
				$id_counter +=1;
		  }else{
				$non_equal_counter += 1;
		  }
	 }
	 my($sum) = ($id_counter + $gap_counter + $non_equal_counter);
	 if ($sum != ($larger+1)){
		  print "\n There is something wrong in getting homology in get_pair_homol \n";
		  &caller_info;
	 }else{
		  $percent_homol=($id_counter/$sum)*100;
	 }
	 return ($percent_homol);
}


#________________________________________________________________________
# Title     : file_size
# Usage     : $outputfilesize = &file_size($input_file_name);
# Function  : returns the size of any single testing file
# Example   :
# Warning   : Q is for quality of this sub. This can't be wrong.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub file_size { my($infile)=$_[0];
  if ( $size=(-s "$infile")){ return $size; }
}

#________________________________________________________________________
# Title     : seq_comp_percent2
# Usage     : @outarray = &seq_comp_percent2(@any_input_string_array);
# Function  : get string seq COMPOSITION identities(a to z). gets array
#             of strings and outs array of % numbers
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub seq_comp_percent2{   	# simple and basic seq. id. eg. ABC on ABCABC is 50 % identical.
	my(@input)=@_;
	my(@array_of_ids2, $id2, @char1, @char2);
	&array_chk(sort @input);
	my($longest_str_size)  = &get_long_str_size (@input), "\n";
	my($shortest_str_size) = &get_short_str_size(@input), "\n";
	print "longest_str_size",$longest_str_size;
	print "shortest_str_size",$shortest_str_size;
	if (($longest_str_size/$shortest_str_size) > 4){
		  print "\n  The shortest string is less than 1/4 of the longest\n";
		  print "  This is quite meaningless, but will go on\n";
	}
	for ($i = 0; $i <= $#input ; $i++){
		  if ($input[$i]=~/(\W)/){
			  &remove_non_char($input[$i]);
		  }
		  @char1 = split(/|\s+|\.+|\-+/, $input[$i]);   # splitting into char.
		  foreach $char (@char1){
			  if ($char eq ' '){
					 next;
			  }
			  $charcount1{$char} +=1; # making array of ['A' => 6, 'B'=>2...]
		  }
		  for($j = $i+1 ; $j <= $#input; $j++){
			  if ($input[$j]=~/(\W)/){
				  &remove_non_char($input[$j]);
			  }
			  @char2 = split(/|\s+|\.+|\-+/, $input[$j]); # splitting into char.
			  for $char (@char2){
				  $charcount2{$char} +=1; 	 # making ary of ['A' => 6, ..]
			  }
			  $id2 = &get_id_among_2_2(*charcount1, *charcount2); # gets % id.
			  push (@array_of_ids2, $id2);
			  %charcount2=();
		  }
		  %charcount1=();
	}
	@array_of_ids2;
}

######################################################################################
###########  file and dir handling stuff #################
###############################################################################

#________________________________________________________________________
# Title     : get_full_file_name
# Usage     : $any_path = ${&get_full_dir_path($any_directory)}; or &dir_path('.') for pwd.
# Function  : returns full directory path (= pwd ), eg.  /nfs/ind4/ccpe1/people/A Biomatic
# Example   :
# Warning   :
# Keywords  : get_long_path_name, get_complete_path_name
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_full_file_name{
  my($pwd)=`pwd`;
  chomp($pwd);
  \$pwd;
  my($full_file_name) = "$pwd\/$_[0]";
  return(\$full_file_name);
}


#________________________________________________________________________
# Title     : dir_path  (same as  pwd_path )
# Usage     : $any_path = &dir_path($any_directory); or &dir_path('.') for pwd.
# Function  : returns directory path (= pwd ), eg.  /nfs/ind4/ccpe1/people/A Biomatic
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub dir_path{   my($pwd)=`pwd`;  chomp($pwd);	return( \$pwd ); }


#________________________________________________________________________
# Title     : full_pwd_path  (same as dir_path)
# Usage     : $any_path = ${&full_dir_path($any_directory)}; or &dir_path('.') for pwd.
# Function  : returns full directory path (= pwd ), eg.  /nfs/ind4/ccpe1/people/A Biomatic
# Example   :
# Warning   :
# Keywords  : get_long_path_name, get_complete_path_name
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub full_pwd_path{  my($pwd)=`pwd`; 	chomp($pwd);	\$pwd;  }

#________________________________________________________________________
# Title     : get_full_pwd_path  (same as dir_path)
# Usage     : $any_path = ${&get_full_dir_path($any_directory)}; or &dir_path('.') for pwd.
# Function  : returns full directory path (= pwd ), eg.  /nfs/ind4/ccpe1/people/A Biomatic
# Example   :
# Warning   :
# Keywords  : get_long_path_name, get_complete_path_name
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_full_pwd_path{  my($pwd)=`pwd`; 	chomp($pwd);	\$pwd;  }

#________________________________________________________________________
# Title     : get_whole_pwd_path  (same as dir_path)
# Usage     : $any_path = ${&get_whole_dir_path($any_directory)}; or &dir_path('.') for pwd.
# Function  : returns full directory path (= pwd ), eg.  /nfs/ind4/ccpe1/people/A Biomatic
# Example   :
# Warning   :
# Keywords  : get_long_path_name, get_complete_path_name
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_whole_pwd_path{  my($pwd)=`pwd`; 	chomp($pwd);	\$pwd;  }

#________________________________________________________________________
# Title     : pwd_path  (same as dir_path)
# Usage     : $any_path = ${&dir_path($any_directory)}; or &dir_path('.') for pwd.
# Function  : returns directory path (= pwd ), eg.  /nfs/ind4/ccpe1/people/A Biomatic
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pwd_path{  my($pwd)=`pwd`; 	chomp($pwd);	\$pwd;  }


#________________________________________________________________________
# Title     : get_pwd_dir  (same as pwd_dir_base)
# Usage     : $dir = &get_pwd_dir($anydir); # to return say,  'perl' .
# Function  : returns present working dir base
# Example   :
# Warning   : well tested.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_pwd_dir{  my(@pwd)=split(/\//,`pwd`);  my($dir)=$pwd[$#pwd];
	chomp($dir);   return( \$dir  );
}

#________________________________________________________________________
# Title     : dir_name  (same as pwd_dir_base)
# Usage     : $dir = &pwd_dir($anydir); # to return say,  'perl' .
# Function  : returns present working dir base
# Example   :
# Warning   : well tested.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub dir_name{
	my(@pwd)=split(/\//,`pwd`);  my($dir)=$pwd[$#pwd];  # take last one.
	chomp($dir);    return( \$dir  );
}

#________________________________________________________________________
# Title     : pwd_dir_name  (same as  pwd_dir)
# Usage     : $dir = &pwd_dir($any_absolute_path_dir);
# Function  : returns present working dir name
# Example   : returns 'jong' with the input of '/nfs/ind5/A Biomatic '
# Warning   : well tested.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub pwd_dir_name{
	my(@pwd)=split(/\//,`pwd`); my($dir)=$pwd[$#pwd];
	chomp($dir);
	return (\$dir);
}

#________________________________________________________________________
# Title     : get_pwd_dir_name  (same as  pwd_dir)
# Usage     : $dir = &get_pwd_dir($any_absolute_path_dir);
# Function  : returns present working dir name
# Example   :
# Warning   : well tested.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_pwd_dir_name{   my(@pwd)=split(/\//,`pwd`); my($dir)=$pwd[$#pwd];
	chomp($dir);	 \$dir;
}


#________________________________________________________________________
# Title     : get_full_path_dir_names  (same as  pwd_dir)
# Usage     : @full_path_dirs = @{&get_full_path_dir_names(@short_dir_name)};
# Function  : returns full path dir names with given short dir names.
# Example   : with 'jong' it gives '/nfs/ind5/jong', '/nfs/ind4/ccep1/people/A Biomatic '...
#             when 'jong' is in /nfs/ind4/jong/Perl, it returns /nfs/ind4/A Biomatic
# Warning   : when 'jong' is in /nfs/ind4/jong/Perl, it returns /nfs/ind4/A Biomatic
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_full_path_dir_names{ my(@in)=@_; my($pwd)=`pwd`; chomp($pwd);
	my(@ENV_dirs)=@{&get_all_dirs_from_ENV};
	for ($i=0; $i <@in; $i++){
		if( ref($in[$i]) eq 'SCALAR' ){
			for $each_dir(@ENV_dirs){
				if( $each_dir =~/${$in[$i]}$/){
					push(@out_full_path_dirs, $each_dir);  }
				elsif( $each_dir =~/${$in[$i]}/ ){
					push(@out_full_path_dirs, "$`$&");
				}  ## when 'jong' is in /nfs/ind4/jong/Perl, it returns /nfs/ind4/A Biomatic
			}
		}
	}elsif( !ref($in[$i]) ){
			for $each_dir(@ENV_dirs){
				if( $each_dir =~/$in[$i]$/){
					push(@out_full_path_dirs, $each_dir);  }
				elsif( $each_dir =~/$in[$i]/ ){
					push(@out_full_path_dirs, "$`$&");     }
			}
		}
	}
	return(\@out_full_path_dirs);
}

#______________________________________________________________
# Title     : get_file_extensions
# Usage     : @ext=@{&get_file_extensions(\@file)}  or
#             $ext=${&get_file_extensions(\$file)}
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : get_file_extension, get_extension, get_file_ext, get_ext_names
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_file_extensions{
	my($x, @out_file, $ext, $file, @file, @ext);
	@file=@{$_[0]} || @_;

	for($x=0; $x < @file; $x ++){
		if( ref($file[$x]) eq 'SCALAR' ){
			$file = ${$file[$x]};
			$pos = rindex($file, ".");
			$ext= substr($file, ($pos+1));
		}else{
			$file = $file[$x];
			$pos = rindex($file, ".");
			$ext= substr($file, ($pos+1));
		}
		push(@ext, $ext);
	}
	if(@ext == 1 ){ \$ext[0] }else{ \@ext }
}


#________________________________________________________________________
# Title     : get_base_names
# Usage     : $base =${&get_base_names(\$file_name)};
#             :   or @bases = &get_base_names(\@files);  # <-- uses `pwd` for abs directory
# Function  : produces the file base name(eg, "evalign"  out of "evalign.pl" ).
# Example   : $base => 'test'  with 'test.txt' or '/home/dir/of/mine/text.txt'
# Warning   :
# Keywords  : get_base_name{, base_name, file_base_name ,  get_file_base_name
#             get_basename, basename, get_root_name
# Options   :
# Returns   :
# Argument  : handles both ref and non-ref.
# Version   : 1.3
#--------------------------------------------------------------------
sub get_base_names{
	my($x, $pos, $pos1, @out_file, $file_only, $file, @file, $base, @base);
	@file=@{$_[0]} if (ref($_[0]) eq 'ARRAY');
	@file=@_ if !(ref($_[0]) eq 'ARRAY');
	for($x=0; $x < @file; $x ++){
		if( ref($file[$x]) ){
			$file = ${$file[$x]};
			$pos1=rindex($file, "/");
	        $file_only=substr($file, ($pos1+1));
			$pos = rindex($file_only, ".");
	        $base= substr($file_only, 0, $pos);
		}else{
			$file = $file[$x];
			$pos1=rindex($file, "/");
	        $file_only=substr($file, ($pos1+1));
			$pos = rindex($file_only, ".");
	        $base= substr($file_only, 0, $pos);
		}
		push(@base, $base);
	}
	if(@base == 1 ){ \$base[0] }else{ \@base }
}

#________________________________________________________________________
# Title     : read_file_names_only
# Usage     : @all_files=@{&read_file_names_only(<dir>, [extension])};
# Function  : read any file names and REMOVES the '.', '..' and dir entries.
#             And then put in array.  This checks if anything is a real file.
#             You can use 'txt' as well as '.txt' as extension
#             You can put multiple file extension (txt, doc, ....)
#               and multiple dir path (/usr/Perl, /usr/local/Perl....)
#               It will fetch all files wanted in all the direc specified
#
#             It can handle file glob eg)
#             @all_files=@{&read_file_names_only(\$abs_path_dir_name, 'G1_*.txt')};
#               for all txt files starting with 'G1_'
#
# Example   : @all_files=@{&read_file_names_only(\$abs_path_dir_name, ..)};
#             @all_files=@{&read_file_names_only(\$dir1, '.pl', '.txt')};
#             @all_files=@{&read_file_names_only(\$dir1, '.', \$dir2, \$dir3, 'e=pl')};
#             @all_files=@{&read_file_names_only(\$abs_path_dir_name, 'G1_*.txt')};
#
# Warning   : This does not report '.', '..'
#             Only file names are reported. Compare with &read_any_dir
#             extension size should be less than 15 char.
#             It sorts the results!
# Keywords  : filename only, filename_only, read_files_only, read files
#             get_file_names_only, get_files_only, read_files_only
# Options   : "extension name". If you put , 'pl' as an option, it will show
#             files only with '.pl' extension.
#  '-p'      for path also included resulting in '/path/path/file.ext'
#              rather than 'file.ext' in output @array
#  '-s'      for sorting the results
#  e='xxx'  for extention xxx
#  '.pl'    for files extended by '.pl'
#  'pl'     for files extended by 'pl', same as above
#
# Version   : 2.3
#--------------------------------------------------------------------
sub read_file_names_only{
  my($in_dir, $i,$k, $dir, @final_files, @possible_dirs, $sort_opt, $ext, @extensions,
	  $path_include, @in, $glob_given, @files_globed, @in_dir, $pwd, $extension_given, @read_files);
  $pwd=`pwd`; chomp($pwd);
  $in_dir=$pwd;
  @in=@_;

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  #  Directory entry and opts detection
  #_________________________________________
  for($k=0; $k < @in; $k++){
	 if   ( $in[$k] eq '.'){ push(@in_dir,$pwd); splice(@in, $k, 1);  $k--; next }
	 if( !(ref($in[$k]))){
		if( -d $in[$k]){
			if($in[$k]=~/\/$/){ chop($in[$k]) }
		    push(@in_dir, $in[$k]); splice(@in, $k, 1);    $k--;
		}elsif(!(-f $in[$k]) and $in[$k] =~ /^\-p *$/ ){ ## somehow, ' *' is essential
			$path_include=1; splice(@in, $k, 1); $k--;
		}elsif(!(-f $in[$k]) and $in[$k] =~ /^\-s *$/   ){$sort_opt=1; splice(@in, $k, 1); $k--; }
	 }elsif(ref($in[$k])){
		if( -d ${$in[$k]}){
			if(${$in[$k]}=~/\/$/){ chop(${$in[$k]}) }
		    push(@in_dir,${$in[$k]});  splice(@in, $k, 1);  $k--;
		}elsif(!(-f $in[$k]) and ${$in[$k]} =~ /^\-p$/ ){$path_include=1; splice(@in, $k, 1); $k--;
		}elsif(!(-f $in[$k]) and ${$in[$k]} =~ /^\-s$/ ){$sort_opt=1; splice(@in, $k, 1); $k--;}
	 }
  }
  if(@in_dir < 1){ push(@in_dir, $pwd) }
  print "\n# read_file_names_only: input directories are : @in_dir \n";
  print   "# read_file_names_only: going to \'File name and extension detection\' stage\n";

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  #  File name and extension detection
  #_________________________________________
  for($k=0; $k < @in; $k++){
	for $dir (@in_dir){
		chdir($dir);
		if( !(ref($in[$k]))){
		   if($in[$k]=~/\*/){
			  $glob_given=1;
			  #~~~~~~~~~~~~~~~~~~~~~  Reads globbed files and attaches path if opt -p is set
			  if($path_include==1){  @final_files=map{ "$dir/$_" } <$in[$k]>;  }else{ @final_files=<$in[$k]>;  }
			  splice(@in, $k, 1); $k--;
		   }elsif(!(-f $in[$k]) and $in[$k] =~ /e=\.?(\S+)/){ $extension_given =1; push(@extensions, $1); splice(@in, $k, 1);$k--; }
		   }elsif(!(-f $in[$k]) and $in[$k] =~ /^\.?(\S+)/){   $extension_given =1; push(@extensions, $1); splice(@in, $k, 1); $k--;
		}elsif(ref($in[$k])){
		   if(${$in[$k]}=~/\*/){
			  $glob_given=1;
			  if($path_include==1){  @final_files=map{ "$dir/$_" } <${$in[$k]}>; }else{ @final_files=<${$in[$k]}> }
			  splice(@in, $k, 1); $k--;
		   }elsif(!(-f ${$in[$k]}) && (${$in[$k]} =~ /e=(\S+)/ ) ){
		   }elsif(!(-f ${$in[$k]}) && (${$in[$k]} =~ /^\.?(\S+)/ ) ){ $extension_given = 1; push(@extensions, $1);  splice(@in, $k, 1);  $k--;
			  $extension_given =1; push(@extensions, $1);  splice(@in, $k, 1);  $k--; }
		}
	}
	chdir($pwd);
  }

  if( $glob_given == 1 and $extension_given !=1 ){  # when glob input is given only(without any extension input!
	 return(\@final_files);
  }

  ##########  Main READING PART ##########
  for($k=0; $k< @in_dir; $k++){
	 chdir($in_dir[$k]);
	 opendir(DIR1,"$in_dir[$k]");
	 @read_files = readdir(DIR1);
	 for($i=0; $i < @read_files; $i ++){
		if( -f "$read_files[$i]" ){
		  if($extension_given ==1 ){
			 for $ext (@extensions){
				if( $read_files[$i] =~ /\.$ext$/){
					if($path_include==1){
						push(@final_files, "$in_dir[$k]\/$read_files[$i]" );
					}else{
						push(@final_files, "$read_files[$i]" );
					}
				}
			 }
		  }else{ ## reading everything !!!
			  push(@final_files, $read_files[$i]);
		  }
		}
	 }
	 chdir($pwd);
  }
  sort @final_files if $sort_opt == 1;
  return(\@final_files);
}


#________________________________________________________________________
# Title     : read_dir_names_only
# Usage     : @all_dirs_list = @{&read_dir_names_only(\$absolute_path_dir_name, ....)};
# Function  : read any dir names and and then put in array. If no argument
#             for the target directory, it opens PWD automatically
#             You can specify the length of dir names to choose.
# Example   : @files=@{&read_dir_names_only('n', "s=1", '.')};
# Warning   : This does not report '.', '..'
#             Only file names are reported. Compare with &read_any_dir
# Keywords  : read_dir_only, get_dir_names, get_dir_names_only, get_subdir_names,
# Options   : n   for names only reading(not the full path) , default is full path
#             s=  for the size of dirs name. If you want all the dir names
#                   with a size of 1 char, s=1
# Returns   : one ref. of array.
# Argument  : takes one or more scaler references. ('.', \$path, $path, ... )
# Version   : 3.4
#--------------------------------------------------------------------
sub read_dir_names_only{
  my($in_dir, $i,$k, @possible_dirs, @chopped_pwd_path, @args,
	  @final_files, $full_dir, $pwd, $path,@read_files,
	  $size_of_dir_name);
  $pwd=`pwd`;
  chomp($pwd);
  $full_dir=1;
  @args=@_;

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Checking option
  #__________________________________________________
  for($k=0; $k < @args; $k++){
	 if(    $args[$k] eq 'n' or ${$args[$k]} eq 'n'){
	     $full_dir=0;
	     print "\n# read_dir_names_only: You put \'n\' option \n";
		 splice(@args, $k, 1); $k--;
	 }elsif( $args[$k] =~/s=(\d+)/ or ${$args[$k]} =~/s=(\d+)/){
		 $size_of_dir_name=$1;
		 print "\n# read_dir_names_only : You have put the size of dir names : $size_of_dir_name\n";
		 splice(@args, $k, 1); $k--;
	 }
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # When no arg, this opens PWD automatically
  #_________________________________________________
  if(@args == 0){
	 if($full_dir==1){
		 $in_dir=$pwd;
	 }else{
		 $in_dir='.';
	 }
	 @final_files=@{&open_and_read_dir_names_only(\$in_dir)};
  }elsif(@args > 0){
	 for($k=0; $k < @args; $k++){
		if(!(ref($args[$k]))){    $in_dir=$args[$k];
		}elsif(ref($args[$k])){   $in_dir=${$args[$k]};    }

		if($in_dir ne '..' and $in_dir !~ /\// ){
			push(@final_files, @{&open_and_read_dir_names_only(\$in_dir)} );
		}elsif($in_dir eq '..' and $full_dir==1){
			print "\n# read_dir_names_only: \"..\" is given to open\n";
			@chopped_pwd_path=split(/\//, $pwd);
			pop(@chopped_pwd_path);
			$in_dir=join('/', @chopped_pwd_path);
			push(@final_files, @{&open_and_read_dir_names_only(\$in_dir)} );
		}elsif($in_dir eq '..'){
		    $in_dir = '..';
			push(@final_files, @{&open_and_read_dir_names_only(\$in_dir)} );
			for(@final_files){ $_=~s/\.//; }
		}
		##########  Main READING PART ##########

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Embedded subroutine
		#_________________________________________________
		sub open_and_read_dir_names_only{
			my ($i, @final_files);
			my $in_dir=${$_[0]};
			opendir(DIR1,"$in_dir");
			my @read_files = readdir(DIR1);
			if($size_of_dir_name){
				for($i=0; $i < @read_files; $i ++){
					unless(length($read_files[$i]) == $size_of_dir_name){
						next;
					}
					if($full_dir ==1){
						$read_files[$i]="$in_dir\/$read_files[$i]";
					}
					if( ($read_files[$i] !~ /\/\.\.?$/) && ( -d $read_files[$i]) ){
						 $read_files[$i]=~s/[\.\/]*//; ## removing ./ in front of dirs (in bash)
						 push(@final_files, "$read_files[$i]");
					}
				}
				return([@final_files]);
			}else{
				for($i=0; $i < @read_files; $i ++){
					if($full_dir ==1){
						$read_files[$i]="$in_dir\/$read_files[$i]";
					}
					if( ($read_files[$i] !~ /\/\.\.?$/) && ( -d $read_files[$i]) ){
						 $read_files[$i]=~s/[\.\/]*//; ## removing ./ in front of dirs (in bash)
						 push(@final_files, "$read_files[$i]");
					}
				}
				return([@final_files]);
			}
  		}
	 }
  }
  return([sort @final_files]);
}



#______________________________________________________________
# Title     : take_file_name
# Usage     : $base_portion =${&take_file_name(\'/dir/file.name')};
# Function  : takes file name portion from long dir/filename
# Example   : will return file.name  from /dir/dir/file.name
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : get_file_name_only, extract_file_name, take_file_name_only
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub take_file_name {
	my($pos, $base_portion, $file);
	if( ref($_[0]) ){
	   $file=${$_[0]};
	}else{
	   $file=$_[0];
	}

	$pos = rindex($file, "/");
	$base_portion=substr($file, ($pos+1));
	return (\$base_portion);
}

#______________________________________________________________
# Title     : get_file_dir_name
# Usage     :
# Function  : returns the dir portion of long filename.
#             If file does not have dir portion it returns './'
# Example   : /dir/file.name
#             =>  /dir/
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : get_file_dir_name, take_file_dir_name, take_file_dir_names
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_file_dir_names{
	my($x, @out_file, $file_name, $file, @file, @file_name);
	@file=@{$_[0]} if (ref($_[0]) eq 'ARRAY');
	@file=@_ if !(ref($_[0]) eq 'ARRAY');
	for($x=0; $x < @file; $x ++){
		if( ref($file[$x]) ){
			$file = ${$file[$x]};
			$pos = rindex($file, "/");
			$file_name= substr($file, ($pos+1));
		}else{
			$file = $file[$x];
			$pos = rindex($file, "/");
			$file_name= substr($file, ($pos+1));
		}
		push(@file_name, $file_name);
	}
	if(@file_name == 1 ){ \$file_name[0] }else{ \@file_name }
}


#________________________________________________________________________
# Title     : get_dir_names_only
# Usage     : @all_dirs_list = @{&get_dir_names_only(\$absolute_path_dir_name, ....)};
# Function  : read any dir names and and then put in array.
# Example   :
# Warning   : This does not report '.', '..'
#             Only file names are reported. Compare with &read_any_dir
# Keywords  :
# Options   :
# Returns   : one ref. of array.
# Argument  : takes one or more scaler references. ('.', \$path, $path, ... )
# Version   : 3.0
#--------------------------------------------------------------------
sub get_dir_names_only{
  my($in_dir, $i,$k, @possible_dirs,
	  @final_files, $full_dir, $pwd, $path,@read_files);
  $pwd=`pwd`; chomp($pwd); $full_dir=1;
  for($k=0; $k < @_; $k++){
	 if   ( ($_[$k] eq '.') || !(defined($_[$k]))){  $in_dir=$pwd;  }
	 elsif(!(ref($_[$k]))){   $in_dir=$_[$k];   }
	 elsif(ref($_[$k])){      $in_dir =${$_[$k]};    }
	 if($in_dir =~ /^([\w\-\.]+)$/){  $in_dir="$pwd\/$in_dir"; $full_dir = 0; }
	 else{ $full_dir =1; }
	 ##########  Main READING PART ##########
	 opendir(DIR1,"$in_dir");
	 @read_files = readdir(DIR1);
	 for($i=0; $i < @read_files; $i ++){
		$read_files[$i]="$in_dir\/$read_files[$i]";
		if( ($read_files[$i] !~ /\/\.\.?$/) && ( -d $read_files[$i]) ){
		  push(@final_files, "$read_files[$i]");
		}
	 }
  }
  sort @final_files;  \@final_files;
}

#________________________________________________________________________
# Title     : get_subdir_names
# Usage     : @all_sub_dirs_list = @{&read_dir_names_only(\$absolute_path_dir_name, ....)};
# Function  : Gets all subdir and subsubsub...dir names in absolute path names.
# Example   : <output example with   "get_subdir_names.pl /A Biomatic /Perl/Bio_Seq" at prompt
#                  It is an array shown in lines.
#
#             /A Biomatic /Perl/Bio_Seq/Evalign
#             /A Biomatic /Perl/Bio_Seq/Seq_hash_handling
#             /A Biomatic /Perl/Bio_Seq/Apple
#             /A Biomatic /Perl/Bio_Seq/Genome
#             /A Biomatic /Perl/Bio_Seq/Open_xxx_files
#             /A Biomatic /Perl/Bio_Seq/Open_xxx_files/Evalign
#
#             tk
#             ch1
#             sub2
#             sub3
#             xxxx.cong
#             whatever_sub_dir
# Warning   : This does not report '.', '..' ,  Also, this does not show full path
#             Only file names are reported. Compare with &read_any_dir
# Keywords  :
# Options   :
# Returns   : one ref. of array. (NOT full path names), refer  'read_full_dir_names'
# Argument  : takes one or more scaler references. ('.', \$path, $path, ... )
# Version   : 3.0
#--------------------------------------------------------------------
sub get_subdir_names{
  my(@in, $in_dir, $in_dir2, $i, $k, $in_dir, $pwd, @sub_dirs,
		@final_dirs, @read_dirs);
  my($pwd)=`pwd`; chomp($pwd);
  for($k=0; $k< @_ ;$k++){
	  if( !ref($_[$k]) ){
		  push(@in, $_[$k]);    }
	  elsif( ref($_[$k]) eq "ARRAY" ){
		  push(@in, @{$_[$k]});    }
	  elsif( ref($_[$k]) eq "SCALAR" ){
		  push(@in, ${$_[$k]});    }
	  elsif(ref($_[$k]) eq "HASH") {
		  push(@in, %{$_[$k]}); }
  }

  for($k=0; $k < @in; $k++){
	  if ( !ref($in[$k]) ){
		  if( $in[$k] eq '.'){ $in_dir = $pwd; }
		  else{  $in_dir =$in[$k]; }
	  }
	  elsif(ref($in[$k])){  $in_dir =${$in[$k]};  }
	  chop($in_dir) if $in_dir =~ /\/$/;
	  @read_dirs = @{&read_dir_names_only(\$in[$k])};
	  @final_dirs= @read_dirs;
	  for($i=0; $i < @read_dirs; $i ++){
		  push(@final_dirs, @{&get_subdir_names(\$read_dirs[$i])});
	  }
  }
  sort @final_dirs;  \@final_dirs;
}

#________________________________________________________________________
# Title     : read_full_dir_names
# Usage     : @all_files_list = @{&read_full_dir_names(\$absolute_path_dir_name, ....)};
# Function  :
# Example   : input>> &read_full_dir_names('/nfs/ind4/ccpe1/people/A Biomatic /perl');
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/code
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/tk
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/ch1
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/sub2
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/sub3
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/xxxx.cong
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/whatever
# Warning   : This does not report '.', '..'
#             Only file names are reported. Compare with &read_any_dir
# Keywords  :
# Options   :
# Returns   : one ref. of array.
# Argument  : takes one or more scaler references. ('.', \$abs_path, $path, ... )
# Version   : 1.0
#--------------------------------------------------------------------
sub read_full_dir_names{
  my($in_dir, $i,$k, @possible_dirs, $full_dir, $path,@read_files, @final_files);
  my($pwd)=`pwd`; chomp($pwd);
  for($k=0; $k < @_; $k++){
	 if   ( ($_[$k] eq '.') || !(defined($_[$k]))){  $in_dir=$pwd;  }
	 elsif( !ref($_[$k]) ){   $in_dir=$_[$k];     }
	 elsif(ref($_[$k]))   {   $in_dir =${$_[$k]}; }
	 if($in_dir =~ /\//){  $full_dir =1;}
	 elsif($in_dir =~ /^([\w\-\.]+)$/){ $in_dir="$pwd\/$in_dir";  }
	 ##########  Main READING PART ##########
	 chop($in_dir) if ($in_dir=~/[\/\\]+$/);
	 opendir(DIR1,"$in_dir");
	 @read_files = readdir(DIR1); shift( @read_files);  shift( @read_files);
	 if( $full_dir==1 ){
		for($i=0; $i < @read_files; $i ++){
		  $read_files[$i]="$in_dir\/$read_files[$i]";
		  if( -d "$read_files[$i]"){
			  push(@final_files, $read_files[$i]);
		  }
		}
	 }elsif($full_dir != 1){
		for($i=0; $i < @read_files; $i ++){
		  $read_files[$i]="$pwd\/$read_files[$i]";
		  if( -d "$read_files[$i]"){
			  push(@final_files, $read_files[$i]);
		  }
		}
	 }
  }
  sort @final_files;  \@final_files;
}

#________________________________________________________________________
# Title     : get_full_dir_names
# Usage     : @all_files_list = @{&read_full_dir_names(\$absolute_path_dir_name, ....)};
# Function  :
# Example   : input>> &get_full_dir_names('/nfs/ind4/ccpe1/people/A Biomatic /perl');
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/code
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/tk
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/ch1
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/sub2
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/sub3
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/xxxx.cong
#             /tmp_mnt/nfs/ind4/ccpe1/people/A Biomatic /perl/whatever
# Warning   : This does not report '.', '..'
#             Only file names are reported. Compare with &read_any_dir
# Keywords  :
# Options   :
# Returns   : one ref. of array.
# Argument  : takes one or more scaler references. ('.', \$abs_path, $path, ... )
# Version   : 1.0
#--------------------------------------------------------------------
sub get_full_dir_names{
  my($in_dir, $i,$k, @possible_dirs, $full_dir, $path,@read_files, @final_files);
  my($pwd)=`pwd`; chomp($pwd);
  for($k=0; $k < @_; $k++){
	 if   ( ($_[$k] eq '.') || !(defined($_[$k]))){  $in_dir=$pwd;  }
	 elsif( !ref($_[$k]) ){   $in_dir=$_[$k];     }
	 elsif(ref($_[$k]))   {   $in_dir =${$_[$k]}; }
	 if($in_dir =~ /\//){  $full_dir =1;}
	 elsif($in_dir =~ /^([\w\-\.]+)$/){ $in_dir="$pwd\/$in_dir";  }
	 ##########  Main READING PART ##########
	 chop($in_dir) if ($in_dir=~/[\/\\]+$/);
	 opendir(DIR1,"$in_dir");
	 @read_files = readdir(DIR1); shift( @read_files);  shift( @read_files);
	 if( $full_dir==1 ){
		for($i=0; $i < @read_files; $i ++){
		  $read_files[$i]="$in_dir\/$read_files[$i]";
		  if( -d "$read_files[$i]"){
			  push(@final_files, $read_files[$i]);
		  }
		}
	 }elsif($full_dir != 1){
		for($i=0; $i < @read_files; $i ++){
		  $read_files[$i]="$pwd\/$read_files[$i]";
		  if( -d "$read_files[$i]"){
			  push(@final_files, $read_files[$i]);
		  }
		}
	 }
  }
  sort @final_files;  \@final_files;
}

#________________________________________________________________________
# Title     : read_any_dir_simple
# Usage     : @file_list = @{&read_any_dir(\$absolute_path_dir_name)};
# Function  : read any dir and REMOVES the '.' and '..' entries. And then put in array.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one ref. of array.
# Argument  : takes one scaler reference.
# Version   : 1.1
#--------------------------------------------------------------------
sub read_any_dir_simple {  				# returns the subdir and files names
	 my($in_dir);
	 if( ref($_[0]) ){
		 $in_dir=${$_[0]};
	 }else{
		 $in_dir= $_[0];
	 }
	 opendir(DIR1,"$in_dir");
	 my(@read_files) = readdir(DIR1);	# readdir() is perl func.
	 closedir(DIR1);
	 splice(@read_files, 0, 2);  # this gets rid of leading . and ..
	 \@read_files;   				# to return arrays
}
#____________________________________________________________
# Title     : read_any_dir
# Function  : read any dir and REMOVES the '.' and '..' entries.
#             And then put in array.
# Usage     : @file_list = @{&read_any_dir(\$absolute_path_dir_name)};
# Argument  : takes one scaler reference.
# Returns   : one ref. of array. for the files in the given directory.
# Keywords  :
# Options   :
# Version   : 1.2
# Warning   :
#----------------------------------------------------------
sub read_any_dir{
	 my($in_dir, @possible_dirs, @read_files);
	 if( ($_[0] eq '.') || !(defined($_[0]))){
		$in_dir='.';
	 }else{  $in_dir=${$_[0]} || $_[0];	 }

	 if($in_dir =~ /^([\w\-\.]+)$/){
		 $in_dir="\.\/$in_dir";
	 }elsif($in_dir =~/\/([\w\-\.]+)$/){
		 $in_dir="\.\/$1";  # adjust to pwd.
	 }
	 opendir(DIR1, "$in_dir" );
	 @read_files = readdir(DIR1);
	 closedir(DIR1); splice( @read_files, 0, 2 );
	 \@read_files;
}


#________________________________________________________________________
# Title     : read_any_dir2
# Usage     : @file_list = @{&read_any_dir(\$absolute_path_dir_name, ....)};
# Function  : read any dir and REMOVES the '.' and '..' entries. And then put in array.
# Example   :
# Warning   : This does not report '.', '..', '#xxxx', ',xxxx', etc. only legitimate
#             file and dir names are reported.
# Keywords  :
# Options   :
# Returns   : one ref. of array.
# Argument  : takes one or more scaler references.
# Version   : 1.0
#--------------------------------------------------------------------
sub read_any_dir2{   my($in_dir, $i,$k, @possible_dirs, @read_files);
  for($k=0; $k < @_; $k++){
	 if   ( ($_[$k] eq '.') || !(defined($_[$k]))){    $in_dir='.';  }
	 elsif(!(ref($_[$k]))){   $in_dir=$_[$k];     }
	 elsif(ref($_[$k])){      $in_dir =${$_[$k]};    }

	 if($in_dir =~ /^([\w\-\.]+)$/){  $in_dir="\.\/$in_dir";   # if it is a short dir name
		 unless(-d $in_dir){ $in_dir=${&dir_search_special(\$in_dir)} } }
	 elsif($in_dir =~/\/([\w\-\.]+)$/){ $in_dir="\.\/$1";  # adjust to pwd.
		 unless(-d $in_dir){ $in_dir=${&dir_search_special(\$in_dir)}  }}

	 sub dir_search_special{   my($in_dir)=${$_[0]};  my(@ENV_dir, @probable_dir_list, @dirs,@possible_dirs, $final_dir);
		if($in_dir =~ /\/([\w\.\-]+)$/){   $in_dir = $1; }
		@probable_dir_list=('ALIGN', 'PDB', 'PATH', 'HOME', 'JPO', 'PIRDIR', 'PDBSST','PDBENT',
								  'BLASTDB', 'PIRDIR', 'SWDIR');
		for (@probable_dir_list){ @dirs=split(':', $ENV{$_});
		  for (@dirs){ if (/$in_dir$/){ $final_dir = $_; } }
		}
		if(@possible_dirs <1){  # goes up one level and tries to find dir.
		  my($pwd)=`pwd`; chomp($pwd); my(@temp)=split('/', $pwd);
		  pop(@temp);  my($up_pwd)=join('/', @temp);
		  $in_dir="$up_pwd\/$in_dir";  $final_dir=$in_dir if (-d $in_dir);
		}
		\$final_dir
	 }
	 opendir(DIR1,"$in_dir");
	 @read_files = sort readdir(DIR1);
	 for($i=0; $i < @read_files; $i ++){
		if( ($read_files[$i]=~/^[\W]+$/)||($read_files[$i] =~ / +/)){
			splice( @read_files, $i, 1 ); $i--  }
		if( ($read_files[$i]=~/\.\.+/)||($read_files[$i] =~ /\#+/)||($read_files[$i]=~/\,+/)){
			splice( @read_files, $i, 1 ); $i-- }
	 }
	 push(@final_files, @read_files);
  }
  \@final_files;
}



#________________________________________________________________________
# Title     : max_str_value_hash   	#$max = &max_str_value_hash(%array1);
# Usage     : $largest_str_length_of_values = &max_value_hash(%any_hash);
# Function  : gets the largest 'string' length in values of any one hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub max_str_value_hash{
	my(%hash)=@_; my($len, $s1, $max);
	foreach $s1 (values %hash){
		$len =length($s1);
		$max = $len  if $len > $max;
	}
	return(\$max);
}

#________________________________________________________________________
# Title     : get_max_hash_by_value
# Usage     : $largest_str_length_of_values = &max_value_hash(%any_hash);
# Function  : gets the largest 'string' length in values of any one hash
# Example   :
# Warning   :
# Keywords  : get_max_hash_value, get_largest_hash_value, get_max_hash_key_value
#             get_max_hash_num_value, max_hash_value
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub get_max_hash_by_value{
	my(%hash)=@_;
	my($max, %max);
	for (keys %hash){
	   if($hash{$_} > $max){
	      %max=($_, $max);
	   }
	}
	return(\%max);
}


#________________________________________________________________________
# Title     : max_str_key_hash   	#$max = &max_str_value_hash(%array1);
# Usage     : $largest_str_length_of_values = &max_value_hash(%any_hash);
# Function  : gets the largest 'string' length in keys of any one hash
# Example   :
# Warning   :
# Keywords  : largest key length,
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub max_str_key_hash{
	my(%hash)=%{$_[0]};
	my($len,$max);
	#&hash_chk(\%hash);		# checks if %hash is valid.
	foreach $s1 (keys %hash){
		$len = length($s1);
		$max = $len  if $len > $max;
	}
	return($max);
}

#________________________________________________________________________
# Title     : min_string_value_hash  #$max = &min_str_value_hash(%array1);
# Usage     : $small_str_length_of_values = &min_str_value_hash(%any_hash);
# Function  : gets the smallest 'string' length in values of any one hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub min_str_value_hash{
	my(%hash)=%{$_[0]};
	&hash_chk(\%hash);		# checks if %hash is valid.
	my($len)=0;
	my(@keys) = keys %hash;
	my($min) = length($hash{$keys[0]});
	for ($s1=1; $s1 <= $#keys; $s1++){
		$len =length($hash{$keys[$s1]});
		$min = $len  if ($len < $min);
	}
	return($min);
}
#________________________________________________________________________
# Title     : min_str_key_hash  #$max = &min_str_value_hash(%array1);
# Usage     : $small_str_length_of_values = &min_str_value_hash(%any_hash);
# Function  : gets the smallest 'string' length in values of any one hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub min_str_key_hash{
	my(%hash)=%{$_[0]};
	#&hash_chk(\%hash);		# checks if %hash is valid.
	my($len)=0;
	my(@keys) = keys %hash;
	my($min) = length($keys[0]);
	for ($s1=1; $s1 < @keys; $s1++){
		$len =length($keys[$s1]);
		$min = $len  if ($len < $min);
	}
	return($min);
}
#________________________________________________________________________
# Title     : min_string_key_hash  #$max = &min_str_value_hash(%array1);
# Usage     : $small_str_length_of_values = &min_str_value_hash(%any_hash);
# Function  : gets the smallest 'string' length in values of any one hash
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub min_str_key_hash{
	my(%hash)=%{$_[0]};
	#&hash_chk(\%hash);		# checks if %hash is valid.
	my($len)=0;
	my(@keys) = keys %hash;
	my($min) = length($keys[0]);
	for ($s1=1; $s1 < @keys; $s1++){
		$len =length($keys[$s1]);
		$min = $len  if ($len < $min);
	}
	$min;
}
#________________________________________________________________________
# Title     : fasta_append
# Usage     : &fasta_append($name, $string, $output_file);
# Function  : append addtional one fasta format sequence.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub fasta_append{
	 my($name, $string, $output_file)=@_;	# $name is the name of seq.
	 open (FASTA_APPEND,">>$output_file");	# $string is the seq string.
	 my($ll)=0;								# $output_file is the name of output.
	 print FASTA_APPEND ">$name\n";			# this appends seq. in one file.
	 $ls2=length($string );
	 for($i=0; $i<$ls2; $i++){
		  $char=substr($string ,$i,1);
		  if(($char ne ' ') && ($char ne '.')){
			  print FASTA_APPEND $char;
			  $ll++;
			  if($ll == 60){
				  $ll=0;
				  print FASTA_APPEND "\n";
			  }
		  }
	 }
	 print FASTA_APPEND "\n";
}

#________________________________________________________________________
# Title     : fasta_output
# Usage     : &fasta_output($dir.$mul_factor.fasta,  $whole_seq, *array_ali, *array1);
# Function  : prints fasta format output which is using $mul_factor
#             $seq is the whole sequence number(largest).
#             $dir.$mul_factor.fasta can be any output name,
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub fasta_output{
		  local($dir.$mul_factor.fasta, $seq_number, *array_ali, *array1)=@_;
		  my($output_file) = "$dir.$mul_factor.fasta";
		  my(@keys1)= keys (%array_ali);
		  my(@keys2)= keys (%array1);
		  unlink <"$output_file">;	# removing previous file to prevent appending.
		  foreach $name(keys %array_ali){
					 $dir  = &pwd_dir;
					 $string=$array_ali{$name};
					 print "\n\nchecking fasta write $name, $string\n\n"; &beep;
					 &fasta_append($name, $string, $output_file);
		  }
		  foreach $key1(@keys1){
					 $counter=0;
					 foreach $key2(@keys2){
								if ($key1 eq $key2){
										  splice(@keys2, $counter,1);
								}
								$counter+=1;
					 }
		  }
		  if ( $seq_number => ($#keys2+1) ){
					 $seq_number = $#keys2;
		  }
		  for ($x=0; $x <= $seq_number ;$x++){
					 $name2=$keys2[$x];
					 $string2=$array1{$name2};
					 print "\n seq number is = $seq_number \n";
					 print "\n\nchecking fasta write $name2, $string2\n\n"; &beep;
					 $dir  = &pwd_dir;
					 &fasta_append($name2, $string2, $output_file);
		  }
}
#________________________________________________________________________
# Title     : fasta_out_seq_no
# Usage     : &fasta_out_seq_no($dir, $out_seq_no, $seq, *array2, *array1);
# Function  : prints fasta format output with specified seq no from whole seq. no.
#             $seq is the whole sequence number(largest). $out_seq_no is the target
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub fasta_out_seq_no{
  my($dir, $out_seq_no, $seq_number);
  ($dir, $out_seq_no, $seq_number, *array1, *array2)=@_;
  &hash_chk(\%array1);&hash_chk(\%array2);
  %array2    = &hash_substract(*array2, *array1);
  %array_1_2 = &hash_catenate (*array1, *array2);
  &hash_chk(\%array_1_2);
  my($output_file) = "$dir\_$out_seq_no.fas"; 	# " is essential.
  my(@keys1)= keys (%array1);
  my(@keys2)= keys (%array2);
  my(@keys_1_2) = keys (%array_1_2);
  unlink <"$output_file">;						# this is essential as I use &fasta_append
  if (($#keys1+1) > $out_seq_no){					# if out_seq_no is less than structural
	  for ($no = 0; $no < $out_seq_no; $no++){	# appending first seq. set.
		  my($name1) = $keys1[$no];				# array1 first
		  $dir  = &pwd_dir;
		  my($string1) = $array1{$name1};
		  &fasta_append($name1, $string1, $output_file);
	  }
  }else{
	  for ($no1 = 0; $no1  <= $#keys1; $no1++){	# appending first seq. set.
		  my($name1) = $keys1[$no1];				# array1 first
		  $dir  = &pwd_dir;
		  my($string1) = $array1{$name1};
		  &fasta_append($name1, $string1, $output_file);
	  }
	  for ($no2 = 0; $no2  < ($out_seq_no-$#keys1-1); $no2++){	# appending first seq. set.
		  my($name2) = $keys2[$no2];			# array1 first
		  $dir  = &pwd_dir;
		  my($string2) = $array2{$name2};
		  &fasta_append($name2, $string2, $output_file);
	  }
  }
}
#________________________________________________________________________
# Title     : ctime
# Usage     : $Date = &ctime(time);
# Function  : a simple Perl emulation for the well known ctime(3C) function.
# Example   : $Date = &ctime(time);
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub ctime{
	 @DoW = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
	 @MoY = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');

	 local($time) = @_;
	 local($[) = 0;
	 my($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);

	 # Determine what time zone is in effect.
	 # Use GMT if TZ is defined as null, local time if TZ undefined.
	 # There's no portable way to find the system default timezone.

	 $TZ = defined($ENV{'TZ'}) ? ( $ENV{'TZ'} ? $ENV{'TZ'} : 'GMT' ) : '';
	 ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
		  ($TZ eq 'GMT') ? gmtime($time) : localtime($time);

	 # Hack to deal with 'PST8PDT' format of TZ
	 # Note that this can't deal with all the esoteric forms, but it
	 # does recognize the most common: [:]STDoff[DST[off][,rule]]

	 if($TZ=~/^([^:\d+\-,]{3,})([+-]?\d{1,2}(:\d{1,2}){0,2})([^\d+\-,]{3,})?/){
		  $TZ = $isdst ? $4 : $1;
	 }
	 $TZ .= ' ' unless $TZ eq '';

	 $year += 1900;
	 sprintf("%s %s %2d %2d:%02d:%02d %s%4d\n",
		$DoW[$wday], $MoY[$mon], $mday, $hour, $min, $sec, $TZ, $year);
}
#________________________________________________________________________
# Title     : get_time
# Usage     : $Date = &get_time(time);
# Function  : a simple Perl emulation for the well known ctime(3C) function.
# Example   : "Nov30 4:37 1995"
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_time{
	 @DoW = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
	 @MoY = ('Jan','Feb','Mar','Apr','May','Jun',
				'Jul','Aug','Sep','Oct','Nov','Dec');

	 my($time) = @_;
	 local($[) = 0;
	 local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst,$final_time);

	 # Determine what time zone is in effect.
	 # Use GMT if TZ is defined as null, local time if TZ undefined.
	 # There's no portable way to find the system default timezone.

	 $TZ = defined($ENV{'TZ'}) ? ( $ENV{'TZ'} ? $ENV{'TZ'} : 'GMT' ) : '';
	 ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
		  ($TZ eq 'GMT') ? gmtime($time) : localtime($time);

	 # Hack to deal with 'PST8PDT' format of TZ
	 # Note that this can't deal with all the esoteric forms, but it
	 # does recognize the most common: [:]STDoff[DST[off][,rule]]

	 if($TZ=~/^([^:\d+\-,]{3,})([+-]?\d{1,2}(:\d{1,2}){0,2})([^\d+\-,]{3,})?/){
		  $TZ = $isdst ? $4 : $1;
	 }
	 $TZ .= ' ' unless $TZ eq '';

	 $year += 1900;
	 ############### This is the original format ##################
	 #$final_time=sprintf("%s %s% 2d %2d:%02d:%02d %s %4d\n",
	 #            $DoW[$wday], $MoY[$mon], $mday, $hour, $min, $sec, $TZ, $year);

	 $final_time=sprintf("%s%2d% 2d:%02d %4d\n",
					  $MoY[$mon], $mday, $hour, $min, $year);
	 return(\$final_time);
}


#________________________________________________________________________
# Title     : get_date
# Usage     : @outformat = &get_date;  eg result >  (010595 1-May-1995)
# Function  : returns date: $date6d (6 digit format) and
#             $datec (dd-mmm-yyyy format), Tim's version is 'getdate' in th_lib.pl
# Example   : 30-Nov-1995
# Keywords  : get_present_date,
# Options   :
# Returns   : ref of an array for (1-May-1995 and 010595)
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub get_date{
	 my($date_alphabet, $date6d)
	 my(@time) = localtime(time);
	 my($ty,$tm,$td) = ($time[5],$time[4],$time[3]);
	 my($year) = '19' . $ty;
	 my($mon) = (Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec)[$tm];
	 my($day) = $td;
	 if($day < 10){
		  $day = ' ' . $day;
	 }
	 $date_alphabet = $day.'-'.$mon.'-'.$year;
	 $tm++;
	 if($tm < 10){
		  $tm = '0'.$tm;
	 }
	 if($td < 10){
		  $td = '0'.$td;
	 }
	 $date6d = $td.$tm.$ty;
	 return ([$date_alphabet, $date6d]);
}

#__________________________________________________________________________
# Title     : if_file_older_than_x_days
# Usage     : if( ${&if_file_older_than_x_days($ARGV[0], $days)} > 0){
# Function  : checks the date of last modi of file given and compares with
#             present time. Substracts diff and returns the actual diff days.
# Example   :
# Keywords  : how_old_file, how_old, is_file_older_than_x_days, file_age,
#             file_age_in_days,
# Options   :
# Returns   : the actual days older, so NON-ZERO, otherwise, 0
# Version   : 1.3
#----------------------------------------------------------------------------
sub if_file_older_than_x_days{
	if(@_ < 2){ print "\n# FATAL: if_file_older_than_x_days needs 2 args\n"; exit; }
	my $file=${$_[0]} || $_[0];
	my $days=${$_[1]} || $_[1];
	my ($new_idx_file, $how_old_days);
	unless(-s $file){
	    print "\n# FATAL, nearly!: if_file_older_than_x_days: $file does NOT exist !\n";
		$new_idx_file=${&make_seq_index_file($file)};
		print "        if_file_older_than_x_days called make_seq_index_file to make $new_idx_file\n";
	}

	$how_old_days=(localtime(time- (stat($file))[9]))[3];
	if($how_old_days > $days){
		print "\n# if_file_older_than_x_days: $file is older than $days\n";
		return(\$days);
	}else{
		print "\n# if_file_older_than_x_days: $file is NOT older than $days\n";
		return(0);
	}
}



#________________________________________________________________________
# Title     : array_chk
# Usage     : &array_chk(\@any_array_to_chk);
# Function  : checks if any inputting array is empty or with one element.
# Example   : This is used only with subs which accepts array inputs.
# Warning   :
# Keywords  : array_check
# Options   :
# Returns   : nothing, prints out messages to STDOUT
# Argument  : gets on ref. of array.
# Version   : 1.0
#--------------------------------------------------------------------
sub array_chk{  my(@input)=@{$_[0]};
  if (@input == 0){
	 &caller_info;
	 print "\n >>> $0 \n";
	 print "\n >>> Error: Input array to this subroutine was empty\n", chr(7);
	 print "\n To continue prog. type \'y\', or \'n\' to quit (with enter).\n ---->";
	 $key = ${&yes_or_no};
	 if($key ne 'y'){  print "\n !! Aborting the operation !! \n"; exit(0); }}
  elsif ($#input == 0){
	 print "\n >>> Warn: Input array to this subroutine was only one, O.K ?\n";
	 print "\n >>> It means your input was not an array at all, probable	error\n";
	 &caller_info;
	 #________________________________________________________
	 # Title    : caller_info
	 # Function : tells you calleing programs and sub's information with file, subname, main, etc
	 # Usage    : &caller_info; (just embed anywhere you want to check.
	 #----------------------------------------------------------------------
	 sub caller_info{	    # caller(1), the num. tells you which info you choose
		my($i)=1;
		while(($pack, $file, $line, $subname, $args) = caller($i++)){
		  my($level) = $i-1;
		  print "\n", chr(169)," This sub info was made by \&caller_info subroutine";
		  print "\n ", chr(164)," Package  from => $pack ";
		  print "\n ", chr(164)," Exe. file was => $file ";
		  print "\n ", chr(164)," Line was  at? => $line (in $file)";
		  print "\n ", chr(164)," Name of  sub? => $subname";
		  print "\n ", chr(164)," How many arg? => $args";
		  print "\n ", chr(164)," Level of sub? => $level (1 is for where \&caller_info is )\n\n";
		}
	 }
	 #________________________________________________________
	 #________________________________________________________
	 # Title    : yes_or_no
	 # returns  : ref. of a Scalar for 'y' or 'n'
	 # Usage    : $yes_or_no = ${&yes_or_no};
	 #---------------------------------------------------------
	 sub yes_or_no{
		my($key)=getc;
		if (($key eq 'y') || ($key eq 'Y')){
		  return(\$key);
		}elsif(($key eq 'n') || ($key eq 'N')){
		  return(\$key);
		}else{
		  print chr(7), "\n Type only (y or n) ----> ";
		  &yes_or_no;
		}
	 }
	 #________________________________________________________
  }
}

#________________________________________________________________________
# Title     : hash_chk
# Usage     : &hash_chk(\%input_hash);
# Function  : checks hash input of any subroutine.
# Example   :
# Warning   :
# Keywords  : hash_check
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_chk{ my(@input)=%{$_[0]};
  if ( @input == 0){
	  &caller_info;
	  print "\n >>> $0 \n", chr(7);
	  print "\n >>> Error: Input hash to this subroutine was empty\n";
	  print "\n To continue prog. type \'y\', or \'n\' to quit (with enter).\n ----> ";
	  $key = ${&yes_or_no};
	  if($key ne 'y'){
		 exit(0);
	  }
  }elsif ( @input == 1){
	  &caller_info;
	  print "\n >>> $0 \n", chr(7);
	  print "\n >>> Warn: Input hash to this subroutine was only one, O.K ?\n";
	  print "\n To continue prog. type \'y\', or \'n\' to quit (with enter).\n ----> ";
	  $key = &{&yes_or_no};
	  if($key ne 'y'){
		  exit(0);
	  }
  }
}
#________________________________________________________________________
# Title     : hash_output_chk
# Usage     : &hash_output_chk(\%outing_hash);
# Function  : checks hash output of any subroutine.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_output_chk{
  for($i=0; $i<= $#_; $i++){
	 my(%tem)=%{$_[$i]};  my(@keys)=keys %tem;
	 for ($j =0; $j<@keys; $j++){
		unless(($keys[$j]=~/[\s\S]+/)&&($tem{$keys[$j]}=~/[\s\S]+/)){
		  print "\n Err. at Hash_output_chk at $0 \n", chr(7); exit;
		}
	 }
  }
}

#________________________________________________________________________
# Title     : n
# Usage     : &n;
# Function  : puts one single new line
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub n{  print "\n";  }


#________________________________________________________________________
# Title     : cls
# Usage     : &cls;
# Function  : clears screen
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub cls{   my($cls) = `clear`;  print $cls;  }


#________________________________________________________________________
# Title     : seq_comp_percent1
# Usage     : @outarray = &seq_comp_percent1(@any_input_string_array);
# Function  : get string seq identities(a to z). gets array of strings and outs array of % numbers
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one ref. of an array
# Argument  : one ref. of an array
# Version   :
#--------------------------------------------------------------------
sub seq_comp_percent1{ 		# this is affected by seq. length
		  my(@input)=@{$_[0]};
		  my(@array_of_ids1, $id1, @char1, @char2);
		  &array_chk(\@input);
		  @input = sort (@input);
		  $longest_str_size  = &get_long_str_size (@input), "\n";
		  $shortest_str_size = &get_short_str_size(@input), "\n";
		  if (($longest_str_size/$shortest_str_size) > 4){
					 print "\n The shortest string is less than 1/4 of the longest\n";
					 print " This is quite meaningless, but will go on\n";
		  }
		  for ($i = 0; $i <= $#input ; $i++){
					 if ($input[$i]=~/(\W)/){
								print "\n Warn: seq($input[$i] contains non char\n";
								&remove_non_char($input[$i]);
					 }
					 @char1 = split(/|\s+|\.+|\-+/, $input[$i]);  # splitting into char.
					 foreach $char (@char1){
								$charcount1{$char} +=1; # making array of ['A' => 6, 'B'=>2...]
					 }
					 for($j = $i+1 ; $j <= $#input; $j++){
								if ($input[$j]=~/(\W)/){
										  print "\n Warn: seq($input[$i] contains non char\n";
										  &remove_non_char($input[$j]);
								}
								@char2 = split(/|\s+|\.+|\-+/, $input[$j]);  # splitting into
								foreach $char (@char2){
										  $charcount2{$char} +=1; # making array of ['A' => 6, 'B'=>2...]
								}
								$id1 = &get_id_among_2_1(*charcount1, *charcount2); # gets % id.
								# print %charcount1,"\n";
								push (@array_of_ids1, $id1);
								%charcount2=();
					 }
					 %charcount1=();
		  }
		  \@array_of_ids1;
}


#________________________________________________________________________
# Title     : get_id_among_2_1
# Usage     : $id = &get_id_among_2(*charcount1, *charcount2) <- hashes
# Function  : gets the % id of any two sequences, returns in  100.0% format.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub get_id_among_2_1{ 	# 66.67 % if ABC with ABCABC  (due to diff. seq. length.)
	 local(*hash1, *hash2)= @_;
	 my($identity, $no_of_same, $sum_of_same, $av);
	 my(@num_char1)=values %hash1;
	 my(@num_char2)=values %hash2;
	 for $key1 (sort keys %hash1){
					 for $key2 (sort keys %hash2){
								if ($key1 eq $key2){
										  $no_of_same = &min($hash1{$key1},$hash2{$key2});
										  $sum_of_same += $no_of_same;
										  last;
								}
					 }
	 }
	 $identity = $sum_of_same*2/(&sum_array(@num_char1,@num_char2))*100;
	 # print "percent iden = ", $identity, "\n";
}
#________________________________________________________________________
# Title     : get_id_among_2_2
# Usage     : $id = &get_id_among_2(*charcount1, *charcount2) <- hashes
# Function  : gets the % id of any two sequences, returns in  100.0% format.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_id_among_2_2{       #  eg) 50% if ABC with AABBCC or ABCABC
	 local(*hash1, *hash2)= @_;
	 my($identity, $no_of_same, $sum_of_same, $av);
	 #print %hash1,"\n";
	 #print %hash2,"\n";
	 my(@num_char1)=values %hash1;
	 my(@num_char2)=values %hash2;
	 for $key1 (sort keys %hash1){
			for $key2 (sort keys %hash2){
				if ($key1 eq $key2){
					 $no_of_same = &min($hash1{$key1},$hash2{$key2});
					 $sum_of_same += $no_of_same;
					 last;
				}
			}
	 }
	 $seq1=&sum_array(@num_char1);
	 $seq2=&sum_array(@num_char2);
	 $longer_seq = &max($seq1, $seq2);
	 $identity = $sum_of_same/$longer_seq*100;
	 #print "percent iden = ", $identity, "\n";
}

#________________________________________________________________________
# Title     : array_average
# Usage     : $output = &array_average(\@any_array);
# Function  : (the same as average_array)
# Example   :
# Warning   : If divided by 0, it will automatically replace it with 1
# Keywords  : get_array_average, av_array, average_array, get_average_array
#             average_of_array, average_array
# Options   :
# Returns   : single scaler digit.
# Argument  : takes one array reference.
# Version   : 1.2
#--------------------------------------------------------------------
sub array_average{
  my(@input)= @{$_[0]};
  my $int_option = ${$_[1]} || $_[1];
  my($item,$average,$num,$sum);
  my $num_of_elem = @input;

  for $item(@input){
	 if( $item =~ /^$/ ){  ## If it matches nothing. '$item == 0' does not work !!!
		$num_of_elem --; ## This is to make sure that the denominator does not
	 }                  ## count blank element. (to get correct element number)
	 else{ $sum += $item;  }
  }
  if($num_of_elem ==0){ $num_of_elem =1; }  ## To prevent 'Division by 0' error
  if($int_option =~ /[\-]*i[nt]*/){
	  $average= int( $sum/$num_of_elem );
  }else{   $average = $sum/$num_of_elem }

  return(\$average);
}

#________________________________________________________________________
# Title     : average_array
# Usage     : $output = &average_array(\@any_array);
# Function  : (the same as array_average)
# Example   :
# Warning   : If divided by 0, it will automatically replace it with 1
# Keywords  :
# Options   :
# Returns   : single scaler digit.
# Argument  : takes one array reference.
# Version   : 1.0
#--------------------------------------------------------------------
sub average_array{
  my(@input)= @{$_[0]};
  my $int_option = ${$_[1]} if ref($_[1]);
  my $int_option =  $_[1]  if !ref($_[1]);
  my($item,$average,$num,$sum);
  my $num_of_elem = @input;

  for $item(@input){
	 if( $item =~ /^$/ ){  ## If it matches nothing. '$item == 0' does not work !!!
		$num_of_elem --; ## This is to make sure that the denominator does not
	 }                  ## count blank element. (to get correct element number)
	 else{ $sum += $item;  }
  }
  if($num_of_elem ==0){ $num_of_elem =1; }  ## To prevent 'Division by 0' error
  if($int_option =~ /[\-]*i[nt]*/){
	  $average= int( $sum/$num_of_elem );
  }else{   $average = $sum/$num_of_elem }

  \$average;
}

#________________________________________________________________________
# Title     : average_of_array (the same as array_average)
# Usage     : $output = &average_of_array(\@any_array);
# Function  :
# Example   :
# Warning   : If divided by 0, it will automatically replace it with 1
#             '$item == 0' does not work !!! in the following
# Keywords  :
# Options   : -int to make the resultant numbers shown in integer
# Returns   : single scaler digit.
# Argument  : takes one array reference.
# Version   : 2.0
#--------------------------------------------------------------------
sub average_of_array{
  my(@input)= @{$_[0]};
  my $int_option = ${$_[1]} if ref($_[1]);
  my $int_option =  $_[1]  if !ref($_[1]);
  my($item,$average,$num,$sum);
  my $num_of_elem = @input;

  for $item(@input){
	 if( $item =~ /^$/ ){  ## If it matches nothing. '$item == 0' does not work !!!
		$num_of_elem --; ## This is to make sure that the denominator does not
	 }                  ## count blank element. (to get correct element number)
	 else{ $sum += $item;  }
  }
  if($num_of_elem ==0){ $num_of_elem =1; }  ## To prevent 'Division by 0' error
  if($int_option =~ /[\-]*i[nt]*/){
	  $average= int( $sum/$num_of_elem );
  }else{   $average = $sum/$num_of_elem }

  \$average;
}
#________________________________________________________________________
# Title     : hash_average
# Usage     : %out=%{&hash_average(\%in)};  or
#             ($out1, $out2)=&hash_average(\%in,\%in2);
# Function  :
# Example   : %in=(1, "13242442", 2, "92479270", 3, "2472937439");
# Warning   :
# Keywords  :
# Options   :
# Returns   : %out =(1, 2.13242, 2, 5.2702, 3, 1.72937439); <-- somethins like
#             numbers. So, undefined array element is not counted
#             This is more correct.
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_average{
  my(@out_av_hash, $v, $sum, $num_of_elem);
  for($v=0; $v< @_; $v++){
	  my(%input)=%{$_[$v]};
	  for(keys(%input)){
		 if($input{$_} =~ /\,/){ $split_char=',' }
		 else{ $split_char='' }
		 my(@num_arr)=split(/$split_char/, $input{$_});
		 $num_of_elem = @num_arr;
		 for $elem(@num_arr){
			 if( $elem == '' ){
				$num_of_elem -- ; ## This is to make sure that the denominator does not
			 }                   ## count blank element. (to get correct element number)
			 else{ $sum += $elem;  }
		 }
		 my($av)=$sum/$num_of_elem;
		 $out_hash{$_}=$av;
		 $sum=0;
	  }
	  push(@out_av_hash, \%out_hash);
  }
  if( @out_av_hash==1 ){  return($out_av_hash[0]); }
  elsif( @out_av_hash > 1){  return(@out_av_hash);  }
}

#________________________________________________________________________
# Title     : get_hash_value_average
# Usage     : %out=%{&get_hash_value_average(\%in)};  or
#             ($out1, $out2)=&hash_average(\%in,\%in2);
# Function  :
# Example   :
# Warning   :
# Keywords  : get_values_average, get_average_hash_value, get_average_value
# Options   :
# Returns   : %out =(1, 2.13242, 2, 5.2702, 3, 1.72937439); <-- somethins like
#             numbers. So, undefined array element is not counted
#             This is more correct.
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_hash_value_average{
  my(@out_av_hash, $i, $v, $sum, $num_of_elem);
  for($v=0; $v< @_; $v++){
	  my (@keys, $sum, $av);
	  my(%input)=%{$_[$v]};
	  @keys=keys %input;
	  for($i=0; $i< @keys; $i++){
		 $sum+=$input{$keys[$i]};
	  }
	  $av=$sum/@keys;
	  push(@out_av_hash, $av );
  }
  if( @out_av_hash==1 ){  return(\$out_av_hash[0]); }
  elsif( @out_av_hash > 1){  return(\@out_av_hash);  }
}



#________________________________________________________________________
# Title     : hash_stat_for_all
# Usage     : %out=%{&hash_average(\%in, \%in2,..)};
# Function  : gets the min, max, av, sum for the whole values of ALL the
#             hashes put in. (grand statistics)
# Example   : %in =(1, "13242442", 2, "92479270", 3, "2472937439");
#             %in2=(1, "28472", 2, "23423240", 3, "123412342423439");
#
#             %in =(name1, "1,3,2,4,2,4,4,2", name2, "9,2,4,7,9,2,7,0");
#
# Warning   :
# Keywords  :
# Options   :
# Returns   : normal array of ($min, $max, $sum, $av)
#             Example  out:>                 |  min max sum  av
#                            -----------------------------------
#                            of the whole    |   0   9  110   6
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_stat_for_all{
  package hash_stat_for_all;
  my($elem,@out_av_hash, @out_array,$v,@num_arr,$sum,$min, $av,$num_all, $max,$split_char);
  for($v=0; $v<@_; $v++){
	  my(%input)=%{$_[$v]};
	  for $name(keys(%input)){
		 if($input{$name} =~ /\,/){ $split_char=',';
		 }else{
		    $split_char='';
		 }
		 @num_arr=split(/$split_char/, $input{$name});
		 for $elem(@num_arr){
			if($elem =~/[\-]*\d+/){
			    $min=$elem unless(defined($min));
				$min =$elem if $elem < $min; $max =$elem if $elem > $max;
				$sum+=$elem; $num_all++;
			}
		 }
	  }
  }
  if($num_all == 0){ $av=0; $sum=0; $min=0; $max=0; }
  else { $av=$sum/$num_all; }
  push(@out_array, ($min, $max, $sum, $av));
  package main;
  return(@out_array);
}

#________________________________________________________________________
# Title     : min
# Usage     : $min = &min (37, 24, 3,1,5, \@array, @array2, \$arr_ref);
# Function  : accepts ref of array, scalar and normal digits to
#             find the min. Only gets numbers. If you put something
#             like 'H333333', it gets digits '333333' only and returns it.
#             this uses RECURSION.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub min{
  my(@in) = @_; my($i, @min, $min);

  for($i=0; $i < @in; $i++){
	 if( (ref($in[$i]) eq 'SCALAR') && (${$in[$i]} =~/([\-]?\d+)/) ){
		push(@min,  $1);    }
	 elsif(ref($in[$i]) eq 'ARRAY'){
		$min=&min(@{$in[$i]});
		push(@min, $min);    }
	 elsif( ( !ref($in[$i]) )&& ($in[$i] =~/([\-]?\d+)/)  ){
		push(@min, $1); }
  }

  for (@min) { $min = $_ if $min > $_; }
  $min;
}

#________________________________________________________________________
# Title     : max
# Usage     : $max = &max (37, 24, 3,1,5, \@array, @array2, \$arr_ref);
# Function  : accepts ref of array, scalar and normal digits to
#             find the min. Only gets numbers. If you put something
#             like 'H333333', it gets digits '333333' only and returns it.
#             this uses RECURSION.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub max{
  my(@in) = @_; my($i, $max, @max, $min);

  for($i=0; $i < @in; $i++){
	 if( (ref($in[$i]) eq 'SCALAR') && (${$in[$i]} =~/([\-]?\d+)/) ){
		push(@max,  $1);    }
	 elsif(ref($in[$i]) eq 'ARRAY'){
		$max = &max(@{$in[$i]});
		push(@max, $max);    }
	 elsif( ( !ref($in[$i]) )&& ($in[$i] =~/([\-]?\d+)/)  ){
		push(@max, $1); }
  }

  for (@max) { $max = $_ if $max < $_; }
  $max;
}
#________________________________________________________________________
# Title     : get_longest_str_size
# Usage     : $long_str_size = ${&get_long_str_size (\@any_array_of_string)};
#             $long_str_size = ${&get_long_str_size (\@any_array_of_string)};
# Function  : get_longest_str_size in an array. eg. get ABCDE among (A, CAB, CDE, ABCDE)
#             When hash is given it processes the values of it.
# Example   :
# Warning   :
# Keywords  : get_the_largest_string_size{, get_largest_string_size,
#             get_largest_str_size{,largest_string_size{, get_largest_string_size_hash
#             get_long_str_size, get_longest_string_size, lonest_string_size
# Options   :
# Returns   :
# Argument  : gets one reference of an array of strings.
# Version   : 1.2
#--------------------------------------------------------------------
sub get_longest_str_size{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my @input;
	if(@hash ==1){
	   @input=values %{$hash{0}};
	}elsif(@array ==1){
	   @input=@{$array[0]};
	}
	my ($factor,$max,$len);
	if( (@input<1)&&(@_ > 1) ){
	  @input=@_;
	}
	if(@num_opt < 1){ @num_opt=0..$#input; $factor=0 }
	else{ $factor =1 }
	print "\nPositions compared are: @num_opt\n" if $debug==1;
	for($j=0; $j < @num_opt; $j++){
  	   $len = length($input[$num_opt[$j]-$factor]);
	   $max = $len if ($len > $max);
	}
	return(\$max);
}



#________________________________________________________________________
# Title     : get_shortest_str_size
# Usage     : $short_str_size = &get_short_str_size (\@any_array_of_string);
# Function  : get_shortest_str_size in an array. eg. get A among (A, CAB, CDE, ABCDE)
# Example   :
# Warning   : once debugged. 1st May/95
# Keywords  : get_short_str_size, get_short_string_size, shortest_string_size,
# Options   :
# Returns   :
# Argument  : gets one reference of an array of strings.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_shortest_str_size{
  my(@input)=@{$_[0]}; my($min,$len);
  $min=length($input[0]);
  for ($i = 1; $i <=$#input; $i++){ $len = length($input[$i]); $min = $len if ($len < $min); }
  \$min;
}

#________________________________________________________________________
# Title     : get_id_among_2
# Usage     : $id = &get_id_among_2(\%charcount1, \%charcount2) <- hashes
# Function  : gets the % id of any two sequences
# Example   : %hash1=('A', 30, 'B', 99, 'C', 15 .....)
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  : gets two references of hashes of chars and their occurances.
# Version   : 1.0
#--------------------------------------------------------------------
sub get_id_among_2{  my(%hash1)=%{$_[0]}; my(%hash2)=%{$_[1]};
  my($identity, $no_of_same, $sum_of_same, $av);
  for $key1 (sort keys %hash1){
  $k1 +=1;
	 for $key2 (sort keys %hash2){  $k2 +=1;
		if ($key1 eq $key2){
		  $no_of_same = &min($hash1{$key1},$hash2{$key2});
		  $sum_of_same +=$no_of_same;  last;  } } }
  if ($k1 == $k2){  $av = $k1;  }
  else{  $av = &array_average($k1, $k2); }
  $identity = ($sum_of_same/$av)*100;
  \$identity;
}

#________________________________________________________________________
# Title     : extract_num_to_array
# Usage     : @my_outarray = &extract_num_to_array($any_input_string);
# Function  : extract only numbers(including negatives) from a string and put into an array
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub extract_num_to_array{
  my($input) =${$_[0]};
  my(@input) =split(/\s+/, $input);
  my(@out_array);
  print "@input", "\n";
  for ($i=0; $i <=$#input; $i++){
		  if ($input[$i] =~ /^(-\d+\.\d+)$/) # for negatives( -3.5555)
		  {
					 push(@out_array	, $1);
		  }
		  elsif ($input[$i] =~ /^(\d+\.\d+)$/)  # for positives ( 33.5534 )
		  {
					 push(@out_array	, $1);
		  }
		  elsif ($input[$i] =~ /^(-\d+)$/) # sor single nega digit ( -1 )
		  {
					 push(@out_array	, $1);
		  }
		  elsif ($input[$i] =~ /^(\d+)$/) # sor single nega digit ( 25 )
		  {
					 push(@out_array	, $1);
		  }
  }
  \@out_array;
}
#________________________________________________________________________
# Title     : weighted_average
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub weighted_average{
	 my(@input)=@{$_[0]};
	 my(@array_av_diff,@array_diff, $sum, $num,);
		  my($diff,$weight,$average,$sum_of_av_diffs,$weight_x_input_item);
		  my($final_weighted_av) = 0;
					 for($i=0; $i<=$#input ; $i++){
								for ($j=0; $j<=$#input ; $j++)
								{
										  next if ($i==$j); # remove the self - self. !!
										  $diff=abs($input[$i]-$input[$j]);
										  push (@array_diff, $diff);
								}
								for $item(@array_diff){
								  $sum+=$item;
								}
								$num=$#array_diff+1;
								$average = $sum/$num;

								push(@array_av_diff, $average);
								$average=0; $sum=0;
								@array_diff=();
					 }
					 $sum_of_av_diffs = ${&sum_array(\@array_av_diff)};
					 if ($sum_of_av_diffs == 0){
								$sum_of_av_diffs =1;
								$final_weighted_av = $input[0];
								goto END_point;
					 }
					 for ($k=0; $k <= $#input ; $k++){
								$weight= (($array_av_diff[$k])/$sum_of_av_diffs);
								$weight_x_input_item  = ($input[$k])*$weight;
								$final_weighted_av  = $final_weighted_av + $weight_x_input_item;
					 }
			END_point:
	 \$final_weighted_av;  # for return
}
#________________________________________________________________________
# Title     : weighted_av
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub weighted_av{
	my(@input)=@{$_[0]};
	my(@array_av_diff,@array_diff, $sum, $num,);
		  my($diff,$weight,$average,$sum_of_av_diffs,$weight_x_input_item);
		  my($final_weighted_av) = 0;
					 for($i=0; $i<=$#input ; $i++){
								for ($j=0; $j<=$#input ; $j++)
								{
										  next if ($i==$j); # remove the self - self. !!
										  $diff=abs($input[$i]-$input[$j]);
										  push (@array_diff, $diff);
								}
								for $item(@array_diff){
								  $sum+=$item;
								}
								$num=$#array_diff+1;
								$average = $sum/$num;

								push(@array_av_diff, $average);
								$average=0; $sum=0;
								@array_diff=();
					 }
					 $sum_of_av_diffs = ${&sum_array(\@array_av_diff)};
					 if ($sum_of_av_diffs == 0){
								$sum_of_av_diffs =1;
								$final_weighted_av = $input[0];
								goto END_point;
					 }
					 for ($k=0; $k <= $#input ; $k++){
								$weight= (($array_av_diff[$k])/$sum_of_av_diffs);
								$weight_x_input_item  = ($input[$k])*$weight;
								$final_weighted_av  = $final_weighted_av + $weight_x_input_item;
					 }
			END_point:
	 \$final_weighted_av;  # for return
}


#________________________________________________________________________
# Title     : sum_array (the same as array_sum)
# Usage     : $out =  ${&sum_array(\@anyarray)};
# Function  : sum of all the  elements of an array .
# Example   :
# Warning   :
# Keywords  : get_array_sum get_sum_array, get sum of array
# Options   :
# Returns   : a ref. of a scaler.
# Argument  : ref. of an array of numbers.
# Version   :
#--------------------------------------------------------------------
sub sum_array{
  my($sum, $item);
  foreach $item(@{$_[0]}){ $sum += $item; }
  \$sum;
}
#________________________________________________________________________
# Title     : sum_of_array (the same as array_sum)
# Usage     : $out =  ${&sum_of_array(\@anyarray)};
# Function  : sum of all the  elements of an array .
# Example   :
# Warning   :
# Keywords  :
# Options   : -int for integerised output.
# Returns   : a ref. of a scaler.
# Argument  : ref. of an array of numbers.
# Version   : 1.0
#--------------------------------------------------------------------
sub sum_of_array{
  my $int_option = ${$_[1]} if ref($_[1]);
  my $int_option = $_[1] if !ref($_[1]);
  my($sum, $item);
  foreach $item(@{$_[0]}){
	 $sum += $item; }
  if($int_option =~ /[\-]*i[nt]*/){
	 $sum = int($sum);
  }
  \$sum;
}

#________________________________________________________________________
# Title     : array_sum (the same as sum_array)
# Usage     : $out =  ${&sum_array(\@anyarray)};
# Function  : sum of all the  elements of an array .
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : a ref. of a scaler.
# Argument  : ref. of an array of numbers.
# Version   :
#--------------------------------------------------------------------
sub array_sum{ my($sum, $item);
  foreach $item(@{$_[0]}){ $sum += $item; }
  \$sum;
}


#________________________________________________________________________
# Title     : sum_hash_values_of_string
# Usage     : $out = &sum_hash_values_of_string(\%anyhash);
# Function  : sum of all the  numbers in valuse of a hash
# Example   : %hashinput= ( name1, '12..3e',
#                            name2, '...234');
#             $result = 1+2+3+2+3+4 = 15 (from above example)
# Warning   : It only gets digits in the input strings and sums them up.
# Keywords  : sum_hash_string_values, get_sum_hash_string_values, get_hash_value_sum
# Options   :
# Version   : 1.1
#--------------------------------------------------------------------
sub sum_hash_values_of_string{
	my($elements) = join(',', values (%{$_[0]}));
	my(@elements) = split(',',$elements);
	my($sum);
	foreach $item(@elements){ if ($item =~ /[\-\d+]/){ $sum += $item; } }
	return(\$sum);
}

#________________________________________________________________________
# Title     : sum_hash_values
# Usage     : $out = &sum_hash_values(%anyhash);
# Function  : sum of all the  numbers in valuse of a hash
# Example   : %hashinput= ( name1, '12..3e',
#                            name2, '...234');
#             $result = 1+2+3+2+3+4 = 15 (from above example)
# Warning   : It only gets digits in the input strings and sums them up.
# Keywords  : sum_hash_number_values, get_sum_hash_values, get_hash_value_sum
# Options   :
# Version   : 1.0
#--------------------------------------------------------------------
sub sum_hash_values{
	my($sum);
	my %in_hash=%{$_[0]};
	my @values=values %in_hash;
	foreach (@values){
	   if (/\-?[e\-\d+]/){ $sum += $_;
	   }else{ print "\n# $_ is NON-numeric, exiting\n";
	      exit;
	   }
	}
	return(\$sum);
}


#________________________________________________________________________
# Title     : key_ready
# Usage     :
# Function  : detects keyboard input without reading it
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
#             You should check out the Frequently Asked Questions list in
#             comp.unix.* for things like this: the answer is
#             essentially the same.
#             It's very system dependent.  Here's one solution that
#             works on BSD systems:
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub key_ready{  my($rin, $nfd); vec($rin, fileno(STDIN), 1) = 1;
  return $nfd = select($rin,undef,undef,0);
}

#________________________________________________________________________
# Title     : round
# Usage     :
# Function  : gives rounded numbers
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub round{
	my(@numbers);
	my($num);
	@numbers  = @{$_[0]} if ref($_[0]) eq 'ARRAY';
	push(@numbers, ${$_[0]}) if ref($_[0]) eq 'SCALAR';

	for $num (@numbers){
		$num  = int($num + .5);
	}
	if(@numbers > 1){ \@numbers }elsif( @numbers == 1 ){ \$numbers[0] }
}
#________________________________________________________________________
# Title     : round_number
# Usage     :
# Function  : gives rounded integer numbers. 9.5 will be 10, 9.4 will be 9
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub round_number{
	my(@numbers);
	my($num);
	@numbers  = @{$_[0]} if ref($_[0]) eq 'ARRAY';
	push(@numbers, ${$_[0]}) if ref($_[0]) eq 'SCALAR';

	for $num (@numbers){
		$num  = int($num + .5);
	}
	if(@numbers > 1){ \@numbers }elsif( @numbers == 1 ){ \$numbers[0] }
}
#________________________________________________________________________
# Title     : round_numbers  (same as  round_number )
# Usage     : @output=@{&round_numbers(\@input_numbs)};
#             or  $output=${&round_numbers(\$input_numbs)};
# Function  : gives rounded integer numbers. 9.5 will be 10, 9.4 will be 9
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub round_numbers{
	my(@numbers);
	my($num);
	@numbers  = @{$_[0]} if ref($_[0]) eq 'ARRAY';
	push(@numbers, ${$_[0]}) if ref($_[0]) eq 'SCALAR';

	for $num (@numbers){
		$num  = int($num + .5);
	}
	if(@numbers > 1){ \@numbers }elsif( @numbers == 1 ){ \$numbers[0] }
}



#________________________________________________________________________
# Title     : trim_numbers
# Usage     : @output=@{&trim_numbers(\@input_numbs, \$size_of_posi)};
# Function  : gives trimmed numbers (not rounded)
# Example   : given num array( 1.33333, 3.555242424, 0.2342324, 4.9234723747)
#             >>>            (1.33,  3.56,  0.23,  4.92 )
#
# Warning   : If you put '1' with trimming value of 2 it will be '1.00'
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub trim_numbers{
	my(@numbers) = @{$_[0]};
	my($num);
	my($position)= ${$_[$#_]} if ref($_[$#_]);  ### last input arg is the trimming value
	my($position)=  $_[$#_]  if ! ref($_[$#_]);
	if (@_ < 2){ $position =4; }
	for $num (@numbers){
		$num  = sprintf("%-.$position f", $num);
	}
	\@numbers;
}

#________________________________________________________________________
# Title     : min_elem_array
# Usage     : ($out1, $out2)=@{&min_elem_array(\@array1, \@array2)};
#             ($out1)       =${&min_elem_array(\@array1)          };
# Function  : gets the smallest element of any array of numbers.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one or more ref. for scalar numbers.
# Argument  : numerical arrays
# Version   : 1.0
#--------------------------------------------------------------------
sub min_elem_array{
  my(@out_min_elem, @input, $min_elem);
  for($i=0; $i< @_; $i++){
	 @input=@{$_[$i]};
	 $min_elem=$input[$#input];
	 for (@input){
		 $min_elem=$_ if ((/[\-\d]+/)&&($_ < $min_elem));
	 }
	 push(@out_min_elem, $min_elem);
  }
  if(@_ == 1){  return( \$min_elem ); }
  elsif(@_ > 1 ){  return( \@out_min_elem ) };
}

#________________________________________________________________________
# Title     : max_elem_array
# Usage     : ($out1, $out2)=@{&max_elem_array(\@array1, \@array2)};
#             ($out1)       =${&max_elem_array(\@array1)          };
# Function  : gets the largest element of any array of numbers.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one or more ref. for scalar numbers.
# Argument  : numerical arrays
# Version   : 1.0
#--------------------------------------------------------------------
sub max_elem_array{
  my(@out_max_elem, $i, @input, $max_elem);
  for($i=0; $i< @_; $i++){
	 @input=@{$_[$i]}; $max_elem=$input[$#input];
	 for (@input){
		 $max_elem = $_ if ((/[\-\d]+/)&&($_ > $max_elem));    }
	 push(@out_max_elem, $max_elem);}
  if(@_ == 1){ return( \$max_elem ); }
  elsif(@_ > 1 ){  return( \@out_max_elem ) };
}

#________________________________________________________________________
# Title     : max_elem_string_array
# Usage     : ($out1, $out2)=@{&max_elem_array(\@array1, \@array2)};
#             ($out1)       =${&max_elem_array(\@array1)          };
# Function  : gets the largest string length of element of any array of numbers.
# Example   :
# Warning   :
# Keywords  : largest string length of array
# Options   :
# Returns   : one or more ref. for scalar numbers.
# Argument  : numerical arrays
# Version   : 1.0
#--------------------------------------------------------------------
sub max_elem_string_array{
  my(@input, $i, $max_elem);
  @input = @{$_[0]} if ref($_[0]) eq 'ARRAY';
  @input = @_       if ref($_[0]) ne 'ARRAY';
  for($i=0; $i< @input ; $i++){
		 $max_elem = length($input[0]);
		 if (length($input[$i]) > $max_elem){
			 $max_elem = length($input[$i]);
		 }
  }
  \$max_elem;
}

#________________________________________________________________________
# Title     : min_elem_string_array
# Usage     : ($out1, $out2)=@{&max_elem_array(\@array1, \@array2)};
#             ($out1)       =${&max_elem_array(\@array1)          };
# Function  : gets the largest string length of element of any array of numbers.
# Example   :
# Warning   :
# Keywords  : shortest string length of array
# Options   :
# Returns   : one or more ref. for scalar numbers.
# Argument  : numerical arrays
# Version   : 1.0
#--------------------------------------------------------------------
sub max_elem_string_array{
  my(@input, $i,  $min_elem);
  @input = @{$_[0]} if ref($_[0]) eq 'ARRAY';
  @input = @_ unless ref($_[0]) eq 'ARRAY';

  for($i=0; $i< @input ; $i++){
		 $min_elem = length($input[$#input]);
		 if (length($input[$i]) < $min_elem){
			 $min_elem = length($input[$i]);
		 }
  }
  \$min_elem;
}


#________________________________________________________________________
# Title     : maximum
# Usage     : $biggest = &maximum(37, 24);
# Function  : another way of finding maximum
# Example   :
# Warning   :
# Keywords  : get_maximum, get_bigger, get_largest
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub maximum{ if ($_[0] > $_[1]){ $_[0];  } else{ $_[1];  }  }

#________________________________________________________________________
# Title     : minimum
# Usage     : $biggest = &maximise(37, 24);
# Function  : another way of finding minimum
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub minimum{ if ($_[0] > $_[1]){$_[1];} else{ $_[0]; }  }


#_____________________________________________________________________
# Title     : get_largest_element
# Usage     : $max=${&get_largest_element(\@array_input)};
# Function  : If strings are given, it gets the largest string elem(by leng)
#             If numbers are given, it gets the largest number elem
#             It automatically checks if string is given
# Example   :
# Keywords  : get_largest_value, get_biggest_value,
#             get_maximum_element, get_largest_number,
#             get_largest_number_element, get_longest_element,
#             get_longest_string
# Options   : _  for debugging.
#             #  for debugging.
#             s  for string input (as the second input argument!)
#
# Version   : 1.3
#---------------------------------------------------------------------
sub get_largest_element{
   unless(ref($_[0])){ print "\n# get_largest_element needs REF of array\n";
	  exit;
   }
   my @arr=@{$_[0]};
   my $string_given=${$_[1]} || $_[1];
   my ($max, $jump, $i);

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # $jump is used only to check string is given or not, NO relevant to main algorithm
   #___________________________________________________________________________
   if(@arr > 10000){ $jump=500 }
   elsif(@arr > 1000){ $jump=50 }
   elsif(@arr > 100){ $jump=5 }
   elsif(@arr > 20 ){ $jump=2 }
   elsif(@arr > 5  ){ $jump=1 }

   #~~~~~~~ Checking if arr is string or not ~~~~~~~~~
   for($i=0; $i < @arr; $i += $jump){
	  if($arr[$i]=~/[a-zA-Z\D]/){
		 $string_given='s';
		 last;
	  }
	  if($i > 20){ last }
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  Real algorithm starts here
   #_____________________________________
   if($string_given=~/s/){
	  for($i=0; $i< @arr; $i++){
		$max=$arr[$i] if length($max) < length($arr[$i]);
	  }
   }else{
	  for($i=0; $i< @arr; $i++){
		$max=$arr[$i] if $max < $arr[$i];
	  }
   }
   return(\$max);
}



#________________________________________________________________________
# Title     : sqrt_array
# Usage     :
# Function  : sqrt all elements of an array
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub sqrt_array{
  my(@input)= @{$_[0]};
  my($sqrt_item);
  foreach $item(@input){ $item = sqrt($item); }
  \@input;
}
#________________________________________________________________________
# Title     : square_array
# Usage     :
# Function  : converts all the elements of an array to squared values.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub square_array{ my(@input)= @{$_[0]}; my($sqrt_item);
  foreach $item(@input){ $item = $item*$item; }
  \@input;
}

#________________________________________________________________________
# Title     : sum_of_squared_array
# Usage     : $out = &sum_of_squared_array(@anyarray);
# Function  : sum of all the squared elements of an array .
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub sum_of_squared_array{ my(@input)= @{$_[0]};	my($sqrt_item);	my($sum);
  for $item(@input){ $item = $item*$item; $sum += $item;}
  \$sum;
}
#________________________________________________________________________
# Title     : x_mul_y_arrays
# Usage     : @out_array = &x_mul_y_arrays(*array1,*array2);
# Function  : multiplies each item of two arrays .
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub x_mul_y_arrays{
	my(@ar1)=@{$_[0]};
	my(@ar2)=@{$_[1]};
	$num_of_element = ($#ar1 + 1);
	for ($c=0; $c <= $num_of_element;$c++){
	    $multiplied = ($ar1[$c])*($ar2[$c]);
		push(@multiple, $multiplied);
	}
	return(\@multiple);
}
#________________________________________________________________________
# Title     : sum_x_mul_y_arrays
# Usage     : $out = &sum_x_mul_y_arrays(*array1,*array2);
# Function  : sums up multiplied items of two arrays .
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub sum_x_mul_y_arrays{
  my(@arr1)=@{$_[0]};
  my(@arr2)=@{$_[1]};
  my($sum_xy, $multiplied);
  for ($k=0;$k <=$#arr1;$k++){
	 $multiplied = ($arr1[$k])*($arr2[$k]);
	 $sum_xy += $multiplied; }
  return(\$sum_xy);
}

#________________________________________________________________________
# Title     : corelation_coefficient
# Usage     : $cc = &corelation_coefficient(\@array_not_hash1, \@array_not_hash2);
# Function  : gets corelation_coefficient of two equal length arrays
# Example   :
# Warning   : uses references for ARRAY.
# Keywords  : cc, get_cc, get_corelation_coefficient
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub corelation_coefficient{
	my($final_cc,$av1,$av2,$sum_of_squared_ar1,
		$sum_of_squared_ar2,$sum_of_xy,$upper_half, $under_half);
	my(@array1)=@{$_[0]};
	my(@array2)=@{$_[1]};
	$num_of_elem = @array1;
	$av1=${&array_average(\@array1)};
	$av2=${&array_average(\@array2)};

	$sum_of_squared_ar1=${&sum_of_squared_array(\@array1)};
	$sum_of_squared_ar2=${&sum_of_squared_array(\@array2)};
	$sum_of_xy= ${&sum_x_mul_y_arrays(\@array1,\@array2)};
	$upper_half=($sum_of_xy -($num_of_elem*($av1*$av2)));
	$under_half=sqrt(($sum_of_squared_ar1-($num_of_elem*($av1*$av1)))*($sum_of_squared_ar2-($num_of_elem*($av2*$av2))));

	if ($under_half ==0){ $under_half=1; }
	$final_cc  = $upper_half/$under_half;
	print "\n CC = $final_cc\n";
	\$final_cc;
}

#________________________________________________________________________
# Title     : cc
# Usage     : $cc = &cc(\@array_not_hash1, \@array_not_hash2);
# Function  : synonmym of  corelation_coefficient
# Example   :
# Warning   : uses references for ARRAY
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub cc{ my(@array1)=@{$_[0]};  my(@array2)=@{$_[1]};
	my($final_cc, $av1, $av2,$sum_of_squared_ar1, $sum_of_squared_ar2,$sum_of_xy, $upper_half, $under_half);
	$num_of_elem = @array1;
	$av1=&array_average(@array1);    $av2=&array_average(@array2);
	$sum_of_squared_ar1=${&sum_of_squared_array(\@array1)};
	$sum_of_squared_ar2=${&sum_of_squared_array(\@array2)};
	$sum_of_xy= ${&sum_x_mul_y_arrays(\@array1,\@array2)};
	#----- comment out if you do not want STDOUT of these vars. ----#
	print "Number of elem in the array for cc $num_of_elem  ", $num_of_elem,"\n";
	print "Average of array1  ", $av1, "\n";
	print "Average of array2  ", $av2,"\n";
	print "sum_of_squared_ar1 = $sum_of_squared_ar1  ", $sum_of_squared_ar1, "\n";
	print "sum_of_squared_ar2 = $sum_of_squared_ar2  ", $sum_of_squared_ar2, "\n";
	print "sum_of_xy = $sum_of_xy", $sum_of_xy, "\n";
	$upper_half=($sum_of_xy -($num_of_elem*($av1*$av2)));
	$under_half=sqrt(($sum_of_squared_ar1-($num_of_elem*($av1*$av1)))*($sum_of_squared_ar2-($num_of_elem*($av2*$av2))));
	$final_cc  = $upper_half/$under_half;
	\$final_cc;
}

#________________________________________________________________________
# Title     : sd
# Usage     : $sd=${&sd(\@array_of_numbers)};
# Function  :
# Example   :
# Warning   :
# Keywords  : standard deviation, get_standard_deviation,
#             standard_deviation,
# Options   :
# Returns   : a ref. of a scaler
# Argument  : array references are accepted. outputs scalar single val.
# Version   : 1.0
#--------------------------------------------------------------------
sub sd{
  my(@array)=@{$_[0]};
  my($i, $variance,$average, $deviation,$number,$sum,$standard_deviation,
	  $squared_deviation, $sum_of_squared_deviation );
  for($i=0 ; $i< @array ; $i++){ $sum=$sum+$array[$i]; }
  $average=($sum/@array);
  for($i=0 ; $i< @array ; $i++){ $deviation=($array[$i]-$average);
	 $squared_deviation=($deviation*$deviation);
	 $sum_of_squared_deviation = $sum_of_squared_deviation + $squared_deviation;  }
  $variance=($sum_of_squared_deviation/@array);
  $standard_deviation = sqrt($variance);
  return(\$standard_deviation);
}


#________________________________________________________________________
# Title     : se
# Usage     :
# Function  : gets standard error of any given array
# Example   :
# Warning   :
# Keywords  : standard error
# Options   :
# Returns   :
# Argument  : ref. for an array.
# Version   : 1.0
#--------------------------------------------------------------------
sub se{
  my(@array)=@{$_[0]};
  my($i,$j,$variance,$average, $deviation,$number,$sum,
	 $standard_deviation,$squared_deviation);
  $number=@array;
  for($i=1; $i<=$number; $i++){ $sum=$sum+$array[$i];  }
  $average=($sum/$number);
  for($j=1;$j<=$number;$j++){  $deviation=($array[$i]-$average);
	 $squared_deviation=($deviation*$deviation);
	 $sum_of_squared_deviation = $sum_of_squared_deviation + $squared_deviation;  }
  $variance=($sum_of_squared_deviation/$number);
  $standard_deviation=sqrt($variance); $standard_error = $standard_deviation/(sqrt($number));
  return(\$standard_error);
}

#________________________________________________________________________
# Title     : remove_non_char
# Usage     : $outstring = &remove_non_char($input_string);
# Function  : removes non chars on any input string. (scaler context)
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub remove_non_char{ my($input)=${$_[0]};$input =~ s/\W//g; \$input;}


#________________________________________________________________________
# Title     : numerically
# Usage     : sort numerically (@array);
# Function  : sorts elements by nemerical size.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub numerically{  $a <=> $b;  }

#________________________________________________________________________
# Title     : abs_numerically
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : numerically_abs, numerically_absolutely
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub abs_numerically{ abs($a) <=> abs($b); }

#________________________________________________________________________
# Title     : rev_abs_numerically
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub rev_abs_numerically{  abs($b) <=> abs($a);  }

#___________________________________________________________________
# Title     : randomise_lines
# Usage     : To randomize th_lib.pl just type &random_lines(300,500,"th_lib.pl");
#             &random_lines(300, 50, "th_lib.pl"); <-- to get 300 lines
#                                                      from 50 numbers
# Function  :
#             outs line numbers with lines
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#---------------------------------------------------------------
sub randomise_lines{
	my($len)=$_[0];
	my($len_1)=$_[1];
	my($inputfile)=$_[2];
	srand(time()^$$);
	for ($i=1;$i<=$len;$i++){
	    $r2=int(rand($len_1));
	    push(@random, $r2);
		 $random{$i}=$random[$i];
	}
	$counter=-1;
	open(IN,"$inputfile");
	while(<IN>){
	    $counter+=1;
	    $input{$counter}=$_;
	}
	for $elem(@random){
		for $key(keys %input){
		   if ($elem == $key){
		     print $key,"\t";
		     print $input{$key};
		   }
		}
	}
}


#___________________________________________________________________
# Title     : pick_random_hash_pairs
# Usage     : @array = @{&pick_random_hash_pairs(\%hash1, \$xx)};
# Function  : randomly pick any num of pairs of hash elements.
#             outs line numbers with lines
#             Default pick number is 1.
# Example   : in signature roation or FVWM rc file menu color rotation.
# Warning   :
# Keywords  : choose_random_hash_pairs
# Options   :
# Returns   : ARRAY ref not HASH  ref
# Argument  :
# Version   : 1.3
#---------------------------------------------------------------
sub pick_random_hash_pairs{
  my %hash = %{$_[0]};
  my @keys = keys %hash;
  my $num_of_pick = ${$_[1]} || $_[1] || 1 ; #<-- This sets the default 1
  my (@pairs, $random_num, $i);
  srand(time()^$$);

  for($i=0; $i<$num_of_pick; $i++){
	 $random_num= int( rand( @keys ) );
	 @pairs=(@pairs, $keys[$random_num], $hash{$keys[$random_num]});
  }
  return(\@pairs);
}

#___________________________________________________________________
# Title     : pick_random_files.pl
# Usage     : @array = @{&pick_random_files(\@files, \$num_of_pick)};
# Function  : randomly pick any num of files given.
# Example   : @array=@{&pick_random_files(\@files, \$num_of_pick)};
# Warning   :
# Keywords  : choose_random_files pick_files_randomly
# Options   :
# Returns   : ARRAY ref not HASH  ref
# Argument  :
# Version   : 1.0
#---------------------------------------------------------------
sub pick_random_files{
  my @files = @{$_[0]};
  my $num_of_pick = ${$_[1]} || $_[1] || 1 ; #<-- This sets the default 1
  my (@out, %count, $random_num, $i);
  srand(time()^$$);
  unless($num_of_pick=~/\d/){
	 $num_of_pick=1;  ## default pick number is 1
  }

  for($i=0; $i<$num_of_pick; $i++){
	 $random_num= int( rand( @files ) );
	 push(@out, $files[$random_num]) unless $count{$random_num};
	 $count{$random_num}++;
  }
  return(\@out);
}

#________________________________________________________________________
# Title     : hash_substract_by_keys
# Usage     : %hash1 = %{&hash_substract_by_keys(\%hash1, \%hash2)};
# Function  : removes overlapping entries in hashes.
# Example   : %hash1 = %hash1 - %hash2, ==> (4,4)=(2,2, 4,4) - (2,2)
# Warning   :
# Keywords  : substract_hash, substract_hash_by_value, hash_substract
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub hash_substract_by_keys{
  my(%hash1)=%{$_[0]};
  my(%hash2)=%{$_[1]};
  grep($hash2{$_} && $hash1{$_} && delete $hash1{$_}, keys %hash2);
  return(\%hash1);
}

#________________________________________________________________________
# Title     : substract_hash_by_keys
# Usage     : %hash1 = %{&substract_hash(\%hash1, \%hash2)};
# Function  : removes overlapping entries in hashes.
# Example   : %hash1 = %hash1 - %hash2, ==> (4,4)=(2,2, 4,4) - (2,2)
# Warning   :
# Keywords  : substract_hash, substract_hash_by_keys
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub substract_hash_by_keys{
  my(%hash1)=%{$_[0]};
  my(%hash2)=%{$_[1]};
  grep($hash2{$_} && $hash1{$_} && delete $hash1{$_}, keys %hash2);
  return(\%hash1);
}

#________________________________________________________________________
# Title     : substract_hash_by_values
# Usage     : %hash1 = %{&substract_hash_by_values(\%hash1, \%hash2)};
# Function  : removes overlapping value entries in hashes.
# Example   : %hash1 = %hash1 - %hash2, ==> (4,4)=(2,2, 4,4) - (2,2)
# Keywords  : substract_hash, substract_hash_by_values
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub substract_hash_by_values{
  my(%hash1)=%{$_[0]};
  my(%hash2)=%{$_[1]};
  my($i, $j, @values, @keys);
  @values=values %hash2;
  @keys=keys %hash1;
  for($i=0; $i< @values; $i++){
	 for($j=0; $j < @keys; $j++){
	    if($values[$i] eq $hash1{$keys[$j]}){
		    delete $hash1{$keys[$j]};
	    }
	 }
  }
  return(\%hash1);
}


#________________________________________________________________________
# Title     : substract_array
# Usage     : @subs = @{&substract_array(\@array1, \@array2)};
# Function  : removes any occurances of certain elem. of the first
#             input array with second input array.
# Example   : Following will produce (A K C);
#		@array1= qw( A B K B B C);
#  		@array2= qw( B E D);
#  		@subs = @{&substract_array(\@array1, \@array2)};
# Keywords  : array_subtract, substract_array, ary1_minus_ary2
# Options   :
# Version   : 1.6
#--------------------------------------------------------------------
sub substract_array{
	my(@first)=@{$_[0]};
	my(@second)=@{$_[1]};
	my %counter;
	grep($counter{$_}++, @second );
	return ( [grep(!$counter{$_}, @first)] );
}


#________________________________________________________________________
# Title     : hash_catenate
# Usage     : %output = %{&hash_catenate(\%hash1, \%hash2)};
# Function  : removes overlapping entries in hashes.
# Example   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
sub hash_catenate{
  my(%hash_1)=%{$_[0]};
  my(%hash_2)=%{$_[1]};
  %hash1=(%hash_1, %hash_2);
  \%hash1;
}
#________________________________________________________________________
# Title     : merge_hash  (same as hash_catenate)
# Usage     : %output = %{&merge_hash(\%hash1, \%hash2)};
# Function  : removes overlapping entries in hashes.
# Example   :
# Warning   : one bug caught.
# Keywords  : merge_hash_elements,add_hash, merge two hashes.
#             merge hashes, merge_hashes.
# Options   :
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub merge_hash{
  my %out, $i, $j;
  for($i=0; $i< @_; $i++){
	 %{"hash$i"}=%{$_[$i]};
	 $j=$i+1;
	 %out=(%out, %{"hash$i"}, %{"hash$j"});
  }
  return(\%out);
}

#________________________________________________________________________
# Title     : superpose_hash
# Usage     : %output = %{superpose_hash(\%template, \%target));
# Function  : superpose hash keys and values to another hash. %target
#             is the superposing hash(new ones will have the values of
#             this target hash. For example, if you superpose
#                (1, 123, 2, 343)
#             to (1, 111, 2, 2222, 3, 3333), you will get
#                (1, 123, 2, 343,  3, 3333) as the result.
#             Template provide blank key entries.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub superpose_hash{
  ##########################################################
	my($c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r,
	  $s, $t, $u, $v, $w, $x, $y, $z,
	  $average1, $dir, $file, $in_dir, $end_found, $entry, $entry_match,
	  $error_rate, $gap_chr, $half_win, $id_compos, $jp_file, $length, $line,
	  $name, $name_found, $name_found, $type_seq, $offset, $option_string,
	  $original_dir, $output, $out_string, $pre, $pwd, $string, $string1,
	  $sum, $sum1, $type_secon, $type_sol, $title_found, $type_DSSP,
	  $type_acc, $variable_string, $win_size,
	  @arg_output, @string, @k, @keys, @names, @out_hash,
	  @out_hash_final, @output_box, @outref, @read_files, @str1, @str2,  @string1,
	  @Tem_keys, @whole_file,
	  %correct_head_box_entry, %dummy, %Final_out, %hash, %input, %out_hash, %out_hash_final,
	  %template,   %target
	 );
  ##########################################################
	%template=%{$_[0]};
	%target  =%{$_[1]};
	my(%out_hash);
	@Tem_keys = keys %template;
	for($i=0; $i < @Tem_keys; $i ++){
		$out_hash{$Tem_keys[$i]}= $target{$Tem_keys[$i]};
	}
	return(\%out_hash);
}

#________________________________________________________________________
# Title     : hash_common2
# Usage     : %output = &hash_common($ref1, $ref2);
# Function  :
# Example   : %hashout= %hash1 - %hash2, ==> (4,4)=(2,2, 4,4) - (2,2)
# Warning   : NOT working
# Keywords  :
# Options   :
# Returns   : a ref of a hash.
# Argument  : accepts only two references of hashes
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_common2 {
  my($h1, $h2)=@_;
  my(%h1)=%{$h1};
  my(%h2)=%{$h2};
  my(%h3);
  grep(($h1{$_} eq $h2{$_}) && ($h3{$_}=$h1{$_}) , keys %h1);
  return(\%h3);
}

#________________________________________________________________________
# Title     : remove_dup_in_hash
# Usage     : %out=%{&remove_dup_in_hash(\%input_hash)};
# Function  : removes the duplicate  values of any hashes
# Example   : If %input was
#                (1,1, 2,1, 3,1);
#              The values are the same, so the last key value (3 1) will
#              be the result.
#             If %input was
#                (1,1, 2,1, 3,1, 4,2, 5,2)
#              result=(3 1, 5 2)
#
# Warning   :
# Keywords  : remove_dupplicate_values_in_hash, remove_duplicate_values,
#             remov_hash_dup,
# Options   :
# Returns   : one or more hash ref.
# Argument  : one or more hash ref.
# Version   : 1.0
#--------------------------------------------------------------------
sub remove_dup_in_hash{
  my(@out_hash_ref, %reverse);
  for($i=0; $i<@_; $i++){
	 if($_[$i] eq 's'){
		$sort_opt=1;
		splice(@_, $i, 1);
		$i--;
	 }elsif(${$_[$i]} eq 's'){
		$sort_opt=1;
		splice(@_, $i, 1);
		$i--;
	 }
  }
  for($i=0; $i<@_; $i++){
	  if(ref($_[$i])  eq 'HASH'){
		  my(%input)=%{$_[$i]};
		  my($key, $val);
		  while (($key,$val) = each %input) {
			 $reverse{$val} = $key;
		  }
		  while(($key, $val) = each %reverse){
			 $reverse2{$val} =  $key ; ## reverse again to normal
		  }
		  push(@out_hash_ref, \%reverse2);
	  }else{
	     print "\n remove_dup_in_hash accepts only hash ref. in $0\n";
		 print chr(7); exit;
	  }
  }
  if(@out_hash_ref ==1){ $out_hash_ref[0];
  }elsif(@out_hash_ref >1){@out_hash_ref;}
}

#________________________________________________________________________
# Title     : reverse_hash
# Usage     : %out=%{&reverse_hash(\%input_hash)};
# Function  : exchanges the value and key of any hashes
# Example   :
# Warning   : Takes ALIGNED sequences.
# Keywords  : invert_hash, inverse_hash
# Options   :
# Returns   : one or more hash ref.
# Argument  : one or more hash ref.
# Version   : 1.0
#--------------------------------------------------------------------
sub reverse_hash{
  my(@out_hash_ref, %reverse);
  for($i=0; $i<@_; $i++){
	  if(ref($_[$i])  eq 'HASH'){
		  my(%input)=%{$_[$i]};
		  my($key, $val);
		  while (($key,$val) = each %input) {
		     $reverse{$val} = $key;
		  }
		  push(@out_hash_ref, \%reverse);
	  }else{
	     print "\n reverse_hash accepts only hash ref. in $0\n";
		 print chr(7); exit;
	  }
  }
  if(@out_hash_ref ==1){ $out_hash_ref[0];
  }elsif(@out_hash_ref >1){@out_hash_ref;}
}

#________________________________________________________________________
# Title     : hash_common
# Usage     : %hash1_value = %{&hash_common(\%hash1, \%hash2,...)};
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : the VALUES OF THE FIRST HASH which occur in later hashes
#             are returned
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_common{
  my(%common)=();
  for($i=0; $i< @_; $i++){  my(%common2)=();
	  if( !(defined(%common) )){ %common=%{$_[$i]}; next;}
	  elsif(defined(%common)){ %h1=%{$_[$i]};
		 for(keys %common){ $common2{$_}=$common{$_} if (defined $h1{$_});}
	  %common=%common2;}  }
  return(\%common);
}

#________________________________________________________________________
# Title     : hash_common_by_keys
# Usage     : %hash1_value = %{&hash_common_by_keys(\%hash1, \%hash2,...)};
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : the VALUES OF THE FIRST HASH which occur in later hashes
#             are returned
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_common_by_keys{
  my(%common)=();
  for($i=0; $i< @_; $i++){
	  my(%common2)=();
	  if( !(defined(%common) )){ %common=%{$_[$i]}; next;}
	  elsif(defined(%common)){ %h1=%{$_[$i]};
		 for(keys %common){ $common2{$_}=$common{$_} if (defined $h1{$_});}
	  %common=%common2;}
	  undef(%common2);  }
  return(\%common);
}

#________________________________________________________________________
# Title     : get_common_hash_keys
# Usage     : @output_array_of_keys = @{&get_common_hash_keys(\%hash1, \%hash2)};
# Function  : gets the common hash keys of two hashes.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_common_hash_keys{
  my @array1 = keys %{$_[0]};
  my @array2 = keys %{$_[1]};
  grep($common{$_}++, @array1);
  my @common_keys=grep($common{$_}, @array2);
  return(\@common_keys);
}


#________________________________________________________________________
# Title     : hash_no_common
# Usage     : %output = &hash_catenate(*hash1, *hash2);
# Function  : removes overlapping entries in hashes.
# Example   : %hashout= %hash1 - %hash2, ==> (4,4)=(2,2, 4,4) - (2,2)
# Warning   : surely working, This grep version is faster than for and defined loop.
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub hash_no_common{
  local(*h1, *h2)=@_;
  grep(($h1{$_} eq $h2{$_}) && delete $h1{$_} && delete $h2{$_},  keys %h1);
  local(%h_no_com) = (%h1, %h2);
}

#________________________________________________________________________
# Title     : beep
# Usage     : &beep;
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub beep{  print chr(7);  }

#________________________________________________________________________
# Title     : capitalize_word.pl
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : capitalise word,  capitalise_word
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub capitalize_word{
	my($sentence, $tmp);
	if(ref($_[0])){
		$sentence=${$_[0]};
	}else{ $sentence=$_[0]; }
	$sentence =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	return(\$sentence);
}
#________________________________________________________________________
# Title     : capitalize_sentence.pl
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub capitalize_sentence{
	my($sentence, $tmp);
	if(ref($_[0])){
		$sentence=${$_[0]};
	}else{ $sentence=$_[0]; }
	$sentence =~ s#^\S|\s(\S)#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#ge;
	return(\$sentence);
}


#________________________________________________________________________
# Title     : shift_word_recursively
# Usage     : @new_lines=shift_word_recursively(\@lines, '/'); or
#             @new_lines=shift_word_recursively(\@lines, '\W'); or
#             @new_lines=shift_word_recursively(\@lines, 'a-zA-Z'); or
#             @new_lines=shift_word_recursively(\@lines, '/', 2); <--- for multiple chop unit
#             or $new_line = shift_word_recursively(\$line, '.'); <--- for scalar input.
# Function  : shift lines word by word. This needs delimiter like '/' or '.'
#             and stores the resulting arrays. This is to get all the possible
#             directories.
#             For example, with /nfs/A Biomatic /perl/temp/here  input, you get
#             (  /A Biomatic /perl/temp/here,   /perl/temp/here ,
#             temp/here, /here, )  in an array.
#
# Example   : @new_lines=shift_word_recursively(\@lines, '/-', 2); to chop lines
#             off two words with the two delimiters of '/' and '-'.
#             /jong1/perl-jong2/perl-jong3  will become   /perl-jong2/perl-A Biomatic 3
#             /bin/-kkk/-jjj/-jj will become  /-kkk/-jjj/-jj
#             @out=@{&shift_word_recursively($testline, '/-', 2)};
#             You can use perl regexp patterns for  $delimiter as it is directly
#             used in a pattern matching in the sub. So, you canuse '\W'
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  : SCALAR or ARRAY refs. and delimitor ('/', '.', '-'.....)
#             delimitor can be multi line => '#$%/=.'
#             default delimiter is space ' ';
# Version   : 1.0
#--------------------------------------------------------------------
sub shift_word_recursively{
  my(@in)=@_; my($i, @input_lines, $delimiter, @chopped_line, @output_lines);
  my($multi_chop)=1; ### default is 1 (one word chopping)
  for($i=0; $i < @in; $i++){
	 if(ref($in[$i]) eq 'ARRAY'){
		 push(@input_lines, @{$in[$i]}); }
	 elsif(ref($in[$i]) eq 'SCALAR'){
		 if( length(${$in[$i]}) > 2 ){
			 push(@input_lines, ${$in[$i]}); }
		 elsif( ${$in[$i]} =~ /^\W+$/ ){
			 $delimiter .= ${$in[$i]};  }
		 elsif( ${$in[$i]} =~/^\d$/ ){
			 $multi_chop = $&;  }
		 elsif( length(${$in[$i]}) < 4 ){
			 $delimiter .= ${$in[$i]};  }    }
	 elsif( !ref($in[$i]) ){
		 if( length($in[$i]) > 2 ){
			 push(@input_lines, $in[$i]); }
		 elsif( $in[$i] =~/^\W+$/ ){
			 $delimiter .= $in[$i];  }
		 elsif( $in[$i] =~/^\d$/ ){
			 $multi_chop = $&;  }
		 elsif( length(${$in[$i]}) < 4 ){
			 $delimiter .= $in[$i];  }
	 }
  }

  if( $delimiter =~/^$/ ){ $delimiter = ' '; }  ## default delimiter is space
  for($i=0; $i<@input_lines; $i++){
	  @chopped_line = split(/([$delimiter]+)/, $input_lines[$i]);
	  while(@chopped_line > 1){
		  splice(@chopped_line, 1, $multi_chop*2 );
		  push(@output_lines, join("$1", @chopped_line) );
	  }
  }
  \@output_lines;
}
#________________________________________________________________________
# Title     : shift_word
# Usage     : @new_lines=shift_word(\@lines, '/'); or
#             @new_lines=shift_word(\@lines, '\W'); or
#             @new_lines=shift_word(\@lines, 'a-zA-Z'); or
#             @new_lines=shift_word(\@lines, '/', 2); <--- for multiple chop unit
#             or $new_line = shift_word(\$line, '.'); <--- for scalar input.
# Function  : shift lines word by word. This needs delimiter like '/' or '.'
# Example   : @new_lines=shift_word(\@lines, '/-', 2); to shift off lines two words
#             with the two delimiters of '/' and '-'.
#             /jong1/perl-jong2/perl-jong3  will become   /jong1/perl-A Biomatic 2
#             /bin/-kkk/-jjj/-jj will become  /jong1/perl-A Biomatic 2 by
#             @out=@{&shift_word($testline, '/-', 2)};
#             You can use perl regexp patterns for  $delimiter as it is directly
#             used in a pattern matching in the sub. So, you canuse '\W'
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  : SCALAR or ARRAY refs. and delimitor ('/', '.', '-'.....)
#             delimitor can be multi line => '#$%/=.'
#             default delimiter is space ' ';
# Version   : 1.0
#--------------------------------------------------------------------
sub shift_word{
  my(@in)=@_;
  my($i, @input_lines, $delimiter, @chopped_line, @output_lines);
  my($multi_chop)=1; ### default is 1 (one word chopping)
  for($i=0; $i < @in; $i++){
	 if(ref($in[$i]) eq 'ARRAY'){
		 push(@input_lines, @{$in[$i]}); }
	 elsif(ref($in[$i]) eq 'SCALAR'){
		 if( length(${$in[$i]}) > 3 ){
			 push(@input_lines, ${$in[$i]}); }
		 elsif( ${$in[$i]} =~ /^\W+$/ ){
			 $delimiter .= ${$in[$i]};  }
		 elsif( ${$in[$i]} =~/^\d$/ ){
			 $multi_chop = $&;  }
		 elsif( length(${$in[$i]}) < 4 ){
			 $delimiter .= ${$in[$i]};  }
	 }
	 elsif( !ref($in[$i]) ){
		 if( length($in[$i]) > 3 ){
			 push(@input_lines, $in[$i]); }
		 elsif( $in[$i] =~/^\W+$/ ){
			 $delimiter .= $in[$i];  }
		 elsif( $in[$i] =~/^\d$/ ){
			 $multi_chop = $&;  }
		 elsif( length(${$in[$i]}) < 4 ){
			 $delimiter .= $in[$i];  }    }
  }
  if( $delimiter =~/^$/ ){ $delimiter = ' '; }  ## default delimiter is space
  for($i=0; $i<@input_lines; $i++){
	  @chopped_line = split(/([$delimiter]+)/, $input_lines[$i]);
	  splice(@chopped_line, 1, $multi_chop*2 );  ## *2 is essential.
	  push(@output_lines, join("$1", @chopped_line) );
  }
  return(\@output_lines);
}

#________________________________________________________________________
# Title     : chop_word
# Usage     : @new_lines=chop_word(\@lines, '/'); or
#             @new_lines=chop_word(\@lines, '\W'); or
#             @new_lines=chop_word(\@lines, 'a-zA-Z'); or
#             @new_lines=chop_word(\@lines, '/', 2); <--- for multiple chop unit
#             or $new_line = chop_word(\$line, '.'); <--- for scalar input.
# Function  : chop lines word by word. This needs delimiter like '/' or '.'
# Example   : @new_lines=chop_word(\@lines, '/-', 2); to chop off lines two words
#             with the two delimiters of '/' and '-'.
#             /jong1/perl-jong2/perl-jong3  will become   /jong1/perl-A Biomatic 2
#             /bin/-kkk/-jjj/-jj will become  /jong1/perl-A Biomatic 2 by
#             @out=@{&chop_word($testline, '/-', 2)};
#             You can use perl regexp patterns for  $delimiter as it is directly
#             used in a pattern matching in the sub. So, you canuse '\W'
# Warning   : The returning value is not the chopped off word.
# Keywords  : chop_word_recursively, remove_word, chop_word_one_by_one
# Options   : -w, w, Word, etc,  for getting the chopped off word(s) rather
#             than the original lines minus the word.
# Returns   :
# Argument  : SCALAR or ARRAY refs. and delimitor ('/', '.', '-'.....)
#             delimitor can be multi line => '#$%/=.'
#             default delimiter is space ' ';
# Version   : 2.0
#--------------------------------------------------------------------
sub chop_word{
  my(@in)=@_;
  my($i, @input_lines, $delimiter, @chopped_line, @output,
	  @chopped_word_list, $get_chopped_word_option);
  my($multi_chop)=1; ### default is 1 (one word chopping)
  for($i=0; $i < @in; $i++){
	 if(ref($in[$i]) eq 'ARRAY'){
		 push(@input_lines, @{$in[$i]}); }
	 elsif(ref($in[$i]) eq 'SCALAR'){
		 if( length(${$in[$i]}) > 3 ){  ### the delimiter can be upto 3 chars by
			 push(@input_lines, ${$in[$i]}); }  ## one arg input.
		 elsif( ${$in[$i]} =~ /^\W+$/ ){
			 $delimiter .= ${$in[$i]};  }
		 elsif( ${$in[$i]} =~/^\d$/ ){
			 $multi_chop = $&;  }
		 elsif( ${$in[$i]} =~/^\-?[wW][ord]*$/ ){  ## for -w option
			 $get_chopped_word_option = 1;  }
		 elsif( length(${$in[$i]}) < 4 ){
			 $delimiter .= ${$in[$i]};  }
	 }
	 elsif( !ref($in[$i]) ){
		 if( length($in[$i]) > 3 ){
			 push(@input_lines, $in[$i]); }
		 elsif( $in[$i] =~/^\W+$/ ){
			 $delimiter .= $in[$i];  }
		 elsif( $in[$i] =~/^\d$/ ){
			 $multi_chop = $&;  }
		 elsif( $in[$i] =~ /^\-?[wW][ord]*$/ ){  ## for -w option
			 $get_chopped_word_option = 1;  }
		 elsif( length(${$in[$i]}) < 4 ){
			 $delimiter .= $in[$i];  }
	 }
  }
  if( $delimiter =~/^$/ ){ $delimiter = ' '; }  ## default delimiter is space

  if($get_chopped_word_option == 1){
	  for($i=0; $i< @input_lines; $i++){
		  @chopped_line = split(/([$delimiter]+)/, $input_lines[$i]);
		  push(@output, $chopped_line[$#chopped_line] );
	  }  }
  else{  ## when original lines minus the chopped word are wanted.(default).
	  for($i=0; $i<@input_lines; $i++){
		  @chopped_line = split(/([$delimiter]+)/, $input_lines[$i]);
		  splice(@chopped_line, @chopped_line-$multi_chop*2 );
		  push(@output, join("$1", @chopped_line) );
	  }
  }
  if(@output == 1){ return($output[0]);
  }elsif(  @output > 1 ){ return(@output); }
}

#________________________________________________________________________
# Title     : get_median
# Usage     : $median = ${&get_median(\@array)};
# Function  :
# Example   :
# Warning   :
# Keywords  :  median_array, get_median_array, get_array_median, array_median
# Options   :
# Returns   : \$median
# Argument  : \@array
# Version   : 1.0
#--------------------------------------------------------------------
sub get_median{
  my(@array)=@{$_[0]};
  $median=( sort { $a <=> $b } @array )[ @array/2 ];
  return(\$median);
}


#________________________________________________________________________
# Title     : array_median
# Usage     : $median = ${&array_median(\@array)};
# Function  :
# Example   :
# Warning   :
# Keywords  :  median_array, get_median_array, get_array_median, array_median
# Options   :
# Returns   : \$median
# Argument  : \@array
# Version   : 1.0
#--------------------------------------------------------------------
sub array_median{
  my(@array)=@{$_[0]};
  my $median=( sort { $a <=> $b } @array )[ @array/2 ];
  return(\$median);
}

#________________________________________________________________________
# Title     : get_median
# Usage     : $median = ${&get_median(\@array)};
# Function  :
# Example   :
# Warning   :
# Keywords  :  median_array, get_median_array, get_array_median, array_median
# Options   :
# Returns   : \$median
# Argument  : \@array
# Version   : 1.0
#--------------------------------------------------------------------------
sub get_median{
  my(@array)=@{$_[0]};
  $median=( sort { $a <=> $b } @array )[ @array/2 ];
  return(\$median);
}



#________________________________________________________________________
# Title     : push_if_not_already
# Usage     : @out=@{&push_if_not_already(@mother_array, @adding_array )};
#             @out=@{&push_if_not_already(@mother_array, $adding_scalar)};
# Function  : returns ref. of an array for a list of non-repetitive entry.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : a ref. of an array.
# Argument  : two references. The first should be an array ref. The 2nd can be either
#             scalar or array reference.
# Version   : 1.0
#--------------------------------------------------------------------
sub push_if_not_already{
	my($already_in, $already, $i, @push_items_given);
	my(@out_array)=@{$_[0]};
	push(@push_items_given, ${$_[1]}) if(ref($_[1]) eq 'SCALAR');
	@push_items_given=@{$_[1]} if(ref($_[1]) eq 'ARRAY');
	for $already (@out_array){  ## This for is to remove repetitive
	  for ($i=0; $i< @push_items_given; $i++){
		 if($already eq $push_items_given[$i]){ splice(@push_items_given,$i); }
	  }
	}
	push(@out_array,@push_items_given); \@out_array;
}

#________________________________________________________________________
# Title     : replace_lines
# Usage     : &replace_lines(@files, 'removing_string', 'match_str' );
# Function  : replace_lines in any txt files
# Example   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub replace_lines{
	my(@in)=@_;
	my($i, $file, @lines);
	my($replacing_lines     ) = pop(@in);
	my($matching_line_string) = pop(@in) unless (-f $in[$#in]);
	print "\n $matching_line_string";
	for $file (@in){
	  open(FILE, "$file");
	  @lines=<FILE>;
	  for($i=0; $i <@lines; $i++){
		  if($lines[$i]=~/$matching_line_string/){
		      print "\n;lkjasljf \n";
			  $lines[$i]="$replacing_lines\n";
		  }
	  }
	  open(NEW_FILE, ">$file");
	  print NEW_FILE @lines;
	}
	close NEW_FILE;
}


#________________________________________________________________________
# Title     : insert_lines_anywhere
# Usage     : &insert_lines_anywhere(\@files, \$inst_str,'after', \@match_str);
# Function  : insert lines anywhere in any txt files
# Example   :
# Warning   : Case Insensitive by default.
# Keywords  : insert_text, insert_lines, insert_something
# Options   : 'after'  only two options exactly in the middle of two strings.
#             'before'
# Returns   :
# Argument  :
# Version   : 1.1
#--------------------------------------------------------------------
sub insert_lines_anywhere{
	my(@in)=@{$_[0]};
	my $new_lines=${$_[1]};
	my($option_string)=$_[2];
	my(@match_patterns)=@{$_[3]};

	for $file (@in){
	  open(FILE, "$file");
	  @lines=<FILE>;
	  if($option_string =~/[Aa]fter/){
		  for($i=0; $i <@lines; $i++){
			  for($j=0; $j< @match_patterns; $j++){
			     if(($lines[$i]=~/$match_patterns[$j]/)&&
			     ($lines[$i+1] !~ /$inserting_lines/i)){
					  $lines[$i]=$lines[$i]."$new_lines\n";
					  $last;
				  }
			  }
		  }
	  }
	  elsif( $option_string =~/[Bb]efore/){
		  for($i=@lines; $i > -1; $i--){
			  for($j=0; $j< @match_patterns; $j++){
	 			  if(($lines[$i]=~/$match_patterns[$j]/)&&
	   		  ($lines[$i-1] !~ /$inserting_lines/i)){
		   		  $lines[$i]="$new_lines".$lines[$i];
		   		  last;
			     }
			  }
		  }
	  }
	  open(NEW_FILE, ">$file");
	  print NEW_FILE @lines;
	}
}




#________________________________________________________________________
# Title     : get_all_dirs_from_ENV
# Usage     : my(@default_env_dirs) = @{&get_all_dirs_from_ENV};
# Function  : extracts all the directories from %ENV  setting.
# Example   : my(@default_env_dirs) = @{&get_all_dirs_from_ENV}; in handle_arguments
# Warning   : produces repetitive pathes (ie, can output identical path several times)
# Keywords  :
# Options   : None
# Returns   : a ref. of an array of directories.
# Argument  : NONE
# Version   : 1.0
#--------------------------------------------------------------------
sub get_all_dirs_from_ENV{
	  my($each_dir, $i, @default_env_dirs);
	  my($pwd)=`pwd`;
	  chomp($pwd);
	  unshift(@default_env_dirs, $pwd);
	  push(@default_env_dirs,  split(/:/, join(":", (values %ENV)) ));
	  for ($i=0; $i < @default_env_dirs; $i++){
		  $each_dir=$default_env_dirs[$i];
		  $each_dir =~ s/^\.$//;
		  unless( -d $each_dir ){
			 splice(@default_env_dirs, $i, 1); $i--;
			 next;
		  }  ## remove if it is not a directory.
		  elsif( (-d $each_dir) && ($each_dir=~/\/$/) ){
			 chop($each_dir);
		  }  ## removing all the last back slash in  .../xxx/
	  }
	  @default_env_dirs=@{&remove_dup_in_array(\@default_env_dirs)};
	  return(\@default_env_dirs);
}


#________________________________________________________________________
# Title     : get_path_dirs_from_ENV
# Usage     : my(@default_env_dirs) = @{&get_path_dirs_from_ENV};
# Function  : extracts path directories from %ENV  setting.
# Example   : my(@default_env_dirs) = @{&get_path_dirs_from_ENV}; in handle_arguments
# Warning   : Replaces '.' to $pwd.
# Keywords  :
# Options   : None
# Returns   : a ref. of an array of directories.
# Argument  : NONE
# Version   : 1.0
#--------------------------------------------------------------------
sub get_path_dirs_from_ENV{
	  my($each_dir, $i, @default_path_dirs);
	  my($pwd)=`pwd`; chomp($pwd);

	  if(defined($ENV{'path'}) )   {  ## if 'path' is used,
		  push(@default_path_dirs, $pwd, split(/:/, $ENV{'path'}) ) }
	  elsif(defined($ENV{'PATH'}) ){  ## if 'PATH' is used,
		  push(@default_path_dirs, $pwd, split(/:/, $ENV{'PATH'})) }
	  elsif(defined($ENV{'Path'}) ){  ## if 'Path' is used,
		  push(@default_path_dirs, $pwd, split(/:/, $ENV{'PATH'})) }

	  for ($i=0; $i < @default_path_dirs; $i++){
		  $each_dir=$default_path_dirs[$i];
		  $each_dir =~ s/^\.$//;
		  unless( -d $each_dir ){
			 splice(@default_path_dirs, $i, 1); $i--; next; }  ## remove if it is not a directory.
		  elsif( (-e $each_dir) && ($each_dir=~/\/$/) ){
			 chop($each_dir);  }  ## removing all the last back slash in  .../xxx/
	  }
	  @default_env_dirs=@{&remove_dup_in_array(\@default_env_dirs)};
	  return(\@default_env_dirs);
}


#________________________________________________________________________
# Title     : handle_arguments_old
# Usage     : my(@in)=&handle_arguments_old(\@input_args);   Do not dereference it.
# Function  : Sub argument handling for opening files with options. General
#             form of 'handle_arguments_xxxx', while xxxx can be files, hashes, arrays,,,,
# Example   :
# Warning   :
# Keywords  :
# Options   : None yet, extendable by adding refs. of something.
# Returns   : an array of refs for file names, hashes, arrays and  the opion string
# Argument  : one single ref. (\@input_args);
# Version   : 1.0
#--------------------------------------------------------------------
sub handle_arguments_old{
  my($i, @in, @out, $k, @names, $chain, $n  );
  my($real_file, $s, @out_hash_ref_list);
  my(@input_options);
  my($default_option_string)='ETSIBHG'; # This is the string for the default option chars.
  my($full_option_string)='ETSIBHGR'; # This is the string of all the option chars.
  my($match_option);  # <-- This is for option handling at prompt. If you put -e -h at
							 #    the prompt, you will have $match_option value of 'EH' to match
  my(@extension_db)=('sst','msf','fasta','jp','fas','aln','brk','pdb', 'rms',
							'dssp', 'hssp', 'fssp', 'phd', 'ent','slx','fa');

  ##########################################
  ##  Getting just directories from ENV   ##
  ##########################################
  my(@default_env_dirs) = @{&get_all_dirs_from_ENV};
  my($pwd)=`pwd`; chomp($pwd);  # This is necessary for full path '$real_file' var.

  if(ref($_[0])){  @in=@{$_[0]}; }elsif(!ref($_[0])){ @in=@_; }

  for($k=0; $k< @in ;$k++){  my($file_found);
	  #######################
	  ##   If it is ref.   ##
	  #######################
	  if( ref($in[$k]) ){
		  if( ref($in[$k]) eq 'SCALAR' ){
			  if ((-f ${$in[$k]})&&(${$in[$k]}=! /\//) ){
					push( @out, \"$pwd\/${$in[$k]}" ); $file_found=1; last; }  # push as a ref.
			  elsif ((-f ${$in[$k]})&&(${$in[$k]}=~ /\//) ){
					push( @out, \"${$in[$k]}" ); $file_found=1; next; }
			  for $ENV_dir (@default_env_dirs){
					 if (-f "$ENV_dir\/${$in[$k]}"){
						 push(@out, \"$ENV_dir\/${$in[$k]}"); $file_found=1; last ;
					 }
			  }
			  if($file_found != 1){ my($ext, $ENV_dir);
				  for $ENV_dir (@default_env_dirs){
					 for $ext (@extension_db){
						 if (-f "$ENV_dir\/${$in[$k]}\.$ext"){
							 push(@out, \"$ENV_dir\/${$in[$k]}\.$ext" ); $file_found=1; last ;  }
					 }
				  }
			  }
		  }
		  elsif( ref($in[$k]) eq 'HASH'  ){ push(@out, $in[$k]);   }
		  elsif( ref($in[$k]) eq 'ARRAY' ){ push(@out, $in[$k]);   }
		  ########################################################################
		  ############  Following is to get option strings              ##########
		  ########################################################################
		  elsif( (ref($in[$k]) eq 'SCALAR') && (length(${$_[$k]}) < 4 )){
			  if(${$in[$k]}=~ /^([\w]+)$/){  ## <-- Adding options to a one string vAR.
				  my($opt)=$1; $opt=~tr/a-z/A-Z/; $match_option  .= $opt; next;
			  }
		  }
	  }
	  #######################
	  ##  If it is no ref. ##
	  #######################
	  elsif( !ref($in[$k]) ){  my($file_found);
			if( (-f $in[$k])&&($in[$k] =!/\//) ){
				 push( @out, \"$pwd\/$in[$k]" ); next; }
			elsif( (-f $in[$k])&&($in[$k] =~/\//) ){
				 push( @out, \"$in[$k]" );  next; }
			for $ENV_dir (@default_env_dirs){
				 if (-f "$ENV_dir\/$in[$k]"){
					push(@out, \"$ENV_dir\/$in[$k]"); $file_found=1; last ;
				 }
			}
			if($file_found != 1){ my($ext, $ENV_dir);
			  X2: for $ENV_dir (@default_env_dirs){
					  for $ext (@extension_db){
						  if (-f "$ENV_dir\/$in[$k]\.$ext"){
							 push(@out, \"$ENV_dir\/$in[$k]\.$ext" ); $file_found=1; last X2;  }
					  }
					}
			}
			elsif( length($in[$k]) < 4 ){   # <<-- number 3 limits the option char size.
			  if($in[$k]=~ /^([\w]+)$/){ ## <-- Adding options to a one string vAR.
				 local($opt)=$1; $opt=~tr/a-z/A-Z/; $match_option  .= $opt; next;
			  }
			}
	  }
  } ## <<-- End of for loop
  if($match_option =~ /^$/){
	  $match_option = $default_option_string;  } # <<-- When there is no options given, use full options.
  return(@out, $match_option); # The last elem. is the option string
}


#________________________________________________________________________
# Title     : mv
# Usage     : &mv( \$srcFile, \$dstFile); or  &mv( $srcFile, $dstFile);
#             or &mv(FILEHANDLE1, FILEHANDLE2),  or  &mv(FILEHANDLE1, $output)
# Function  : moves files fast, replacement of 'system("mv xxx xxxx"); '
# Example   : mv("mv.pl", *STDOUT);  # This will print mv.pl contents to your screen.
# Keywords  : move files fast. mv_file, mv_files, move_files, move_file
# Options   :
# Argument  : 2 references of file name or  2 file names.
# Warning   : 27 times slower than 'mv' at prompt.  using system is 32 times slower
# Version   : 1.4
#--------------------------------------------------------------------
sub mv{
  croak(" Usage:  &mv($file1, $file2)" ) unless(@_ >= 2);

  my($in)=$_[0];
  my($out)=$_[1];

  if( (ref($in) eq 'GLOB') || (ref($in) eq 'FileHandle')){
	  *IN_CP = *$in; }  ## for  \*STDOUT like input
  elsif( ref(\$in) eq 'GLOB'){
	  *IN_CP = $in;  }
  else{ open(IN_CP, "<$in") or die "Can't open output $in: $!\n";  }

  if( (ref($out) eq 'GLOB') || (ref($_[1]) eq 'FileHandle')){
	  *OUT_CP = *$out;   }
  elsif( ref(\$out) eq 'GLOB'){  print "\n2\n";
	  *OUT_CP = $out;  ## for  *STDOUT like input
  }else{  open(OUT_CP, ">$out") or die "Can't open output $out: $!\n";  }

  my ($access,$mod) = (stat IN_CP)[8,9];
  syswrite(OUT_CP, $buf, $len) while $len = sysread(IN_CP, $buf, 8192);
  close IN_CP;
  close OUT_CP;
  unlink($in);
  utime $access, $mod, $dstFile;
}


#________________________________________________________________________
# Title     : cp
# Usage     : &cp( \$srcFile, \$dstFile); or  &cp( $srcFile, $dstFile);
#             or &cp(FILEHANDLE1, FILEHANDLE2),  or  &cp(FILEHANDLE1, $output)
# Function  : copies files fast, replacement of 'system("cp xxx xxxx"); '
# Example   : cp("cp.pl", *STDOUT);  # This will print cp.pl contents to your screen.
# Keywords  : copy files fast. cp_file, cp_files, copy_files, copy_file
# Options   :
# Argument  : 2 references of file name or  2 file names.
# Warning   : 27 times slower than 'cp' at prompt.  using system is 32 times slower
# Version   : 1.4
#--------------------------------------------------------------------
sub cp{
  croak(" Usage: cp ($file1, $file2)" ) unless(@_ >= 2);

  my($in)=$_[0];
  my($out)=$_[1];

  if( (ref($in) eq 'GLOB') || (ref($in) eq 'FileHandle')){
	  *IN_CP = *$in; }  ## for  \*STDOUT like input
  elsif( ref(\$in) eq 'GLOB'){
	  *IN_CP = $in;  }
  else{ open(IN_CP, "<$in") or die "Can't open output $in: $!\n";  }

  if( (ref($out) eq 'GLOB') || (ref($_[1]) eq 'FileHandle')){
	  *OUT_CP = *$out;   }
  elsif( ref(\$out) eq 'GLOB'){  print "\n2\n";
	  *OUT_CP = $out;  ## for  *STDOUT like input
  }else{  open(OUT_CP, ">$out") or die "Can't open output $out: $!\n";  }

  my ($access,$mod) = (stat IN_CP)[8,9];
  syswrite(OUT_CP, $buf, $len) while $len = sysread(IN_CP, $buf, 8192);
  close IN_CP;
  close OUT_CP;
  utime $access, $mod, $dstFile;
}




#________________________________________________________________________
# Title     : wh
# Usage     :
# Function  : shows the path for a file you want
#             similar to which in UNIX
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   :
#--------------------------------------------------------------------
for $file (@ARGV){
  for $dir (split(/:/, $ENV{PATH})){
	 -e ($path="$dir/$file") && print "$path\n";
  }
}

#________________________________________________________________________
# Title     : condense_script
# Usage     : condense_script.pl  xxxxxx.pl
# Function  : makes compact size subroutines of developed perl codes
# Example   : condense_script.pl th_lib.pl th-test.pl xxx xxxx ....
# Warning   : The only condition is that you need to have 'sub xxxxx' from the
#             first column and the last '}' should be again at the first column
#             This is due to the pattern matching for any sub routines.
# Keywords  :
# Options   : None
# Returns   : xxxxxx.pl.out  but sub routines condensed.
# Argument  : one or more files.
# Version   : 1.0
#--------------------------------------------------------------------
sub condense_script{
  my($k, $f, @in, $file, $outfile, $format_found);
  my($sub_contents, $sub_found); $sub_found=0; $format_found=0;
  for($k =0; $k < @_; $k++){
	 if(ref($_[$k]) eq 'ARRAY'){
		 @in = @{$_[$k]};
	 }
  }
  for $f (@in){
	 open(FILE, "$f");
	 $outfile = "$f"."\.exe";
	 open(FILE_OUT, ">$outfile"); $|=1;
	 while(<FILE>){
		if(/^#!\//){ print FILE_OUT "$_"; next;}
		if(/^\#/){ next; }
		if(/^[\s]*$/){ next; }
		#$_ =~ s/(.+)\#.*$/$1/g;
		if(/^format +[\w]* *\=/){ $format_found =1; $_="\n$_"; }
		if(/^\.[\#.]*$/){ $format_found =0; $_="$_\n";}
		if( (/^\} *$/) || (/^\} *\#.*/) ){
		  chomp($_);
		  print FILE_OUT "$sub_contents\}";
		  $sub_found =0; $sub_contents =''; next;}
		if(/^sub +(\w+) *\{/){
		  $_ =~s/[ ]+$//;
		  $sub_contents .= $_;
		  $sub_found=1; next;}
		if($sub_found == 0){
		  print FILE_OUT "$_"; next;  }
		if($sub_found==1){
		  if($format_found == 0){
			 #chomp($_);
			 $_ =~s/^[ ]+//g;
			 $_ =~s/[ ]+$//g;
			 #$_ =~s/(\W)\s+(\W)/$1$2/g;
			 #$_ =~s/\s*(\{)\s*/$1/g ;
			 #$_ =~s/(\})[\s]*$/$1/g;
			 $_ =~s/[\s]*(\})/$1/g;
			 $_ =~s/\t//g;
		  }
		  $sub_contents .= $_; next;
		}
	 }
  }
}

#________________________________________________________________________
# Title     : initialize_code   28/11/1995, jong, /nfs/ind5/A Biomatic /Perl/Utils
# Usage     : &initialze_code;
# Function  : initialize all developing codes by putting Header section infor
# Example   :
# Warning   : This writes over the program you run (itself). temp file is ini_code.temp
# Keywords  :
# Options   :
# Returns   : None
# Argument  : None
# Version   : 1.0
#--------------------------------------------------------------------
sub initialize_code{
  my($user)=getlogin();      my($perl_version) = "Perl$]";
  my($date)=${&get_time(time)};    my($pwd)=`pwd`;   chomp($date, $pwd);
  my($i, @header, @whole_prog, $sepa_line,$date_line,$auth_line);
  my($head_found)=0;
  $sepa_line1 = "#"."_"x88;
  $sepa_line2 = "#"."-"x88;
  $| =1;  ## <<--  no buffer
  my(@head)=( "$sepa_line1\n",
				  "\# Title    : $0  by $pwd, Last Mod: $date\n",    	# 1
				  "\# Function : \n",                     			# 2
				  "\# Usage    : $0 @ARGV\n",            	 		# 3
				  "\# Argument : @ARGV\n",                			# 4
				  "\# Example  : \n",                     			# 5
				  "\# Argument : \n",                     			# 6
				  "\# Returns  : \n",                     			# 7
				  "\# Options  : \n",                     			# 8
				  "\# Tips     : \n",                     			# 9
				  "\# Author   : by $user\n",             			# 10
# Version  :
				  "\# Warning  : $perl_version\n",        			# 11
				  "$sepa_line2\n",    );

  open(SELF, "<$0");
  @whole_prog = <SELF>;

  for ($i=1; $i < @whole_prog; $i++){
	  if( ($whole_prog[$i]=~ /^#_+$/)&&($whole_prog[$i+1] =~ /^(# +[Tt]itle +: *)([\w\.\-]+pl) +/) ){
	  ###############################################################
		  $head_found =1;
		  if($0 == $2){
			  $whole_prog[$i+1] ="$1$0 , $pwd, Last Mod: $date\n"; $i++; next;  } }
	  ###############################################################
	  elsif( ($whole_prog[$i]=~ /^#_+$/)&&($whole_prog[$i+1] =~ /^(# +[Tt]itle +: *)$/) ){
		  $whole_prog[$i+1] ="$1$0 , $pwd, Last Mod $date\n"; $i++; next; }
	  ###############################################################
	  elsif(($whole_prog[$i]=~ /^#_+$/) &&
			  ($whole_prog[$i+1] =~/^(# +[Tt]itle +[:] *[\w\-\.]+ *[Last Mod:]* +[\d+\/\d+\/\d+], +\w+)/)){
		  $i++; next;    }   # <-- when the date is in 11/11/95 format,
	  elsif(($whole_prog[$i]=~ /^#_+$/) &&
			  ($whole_prog[$i+1] =~/^(# +[Tt]itle +[:] *[\w\-\.]+ *[Last Mod:]* +[\d+\-\w+\-\d+], +\w+)/)){
		  $i++; next;    }   # <-- when the date is in 1-Nov-1995 format,
	  elsif(($whole_prog[$i]=~ /^#_+$/) &&                   # Nov30 4:39 1995
			  ($whole_prog[$i+1] =~/^# +[Tt]itle +[:] *[\w\-\.]+ *[Last Mod:]* +[\w+\d+ +\d+:\d+ +\d+]/)){
		  $i++; next;    }   # <-- when the date is in Nov30 4:39 1995 format,
	  ###############################################################
	  elsif( $whole_prog[$i] =~ /^# +[Uu]sage +[:] +$/ ){
		  $whole_prog[$i] = "# Usage    : $0 @ARGV\n"; next;    }
	  ###############################################################
	  elsif( ($head_found==1)&&($whole_prog[$i]=~ /^# +[Ww]arning +:/) &&
				($whole_prog[$i+1] =~ /^#\-+$/) ){  $head_found=2; last; } }
	  ###############################################################

  #####################
  ###  Final writing ##
  #####################
  if(   $head_found ==2 ){
	  open (SELF, ">$0"); print SELF @whole_prog; close; }
  elsif($head_found ==0){
	  open (SELF, ">$0"); splice(@whole_prog, 1, 0, @head); print SELF @whole_prog; close; }
}

#________________________________________________________________________
# Title     : parse_arguments
# Usage     : &parse_arguments; or  (file1, file2)=@{&parse_arguments};
# Function  : Parse and assign any types of arguments on prompt in UNIX to
#             the various variables inside of the running program.
#             This is more visual than getopt and easier.
#             just change the option table_example below for your own variable
#             setttings. This program reads itself and parse the arguments
#             according to the setting you made in this subroutine or
#             option table in anywhere in the program.
# Example   : &parse_arguments(1);
#             @files=@{&parse_arguments(1)};
# Warning   : HASH and ARRAY mustn't be like = (1, 2,3) or (1,2 ,3)
# Keywords  :
# Options   : '0'  to specify that there is no argument to sub, use
#              &parse_arguments(0);
#             parse_arguments itself does not have any specific option.
#             '#' at prompt will make a var  $debug set to 1. This is to
#              print out all the print lines to make debugging easier.
#             'e=xxxx' for filtering input files by extension xxxx
#
# Returns   : Filenames in a reference of array
#             and input files in an array (file1, file2)=@{&parse_arguments};
# Argument  : uses @ARGV
# Version   : 1.8
#--------------------------------------------------------------------
sub parse_arguments{
  my( $c, $d, $f, $arg_num, $option_table_seen, $n, $option_filtered,
		$option_table_example, $input_line, @input_files,
		$extension);
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Checks if there were arguments
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( @ARGV < 1 ){ #<-- If Argument is not given at prompt
	  for(@_){
		 if($_ eq '0'){
			 last;
		 }else{
			 print "\n \"$0\" requires at least one Argument, suiciding.\n\n";
			 print chr(7); #<-- This is beeping
			 print "  To get help type \"$0  h\"\n\n\n ";
			 exit;
		 }
	  }
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  #  Checking some input options like 'e=txt' for extension filtering
  #_____________________________________________________________________
  for($i=0; $i< @_; $i++){
	  if($_[$i]=~/e=(\S+)/){
		  push(@extension, $1);
	  }
  }

  #""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Some DEFAULT $debug variables for debugging purposes
  #""""""""""""""""""""""""""""""""""""""""""""""""""
  &set_debug_option;
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  If there is only one prompt arg. and is [-]*[hH][elp]*, it calls
  #   &default_help and exits
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( ( @ARGV == 1 ) && ($ARGV[0] =~ /^[\-]*[hH\?][elp ]*$/) ){
		&default_help;
		exit;
  }
  for($f=0; $f < @ARGV; $f++){
	 if( $ARGV[$f] =~ /\w+[\-\.\w]+$/ and -f $ARGV[$f] ){
		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # When extension is defined, filter files by it
		 #____________________________________________
		 if(@extension > 0){
		     for($e=0; $e < @extension; $e++){
				 $extension=$extension[$e];
				 if($ARGV[$f]=~/\S\.$extension/){
					 push(@input_files, $ARGV[$f] );
				 }else{ next }
			 }
		 }else{
			 push(@input_files, $ARGV[$f] );
			 next;
		 }
	 }
  }

  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #     Reading the running program script
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  &assign_options_to_variables;
  if($HELP == 1){ &default_help }
  return(\@input_files);
}

#________________________________________________________________________
# Title     : assign_options_to_variables
# Usage     : &assign_options_to_variables(\$input_line);
# Function  : Assigns the values set in head box to the variables used in
#             the programs according to the values given at prompt.
#             This produces global values.
#             When numbers are given at prompt, they go to @num_opt
#              global variable. %vars global option will be made
#
# Example   : When you want to set 'a' char to a variable called '$dummy' in
#             the program, you put a head box commented line
#             '#  $dummy    becomes  a  by  -a '
#             Then, the parse_arguments and this sub routine will read the head
#             box and assigns 'a' to $dummy IF you put an argument of '-a' in
#             the prompt.
# Warning   : This is a global vars generator!!!
# Keywords  :
# Options   : '#' at prompt will make a var  $debug set to 1. This is to
#              print out all the print lines to make debugging easier.
# Returns   : Some globaly used variables according to prompt options.
#             @num_opt,
#
# Argument  : None.
# Version   : 2.6
#--------------------------------------------------------------------
sub assign_options_to_variables{
  my($i, %vars, $j, $op, $z, $n, $symb, $value, $var, %val, @val, $ARG_REG,
	 $option_table_example, @input_options, $first_border_and_title, $sym, @arg);

  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #      Defining small variables for option table reading
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($g)='gets';                my($if)='if';
  my($is)='is';                 my(@input_files);
  my($o)='or';   my(@arguments) = sort @ARGV;

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  Assigning global arguments(@num_opt, %vars) variables
  #_______________________________________________________________
  for($i=0; $i< @arguments; $i++){
	 if(($arguments[$i]=~/^(\-?\d+[\.\d+]?)$/)&&   ### it mustn't be a file
		( !(-f $arguments[$i]) ) ){                ### getting NUM opt
		push(@num_opt, $1);
	 }elsif( $arguments[$i]=~/^(\S+)=(\S+)$/){
		$vars{$1}=$2;
	 }
  }

  #""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Some DEFAULT $debug variables for debugging purposes
  #""""""""""""""""""""""""""""""""""""""""""""""""""
  &set_debug_option;

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #   The main processing of self
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  open(SELF, "$0");    ## opens the program you ran to get the options table.
  while(<SELF>){

	  if( $first_border_and_title > 6 ){  ## This is to make it read only the first headbox.
		  last;                            #  $first_border_and_title is an incremental counter.
	  }elsif( /^ *#[_\*\-]{15,}$/ and /^ *# *[Tt][itle]*[ :]*/ ){
		  $first_border_and_title++;
		  print __LINE__, "# assign_options_to_variables : Title line found\n" if $debug eq 1;
	  }elsif(/^ {0,5}# {1,50}[\$\%\@].+$/){
		  $op = $&;  ## $op is for the whole input option line which has $xxxx, @xxx, %xxxx format
		  $op =~ s/^( *\# *)(\W\w+.+)$/$2/;  ## This is removing '#  ' in the line.
		  $op =~ s/^(\W\w+.+)(\s+\#.*)$/$1/;  ## This is removing any comments in the line.
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 ## matching the following line input format.
			 ## $av_sc_segment     becomes    a  by  a  # To smooth the SC rates. Gets the averages of
			 ## $ARG_REG is for arguments regular expression variable.
			 ##  This reg. exp. matches = 'a or A or E or e' part
			 ##  which represents alternative prompt arguments possibilities. \=$b$g$is$e$set
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 $ARG_REG ='(\S*) *[or=\,]* *(\S*) *[=or\,]* *(\S*) *[=or\,]* *(\S*) *[=or\,]* *(\S*)';
			 if($op=~/^([\$\@\%])([\w\-]+) {0,20}[=|$g|$is] *[\$\@\%]*([\- \w\.\d]+) *[bB]y +$ARG_REG/){
							 ## $sym     $var        becomes          a [$a...]       by       a -a -A
				  my $sym = $1;  #### The symbols like ($, @, %), '$' in the above.
				  my $var = $2;  #### Actual variable name 'var' from $var, 'av_sc_segment' in the above.
				  my $val = $3;  #### The becoming value  first 'a' in the above.
				  my @arg = ($4, $5, $6, $7, $8);  ## The alternative prompt arguments, second 'a' in the above..
			      print "\n $sym $var $val \n" if $debug==1;
			      print "\n \@arg are @arg \n" if $debug==1;

				  #""""""""""""""""""""""""""""""""""""""""""""""""""""
				  #  Going through the PROMPT args.
				  #""""""""""""""""""""""""""""""""""""""""""""""""""""
				  for($z=0; $z < @arguments; $z++){     ## $arguments[$z]  is from @ARGV
					  if($arguments[$z]=~/^\-\w+$/){
						  $arguments[$z] =~ s/\-//;
					  }
					  for ($i=0; $i < @arg; $i ++ ){
						 if( ("$arg[$i]" eq "$arguments[$z]" )&& ($arg[$i] !~ /\=/)
							 && ($sym eq '$') ){
							 ${"$var"}="$val";
							 if($debug == 1){
								 print __LINE__," \$${var} is set to \"$1\"\n";
							 }

						 }#'''''''''''''''' $arg = by s=  syntax ~~~~~~~~~~~~~~~~~~~~~~~~~~~
						 elsif( ( $arg[$i] =~ /^(\w+) *\=/ ) &&
							( $arguments[$z] =~ /^${1}= *([\w\.*\-*]+)$/) &&
							( $sym eq '$') ){
							  ${"$var"}="$1";
							  if($debug eq 1){ print __LINE__,"\$${var} is set to \"$1\"\n";  }
						 }
					  }
				  }
			  }
		}
	}
}



#________________________________________________________________________
# Title     : read_head_box
# Usage     : %entries = %{&read_head_box([\$file_to_read, \@BOXED ] )};
# Function  : Reads the introductory header box(the one you see on top of sub routines of
#             Jong's programs.). Make a hash(associative array) to put entries
#             and descriptions of the items. The hash values have new lines '\n' are
#             attached, so that later write_head_box just sorts Title to the top
#             and prints without much calculation.
#             This is similar to read_head_box, but
#             This has one long straight string as value(no \n inside)
#             There are two types of ending line one is Jong's #---------- ...
#             the other is Astrid's  #*************** ...
# Example   : Output is something like
#             ('Title', 'read_head_box', 'Tips', 'Use to parse doc', ...)
# Warning   :
# Keywords  : open_head_box, open_headbox, read_headbox
# Options   : 'b' for remove blank lines. This will remove all the entries
#             with no descriptions
# Returns   : A hash ref.
# Argument  : One or None. If you give an argu. it should be a ref. of an ARRAY
#              or a filename, or ref. of a filename.
#             If no arg is given, it reads SELF, ie. the program itself.
# Version   : 2.7
#--------------------------------------------------------------------
sub read_head_box{
  my($i, $c, $d, $j, $s, $z, @whole_file, $title_found, %Final_out,
	  $variable_string, $TITLE, $title, @keys, $end_found, $line, $entry,
	  $entry_match, $End_line_num, $remove_blank,  $title_entry_null,
	  $end_found, $Enclosed_entry, $Enclosed_var, $blank_counter,
	  $title_entry_exist, $entry_value, $temp_W, $Warning_part
	);

  if(ref($_[0]) eq 'ARRAY'){ ## When array is given
	  @whole_file = @{$_[0]};
  }elsif(-e ${$_[0]}){       ## When filename is given in a ref
	  open(FILE, "${$_[0]}");
	  @whole_file=(<FILE>);
  }elsif(-e $_[0]){          ## When filename is given
	  open(FILE, "$_[0]");
	  @whole_file=(<FILE>);
  }elsif( $_[0] eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }elsif( ${$_[0]} eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }else{
	  open(SELF, "$0");
	  @whole_file=(<SELF>);
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  for($i=0; $i<@whole_file; $i++){
	 $whole_file[$i] =~ tr/\t/ {7}/;  ## This is quite important to some parsing!!!
	 #########################################
	 ##  The first and second line of box 1 ##
	 #########################################
	 if( ($whole_file[$i]=~/^#[_\*\~\-\=]{20,}$/)&&    ##  '#______' is discarded
		 ($whole_file[$i+1]=~/ *\# {0,3}([TitlNam]+e) {0,8}: {1,10}([\w\.:]*) *(Copyright.*)/i) ){
		 $TITLE = $1;      $title = "$2\n";   $Final_out{'Warning'}.="$3\n";
		 $entry_match=$TITLE; ## The very first $entry_match is set to 'Title' to prevent null entry
		 if($TITLE =~ /^Title|Name$/i){   #
			  if( ($title=~/^\s+$/)||( $title eq "\n") ){
				  $title_entry_null =1;  $title = '';  }    }
		 $Final_out{$TITLE}=$title;
		 $title_found ++ ;   $i++;  ## << this is essential to prevent reading the same line again.
		 last if $title_found > 1;    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ## The first and second line of box 2, #__________ or #**************
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($whole_file[$i]=~/^#[_\*]{20,}$/)&&
		 ($whole_file[$i+1]=~/^# {1,3}(\w{1,6}\s{0,2}\w+) {0,7}: {1,5}(.*) */i) ){
		 $title_found ++ ;        $i++;
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;  ## Capitalize words
		 $Final_out{$entry_match}.= "$entry_value\n";
		 last if $title_found > 1;  next;   }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  'Enclosed' : section. After this, everything is read without discrimination ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($Enclosed_entry == 1)&&($whole_file[$i] =~/^#{1,} {1,}(.*)$/) ){
		 $Final_out{$Enclosed_var}.= "$1\n";    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  With proper entry 1 : for  'eg)'
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,12}(eg ?\)) {0,8}(.*)/i)){
		 $entry_match='Example';
		 $Final_out{$entry_match}.= "$2\n";
	 }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  With PROPER entry 2 : descriptins like. 'Ussage : ssssssxxjkk  kj'
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)] {0,6}(.*) */i)){
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= "$entry_value\n";
		 if($entry_match=~/^(Enclosed?)$/i){
			  $Enclosed_entry = 1;  $Enclosed_var=$1;        }    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  With proper entry 3 : descriptins like. 'Ussage :', But blank description ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)]( {0,})$/i)){
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= " $entry_value\n";
		 if($entry_match=~/^(Enclosed?)$/i){
			  $Enclosed_entry = 1;  $Enclosed_var=$1;      }    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  $option variable matching                ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1) && ($title_found==1) &&
		 ($whole_file[$i]=~ /^# {1,15}([\$\@]\w+ +[\w=\>]+ +\S+ \w+ \S+ *.*)/ )){
		 $Final_out{$entry_match} .= "$1\n";  }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  all space line matching                 ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&  ##<----- If blank line is matched. Take the line
		 ($title_found==1)&&($whole_file[$i]=~/^# {0,}$/) ){
		 $blank_counter++;
		 if($blank_counter > 2){ $blank_counter--; }
		 else{ $Final_out{$entry_match}.= " \n";  }     }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  Anything after 3 space to 12 positions  ##
	 ###  To match 'examples' etc. INC. ':'       ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^#( {2,12})(.+)/) ){
		 $Final_out{$entry_match}.= "$1$2\n"; $blank_counter=0; }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  Anything after 1 space to 11 positions  ##
	 ###  To match 'examples' etc. EXC. ':'       ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^# {1,12}([^:.]+)/) ){
		 $Final_out{$entry_match}.= "$1\n"; $blank_counter=0;}

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###-------End of the read_box reading--------##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($title_found==1)&&
		 ($whole_file[$i]=~ /^#[\~\=\*\-]{15,}/)){  ## to match '#-----..' or '#******..'(Astrid's)
		 $End_line_num = $i;       $end_found++;
		 last;      }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  <<<<  Check if there is option table >>>>  #
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
  } ## < End of for loop


  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### If title is not there at all     ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  @keys=keys %Final_out;
  for(@keys){
	  if(/^Title$/i){    ## No Entry of Title at all??
		  $TITLE =$&;
		  $title_entry_exist = 1;
		  if($Final_out{$_}=~/^ *$/){   ## if Title => Null or just space
			  $title_entry_null = 1;    }  }  }

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### When title entry is not there    ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( $title_entry_exist != 1){
		for($s=$End_line_num+1; $s < $End_line_num+20; $s++){
			if( $whole_file[$s] =~ /^sub {1,5}([\w\.]+) {0,6}\{/){
				$Final_out{'Title'} = "$1\n";   last;       }
			elsif( $whole_file[$s] =~/^#________________________+/){
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{'Title'} = "$0";     last;
			}else{
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{'Title'} = "$0";
			}
		}
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### When title is blank              ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  elsif($title_entry_null ==1){  ## It looks for 'sub xxxxx{ ' line to get title
		### $End_line_num is the last line read.
		for($s = $End_line_num+1; $s < $End_line_num+20; $s++){
			if( $whole_file[$s] =~ /^sub {1,5}(\w+\.*\w*) {0,7}{/){
				$Final_out{$TITLE} = "$1\n";    last;     }
			elsif( $whole_file[$s] =~/^#________________________+/){
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{$TITLE} = "$0";     last;
			}else{
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{$TITLE} = "$0";
			}
		}
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ## Error handling, if no head box is found   ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if($title_found < 1){ print "\nFatal: No headbox found by read_head_box2 sub.\n";  }
  \%Final_out;
}               ##<<--- ENd of the sub read_head_box

#________________________________________________________________________
# Title     : read_first_head_box
# Usage     : %entries = %{&read_first_head_box(\$file_to_read )};
# Function  : Reads the header box(the one you see on top of sub routines of
#             Jong's programs.)
#             There are two types of ending line one is Jong's #---------- ...
#             the other is Astrid's  #*************** ...
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 2.0
#--------------------------------------------------------------------
sub read_first_head_box{
  my($title_found, %Final_out, $variable_string, $end_found, $line,
	  $entry, $entry_match );

  open(SELF, "${$_[0]}");
  my(@whole_file)=(<SELF>);
  for(@whole_file){
	 if($title_found > 2){ ### This is to stop reading the file while it has found a box
		 last;              ### already.
	 }elsif( / *\# *([Tt]*itle) *\: *(.*)$/){
		 $Final_out{$1}=$2;
		 $title_found ++
	 }elsif( ($end_found != 1)&&($title_found==1)&&(/^\#(         +[:]* +)(.+)$/) ){
		 $Final_out{$entry_match}.= "\n $1$2";
		# attaches to the last @entry_list element(ref)
	 }elsif( ($end_found < 1)&&($title_found==1)&&(/ *\# *(\w\w\w+) *\: *(.*)/)){
		 $entry_match=$1;
		 ${"count$1"}++;
		 if( ${"count$1"} > 1){
			 $Final_out{$1}.="\n             $2";
		 }else{
			 $Final_out{$entry_match}.= $2; }

	 ### Following is when entry line '# $certain_var = 1 by t'
	 }elsif( ($end_found != 1) && ($title_found==1) && (/^\# *([\$\@\%]+.+)/) ){
		 $line = $1;
		 if($entry_match =~ /[Oo]ption/){  ## if last entry was '# Option :', attach the variable directly.
			 $Final_out{$entry_match} .= "\n             $line";
		 }else{                            ## if last entry wasn't '# Option :', find Option
			 for $entry (keys %Final_out){  ##  and attach the variable to it
				 if ($entry =~ /[Oo]ption/){
					 $Final_out{$entry} .= "\n             $line";
				 }
			 }
		 }
	 }elsif( ($title_found==1)&&(/ *\#[\*\-]{12,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
		 $end_found++;
	 }elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
  }
  \%Final_out;
}                  ##<<--- ENd of the sub read_first_head_box


#________________________________________________________________________
# Title     : read_head_boxes
# Usage     : %entries = %{&read_head_box(\$file_to_read, ,,, )};
# Function  : Reads the introductory header box(the one you see on top of sub routines of
#             Jong's programs.). Make a hash(associative array) to put entries
#             and descriptions of the items.
# Example   : @hashes = @{&read_head_boxes(@ARGV)};
#             $num_of_sub = @hashes;
#             print "\n Number of subs was $num_of_sub\n";
# Warning   :
# Keywords  :
# Options   :
# Returns   : A hash ref.
# Argument  : one or more filenames
# Version   : 1.1
#--------------------------------------------------------------------
sub read_head_boxes{
  my($i, $c, $d, $j, $s, $z, @whole_file, $title_found, %Final_out,
	  $variable_string, $TITLE, $title, @keys, $end_found, $line, $entry,
	  $entry_match, $End_line_num, $remove_blank,  $title_entry_null,
	  $end_found, $Enclosed_entry, $Enclosed_var, $blank_counter,
	  $title_entry_exist, $entry_value, $temp_W, $Warning_part, @arr_of_hash
	);

  for($s=0; $s < @_; $s ++){
	 if( -e $_[$s]){
		open (IN, "$_[$s]");
		my @whole_file=<IN>;
		my ($end_found, $blank_counter, $TITLE, $title, $entry_match );
		for($i=0; $i< @whole_file; $i++){
	$whole_file[$i] =~ tr/\t/ {7}/ if($whole_file[$i] =~/\t/);  ## This is important!
	if( ($whole_file[$i]=~/^#_{50,}|#\*{50,}|#\-{50,}$/)&&
	   ($whole_file[$i+1]=~/ *\# {0,3}([Titl]+e) {0,8}: {0,10}([\-\w\.:]*) *(.*)/i) ){
	   $end_found = 0;
	   $TITLE = $1;
	   $title = "$2\n";
	   $entry_match=$TITLE;
	   if($TITLE =~ /^Title$/i){   # title name is given ?
	       if( ($title=~/^\s+$/)||( $title eq "\n") ){
		  $title_entry_null =1;
		  $title = '';
	       }else{
		  ${"Final_out$title"}{$TITLE}=$title;
		  push(@arr_of_hash, \%{"Final_out$title"} );
		  $title_found =1 ;
		  $i++;
	       }
	   }
	}elsif( ($end_found != 1)&&($title_found==1)&&  ## for eg) line
	   ($whole_file[$i]=~ /^# {1,12}(eg ?\)) {0,8}(.*)/i)){
	   $entry_match='Example';
	   ${"Final_out$title"}{$entry_match}.= "$2\n";
	}elsif( ($end_found != 1)&&($title_found==1)&& ## matching the most
	   ($whole_file[$i]=~ /^# {0,2}(\w{1,5}\s{0,2}\w{1,7}) {0,8}[:\)] {0,5}(.*) */i)){
	   $entry_match=$1;
	   $entry_value=$2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	   ${"Final_out$title"}{$entry_match}.= ": $entry_value\n";
		  }elsif( ($end_found != 1) && ($title_found==1) &&
	   ($whole_file[$i]=~ /^# {0,15}([\$\@]\w+ +[\w=\>]+ +\S+ \w+ \S+ *.*)/ )){
	   ${"Final_out$title"}{$entry_match} .= "$1\n";
	}elsif( ($end_found != 1)&&    ## Blank match
	   ($title_found==1)&&($whole_file[$i]=~/^# {0,}$/) ){
	   $blank_counter++;
	   if($blank_counter > 2){ $blank_counter--; }
	   else{ ${"Final_out$title"}{$entry_match}.= "\n";  }
	}elsif( ($end_found != 1)&& ## matching lines without entry.
	   ($title_found==1)&&($whole_file[$i]=~/^#( {1,12})(.+)/) ){
	   ${"Final_out$title"}{$entry_match}.= "$1$2\n";
			  $blank_counter=0;
	}elsif( ($end_found != 1)&&
	   ($title_found==1)&&($whole_file[$i]=~/^# {1,12}([^:.]+)/) ){
	   ${"Final_out$title"}{$entry_match}.= "$1\n";
			  $blank_counter=0;
	}elsif( ($title_found==1)&&   ## to match '#-----..' or '#*******..'(Astrid's)
	   ($whole_file[$i]=~ /^#[\*\-]{20,}/)){
	   $End_line_num = $i;
	   $end_found=1;
	   $title_found = 0;
	}
		}
	 }
  }
  return(\@arr_of_hash);
}

#________________________________________________________________________
# Title     : read_head_box2
# Usage     : %entries = %{&read_head_box(\$file_to_read )};
# Function  : Reads the header box(the one you see on top of sub routines of
#             Jong's programs.). This is similar to read_head_box, but
#             This has one long straight string as value(no \n inside)
#             There are two types of ending line one is Jong's #---------- ...
#             the other is Astrid's  #*************** ...
# Example   : Output is something like
#             ('Title', 'read_head_box', 'Tips', 'Use to parse doc', ...)
# Warning   :
# Keywords  :
# Options   : 'b' for remove blank lines. This will remove all the entries
#             with no descriptions
# Returns   : A hash ref.
# Argument  : One or None. If you give an argu. it should be a ref. of an ARRAY
#              or a filename, or ref. of a filename.
#             If no arg is given, it reads SELF, ie. the program itself.
# Version   : 1.5
#--------------------------------------------------------------------
sub read_head_box2{
  my($i, @whole_file, $title_found, %Final_out, $variable_string,
	  $end_found, $line, $entry, $entry_match, $remove_blank, $j );

  if(ref($_[0]) eq 'ARRAY'){ ## When array is given
	  @whole_file = @{$_[0]};
  }elsif(-e ${$_[0]}){       ## When filename is given in a ref
	  open(FILE, "${$_[0]}");
	  @whole_file=(<FILE>);
  }elsif(-e $_[0]){          ## When filename is given
	  open(FILE, "$_[0]");
	  @whole_file=(<FILE>);
  }elsif( $_[0] eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }elsif( ${$_[0]} eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }else{
	  open(SELF, "$0");
	  @whole_file=(<SELF>);
  }

  ########################################################################
  for($i=0; $i<@whole_file; $i++){
	 if( ($whole_file[$i]=~/^#________+$/)&&
		 ($whole_file[$i+1]=~/ *\# *(\w+\s{0,2}\w+) *\: *([\w\.\w]+) */) ){
		 my($entry)=$1;
		 my($title) = "$2\n";
		 $Final_out{$entry}=$title;
		 $title_found ++ ;
		 $i++;  ## << this is essential to prevent reading the same line again.
		 last if $title_found > 1;    }


	 elsif( ($end_found != 1)&&($title_found==1)&&
			  ($whole_file[$i]=~ /^# +(\w\w+\s{0,2}\w+) +:(.*)/)){
		 $entry_match=$1;
		 my($value) = "$2\n";
		 ${"count$1"}++;     ### If there were more than 2 identical entries
		 if( ${"count$1"} > 1){
			 $Final_out{$entry_match}.="$value\n"; ### When there are two or more idential entry
		 }
		 else{
			 if(($value eq '')||($value eq ' ')){
				 unless( $remove_blank == 1){
					 $value= " \n";
				 }
			 }
			 unless( $Final_out{$entry_match} eq $value ){    ## when the entry is not described,
				 $Final_out{$entry_match}.= "$value\n";
			 }
		 }  }

	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^# (.+)$/) ){
		 $Final_out{$entry_match}.= "$1\n"; ## To make a long string line.
	 } # attaches to the last @entry_list element(ref)

	 elsif( ($end_found != 1)&&  ##<--------- If blank line is matched. Take the line
		 ($title_found==1)&&($whole_file[$i]=~/^# *$/) ){
		 $Final_out{$entry_match}.= " \n"; ## To make a long string line.
	 }  # attaches to the last @entry_list element(ref)

	 elsif( ($end_found != 1) && ($title_found==1) &&
		 ($whole_file[$i]=~ /^# +([\$\@\%]+.+)/ )){
		 $Final_out{$entry_match} .= "$1\n";

	 }elsif( ($title_found==1)&&
		 ($whole_file[$i]=~ / *\#[\*\-]{14,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
		 $end_found++;
		 last;
	 }elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
  }
  if($title_found < 1){
	  print "\nFatal:  No headbox found by read_head_box2 sub.\n";  }
  \%Final_out;
}                  ##<<--- ENd of the sub read_head_box

#________________________________________________________________________
# Title     : read_all_head_boxes
# Usage     : %entries = %{&read_all_head_box(\$file_to_read )};
# Function  : Reads the header boxes(the one you see on top of sub routines of
#             Jong's programs.)
#             There are two types of ending line one is Jong's #---------- ...
#             the other is Astrid's  #*************** ...
# Example   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub read_all_head_boxes{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($title_found, $Title, $TITLE, $title_found, $end_found, $entry_match,
	 %count, $line, $entry, @boxes);
  if( -e $file[0] ){  ### If file is given it opens the file to find correct_head_box
	  open(OPEN_FILE, "$file[0]");  ## If not given, it uses running script to find it.
  }else{   open(OPEN_FILE, "$0"); }

  my(@whole_file)=(<OPEN_FILE>);

  for(@whole_file){
	 if( / *\# *([Tt][IiTtLlEe]+) *\: *([\w+\.\w+]*)/){
		 $Title = $1;
		 $TITLE = $2;
		 ${"$TITLE"}{$Title}=$TITLE;
		 $title_found = 1;
		 $end_found   = 0;
	 }elsif( ($end_found == 0)&&($title_found==1)&&(/^\#(         +[:]* +)(.+)$/) ){
		 ${"$TITLE"}{$entry_match}.= "\n $1$2";
		# attaches to the last @entry_list element(ref)
	 }elsif( ($end_found == 0 )&&($title_found==1)&&(/ *\# *(\w{3,11}) *\: *(.*)/)){
		 $entry_match=$1;
		 $count{"$TITLE$1"} ++;
		 if( $count{"$TITLE$1"} > 1){
			 ${"$TITLE"}{$1}.="\n             $2";
		 }else{
			 ${"$TITLE"}{$entry_match}.= $2; }

	 ### Following is when entry line '# $certain_var = 1 by t'
	 }elsif( ($end_found != 1) && ($title_found==1) && (/^\# *([\$\@\%]+.+)/) ){
		 $line = $1;
		 if($entry_match =~ /[Oo]ption/){  ## if last entry was '# Option :', attach the variable directly.
			 ${"$TITLE"}{$entry_match} .= "\n             $line";
		 }else{                            ## if last entry wasn't '# Option :', find Option
			 for $entry (keys %{"$TITLE"}){  ##  and attach the variable to it
				 if ($entry =~ /[Oo]ption/){
					 ${"$TITLE"}{$entry} .= "\n             $line";
				 }
			 }
		 }
	 }elsif( ($title_found==1)&&(/ *\#[\*\-]{12,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
		 $end_found = 1; $title_found=0;
		 push(@boxes, \%{"$TITLE"});

	 }elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
  }
  if(@boxes > 1){ \@boxes; }
  elsif( @boxes==1){ $boxes[0]; }
}                  ##<<--- ENd of the sub read_head_box

#____________________________________________________________________
# Title    : correct_head_box
# Function : Makes headbox in right and updated format. The most
#            updated headbox format is very this headbox. So, to
#            change all other headbox format, change this first.
# Usage    : just type correct_head_box.pl with a file name.
# Example  : correct_head_box.pl Bio.pl
# Argument : a filename
# Returns  :
# Options  :
# Version  : 1.1
# Keywords :
# Warning  :
#---------------------------------------------------------------
sub correct_head_box{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  ## Reading self and the template headbox in this subroutine.
  my %correct_head_box_entry = %{&read_correct_head_box()};
  for($p=0; $p < @file; $p++){
	  $in_file = $file[$p];

	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  ##  Make backup of the input file
	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  &cp( "$in_file", "$in_file\.bak$$");
	  print "\n $in_file\.bak$$ is created as a backup \n\n";
	  print chr(7);

	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  ##  Open files
	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  open(TARGET, "$in_file\.bak$$");
	  open(RESULT, ">$in_file");

	  my(@whole_file) = <TARGET>;
	  my(@keys) = keys %correct_head_box_entry;

	  for($i=0; $i < @whole_file; $i++){   ## <= going through the whole lines
		  my(@BOX);
		  if(($whole_file[$i] =~ /^#___________+/)&&
			  ($whole_file[$i+1] =~ /^# {0,4}([\w+\s*\w+]+) {0,4}: {0,9}([\w+\.\w+]*)/ ) ){
			  my($sub_name)=$2;
			  until( ($whole_file[$i] =~ /^sub +([\w\.]+) *\{/)||($whole_file[$i] =~ /^#\-{15,}/)
			  || ($whole_file[$i] =~ /^#\*{15,}/) ){
				  push(@BOX, $whole_file[$i]);
				  $i++;
			  }

			  ### To get the sub name by reading  'sub xxxxxxx{ ' line after #---------
			  for($z=$i; $z < $i+5; $z++){
				  if($whole_file[$z]=~/^sub +([\w\.]+) *\{/){
					  $sub_name=$1;
					  last;
				  }
			  }

			  my(%Final_out)=%{&read_head_box(\@BOX)};  ## Putting into a hash
			  ### Now I have %Final_out and %correct_head_box_entry
			  my(%correct) =%{&superpose_hash(\%correct_head_box_entry, \%Final_out)};
			  print RESULT @{&write_head_box(\%correct)};

			  until($whole_file[$i]=~/^sub +([\w\.]+) *\{/){  $i++ }
			  if($whole_file[$i]=~/^sub +([\w\.]+) *\{/){
				  until( $whole_file[$i] =~ /^}/){
					  print RESULT $whole_file[$i];
					  $i++;
				  }
				  print RESULT $whole_file[$i];
			  }
		  }elsif($whole_file[$i]=~/^sub +([\w\.]+) *\{/){  ### When there is no headbox at all.
			  $correct_head_box_entry{'Title'}=$1;
			  $correct_head_box_entry{'Version'}='1.0';
			  $correct_head_box_entry{'Author'}=getlogin;
			  print RESULT @{&write_head_box(\%correct_head_box_entry)};
			  print RESULT $whole_file[$i++];
			  until( $whole_file[$i] =~ /^}/){
				  print RESULT $whole_file[$i];
				  $i++;
			  }
			  print RESULT $whole_file[$i];

		  }else{  print RESULT $whole_file[$i]; }
	  }
	}
}

#________________________________________________________________________
# Title     : read_correct_head_box
# Usage     :
# Function  : This reads correct_head_box only.
# Example   :
# Keywords  : read_update_head_box, read update headbox
# Options   : v  for verbose message printing.
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub read_correct_head_box{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my(%Final_out, $title_found, $end_found, $entry_match, $title, $TITLE,
	 $blank_counter, $end_found);
  my $UID = getlogin;

  if( -e $file[0] ){  ### If file is given it opens the file to find correct_head_box
	  open(OPEN_FILE, "$file[0]");  ## If not given, it uses running script to find it.
  }else{   open(OPEN_FILE, "$0"); }

  my(@whole_file)=(<OPEN_FILE>);
  my($correct_head_box_name)  = 'correct_head_box';

  DO_IT_AGAIN_WITH_DIFF_NAME:
  for($i=0; $i < @whole_file; $i ++){

	 $whole_file[$i] =~ tr/\t/ {7}/;  ## This is quite important to some parsing!!!

	 #######################################
	 ##  The first and second line of box ##
	 #######################################
	 if( ($whole_file[$i]=~/^#____________+$/)&&    ##  '#______' is discarded
		 ($whole_file[$i+1]=~/ *\# {0,3}([TitlNam]+e) {0,8}: ([read_]*correct_head_box[\.pl]*)/i) ){
		 $TITLE = $1;
		 $title = "$2\n";
		 if($TITLE =~ /^Title|Name$/i){   #
			  if( ($title=~/^\s+$/)||( $title eq "\n") ){
				  $title_entry_null =1;
				  $title = '';
			  }
		 }
		 $Final_out{$TITLE}=$title;
		 $title_found ++ ;
		 $i++;  ## << this is essential to prevent reading the same line again.
		 last if $title_found > 1;    }

	 ############################################################################
	 ## The first and second line of box 2
	 ############################################################################
	 elsif( ($end_found != 1)&&($whole_file[$i]=~/^#____________+$/)&&
		 ($whole_file[$i+1]=~/^# {1,3}(\w{1,6}\s{0,2}\w+) {0,7}: {1,5}(.*) */) ){
		 $title_found ++ ;
		 $i++;
		 $entry_match=$1;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= "$2\n";
		 last if $title_found > 1;  next;   }

	 ##################################################################################
	 ##  'Enclosed' : section. After this, everything is read without discrimination ##
	 ##################################################################################
	 elsif( ($Enclosed_entry == 1)&&($whole_file[$i] =~/^#{1,} {1,}(.+)$/) ){
		 $Final_out{$Enclosed_var}.= "$1\n";    }

	 ############################################################################
	 ##  With proper entry 1 : descriptins like. 'Ussage : ssssssxxjkk  kj'      ##
	 ############################################################################
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,8}: {1,5}(.*) */)){
		 $entry_match=$1;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= "$2\n";
		 if($entry_match=~/^(Enclosed*)$/i){
			 $Enclosed_entry = 1;  $Enclosed_var=$1;
		 }
	 }

	 ##############################################################################
	 ##  With proper entry 2 : descriptins like. 'Usage :', But blank description ##
	 ##############################################################################
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,9}: {0,}$/)){
		 $entry_match=$1;
		 $Final_out{$entry_match}.= "\n";
		 if($entry_match=~/^(Enclosed*)$/i){
			 $Enclosed_entry = 1;  $Enclosed_var=$1;
		 }
	 }

	 ###############################################
	 ### $option variable matching                ##
	 ###############################################
	 elsif( ($end_found != 1) && ($title_found==1) &&
		 ($whole_file[$i]=~ /^# {1,15}([\$\@]\w+ +[\w=\>]+ +\S+ \w+ \S+ *.*)/ )){
		 $Final_out{$entry_match} .= "$1\n";  }

	 ###############################################
	 ###  all space line matching                 ##
	 ###############################################
	 elsif( ($end_found != 1)&&  ##<----- If blank line is matched. Take the line
		 ($title_found==1)&&($whole_file[$i]=~/^# {0,}$/) ){
		 $blank_counter++;
		 if($blank_counter > 2){ $blank_counter--; }
		 else{ $Final_out{$entry_match}.= " \n";  }
	 }

	 ###############################################
	 ###  Anything after 3 space to 12 positions  ##
	 ###  To match 'examples' etc. INC. ':'       ##
	 ###############################################
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^# {2,12}(.+)/) ){
		 $Final_out{$entry_match}.= "$1\n"; $blank_counter=0; }

	 ###############################################
	 ###  Anything after 1 space to 11 positions  ##
	 ###  To match 'examples' etc. EXC. ':'       ##
	 ###############################################
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^# {1,12}([^:.]+)/) ){
		 $Final_out{$entry_match}.= "$1\n"; $blank_counter=0;}

	 ###############################################
	 ###-------End of the read_box reading--------##
	 ###############################################
	 elsif( ($title_found==1)&&
		 ($whole_file[$i]=~ /^#[\*\-]{15,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
		 $End_line_num = $i;
		 $end_found++;
		 last;      }

	 ###############################################
	 #  <<<<  Check if there is option table >>>>  #
	 ###############################################
	 elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
  }
  @keys=keys %Final_out;
  for($i=0; $i < @keys; $i++){
	  if(   $keys[$i] =~/^Example/i){    $Final_out{$keys[$i]}=''; }
	  elsif($keys[$i] =~/^Version/i){    $Final_out{$keys[$i]}=''; }
	  elsif($keys[$i] =~/^Function/i){   $Final_out{$keys[$i]}=''; }
	  elsif($keys[$i] =~/^Options/i){    $Final_out{$keys[$i]}=''; }
	  elsif($keys[$i] =~/^Keywords/i){   $Final_out{$keys[$i]}=''; }
	  elsif($keys[$i] =~/^Class/i){      $Final_out{$keys[$i]}=''; }
	  elsif($keys[$i] =~/^Author/i){     $Final_out{$keys[$i]}=$UID; }
  }
  return(\%Final_out);
}

#____________________________________________________________________
# Title    : write_head_box
# Function : gets a hash ref. and writes the head box for a subroutine
# Usage    :
# Example  :
# Argument :
# Returns  :
# Options  : v  for verbose representation. This will print boxes on STDOUT
#            n  for no '#' leader.
# Version  : 2.2
# Keywords : write_headbox
#---------------------------------------------------------------
sub write_head_box{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($KL)=9;  my($VL)=60; ## key length( like in  # Title )  ## value length
  $num = 80;
  if($char_opt =~ /n/i){
	  $CS=' '; ## Comment symbol. For help display, you can change into ' '
  }else{
	  $CS='#'; ## Comment symbol. Default head_box display.
  }
  for($x=0; $x < @hash; $x++){
		my(%input) = %{$hash[$x]};  my(@keys)= sort (keys %input);
		my(@out);
		###########################################################################
		##  PUTTING an order in the printout entries. To make 'Title' come first
		###########################################################################
		for($i=0; $i < @keys; $i++){
		  if($keys[$i]=~/^Title/i){
			  $temp=$keys[0];	   $keys[0]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Enclosed?/i){
			  $temp=$keys[$#keys];	   $keys[$#keys]=$keys[$i];   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Usage$/i){
			  $temp=$keys[1];	   $keys[1]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Function/i){
			  $temp=$keys[2];	   $keys[2]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Example/i){
			  $temp=$keys[3];	   $keys[3]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Version/i){
			  $temp=$keys[$#keys-2];  $keys[$#keys-2]=$keys[$i];   $keys[$i]=$temp;
			  #### To make null version value to '1.0'
			  if($input{$keys[$#keys-2]}=~/^ *$/){ $input{$keys[$#keys-2]}='1.0'; }
		  }elsif($keys[$i]=~/^Warning/i){
			  $temp2=$keys[$#keys-1]; $keys[$#keys-1]=$keys[$i];   $keys[$i]=$temp2;
		  }
		}
		############################################################
		##       Writing starting line                            ##
		############################################################
		my($start_line) = "$CS".'_'x"$num"."\n";
		if( $char_opt =~ /v/i){
			print $start_line;      }

		my($Enclosed_came);  ## <<-- This should be HERE !
		for( $i =0; $i < @keys; $i++){  #### @keys has been sorted before.
		  my($Len) = length($input{$keys[$i]});  $delimiter = ':';
		  my($entry) = $keys[$i];
		  $entry =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e; ## capitalizing word
		  if($entry=~/^Enclosed?$/i){ $Enclosed_came = 1; }
		  my(@input) = split(/\n+/, $input{$keys[$i]});
		  if(@input > 0){
			  for($j =0; $j < @input; $j++){
				 ## If NO entry name(blank) is given    ##
				 if($j > 0){  ## If the value is a multi line.
					  $entry = '';   $delimiter=' ';    }
				 if( $char_opt =~ /v/i){
					  ######################################################################
					  ###  This is to reduce the entry length of Enclosed content lines   ##
					  ######################################################################
					  if( ($Enclosed_came==1)&&($entry eq '') ){ $KL=2; $VL=80; }
					  printf("$CS %-${KL}s $delimiter %-${VL}s\n", $entry , $input[$j]);    }
				 if( ($Enclosed_came==1)&&($entry eq '') ){ $KL=2; $VL=80; }
				 $out[$k++]=sprintf("$CS %-${KL}s $delimiter %-${VL}s\n", $entry , $input[$j]);
				 if($entry=~/^Enclosed?/){ $Enclosed_came = 1; }   }}
		  ######################################################################
		  ##   If the entries have null descriptions, just print entries  ######
		  ######################################################################
		  elsif(@input ==0){
				 if( $char_opt =~ /v/i){
					  printf("$CS %-${KL}s $delimiter %-${VL}s\n", $entry , ' ');   }
				 $out[$k++]=sprintf("$CS %-${KL}s $delimiter %-${VL}s\n", $entry , ' ');
		  }
		}
		############################################################
		##       Writing  Ending  line                            ##
		############################################################
		$end_line = "$CS".'-'x"$num"."\n";
		if( $char_opt =~ /v/i){   print $end_line;   }
		unshift(@out, $start_line);   push(@out, $end_line);   push(@Final_out, \@out);
  }
  if(@Final_out > 1){ @Final_out; }
  elsif( @Final_out==1){ $Final_out[0] }
} #<--- END of write_head_box



#________________________________________________________________________
# Title     : read_option_table
# Usage     :
# Function  : Reads the option table made by Jong in any perl script. The
#             option table is a box with separators.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub read_option_table{
	my($table_found, @option_tb, $head);
	 open(SELF, "${$_[0]}");
	 while(<SELF>){
		if( (/^ *#+/) && ( $table_found== 1) ){
		  push (@option_tb, "$_");
		}elsif( ($table_found != 1)&&(/^ *\#+ *[Oo]ption *[Tt]able */) ){
			$table_found=1; $head="############## Option Table for $logname\'s \"$0\"\n"; ##
			push(@option_tb, $head);
		}
		if( ($table_found==1)&&(/^ *###################+ *$/)){  ### to find the end point of reading
			$table_found =0; last; }
	 }
	 return(\@option_tb);
}



#________________________________________________________________________
# Title     : default_help
# Usage     : &default_help2;  usually with 'parse_arguments' sub.
# Function  : Prints usage information and others when invoked. You need to have
#             sections like this explanation box in your perl code. When invoked,
#             default_help routine reads the running perl code (SELF READING) and
#             displays what you have typed in this box.
#             After one entry names like # Function :, the following lines without
#             entry name (like this very line) are attached to the previous entry.
#             In this example, to # Function : entry.
# Example   : &default_help2; &default_help2(\$arg_num_limit);   &default_help2( '3' );
#             1 scalar digit for the minimum number of arg (optional),
#             or its ref. If this defined, it will produce exit the program
#             telling the minimum arguments.
# Warning   : this uses format and references
# Keywords  :
# Options   :
# Returns   : formated information
# Argument  :
# Version   : 3.3
#--------------------------------------------------------------------
sub default_help{
  my($i, $perl_dir, $arg_num_limit, $head ,$arg_num_limit,
	  @entries, @entries_I_want_write );
  my($logname)=getlogin();
  my($pwd)=`pwd`;
  my($date)=`date`;
  chomp($date,$pwd);
  my($not_provided)="--- not provided ---\n";
  my($file_to_read) = $0;

  for($i=0; $i < @_; $i ++){
	  if((ref($_[$i]) eq 'SCALAR')&&(${$_[$i]} =~ /^\d$/)){
		  $arg_num_limit = ${$_[$i]};  }
	  elsif( (!(ref($_[$i]))) && ($_[$i] =~ /^\d$/)){
		  $arg_num_limit = $_[$i];     }
  }
  my %entries = %{&read_head_box(\$file_to_read )};
  if($option_tb_found ==1){
	 @option_tb=@{&read_option_table(\$file_to_read)};
  }

  @entries = keys %entries;
  foreach $help_item (@entries){
	  ${$help_item}= $not_provided if( ${$help_item}=~/^[\W]*$/  and  !defined(${$help_item}) );
  }
  #""""""""""""""""""""""""""""""""""""""""
  #########  Writing the format <<<<<<<<<<<
  #""""""""""""""""""""""""""""""""""""""""
  $~ =HEADER_HELP;
  write;   ## <<--  $~ is the selection operator
  $~ =DEFAULT_HELP_FORM;

  @entries_I_want_write=sort keys %entries;

  for( @entries_I_want_write ){  write  }

  print chr(7);  print "_"x72,"\n\n";

  if(@ARGV < $arg_num_limit){ print "\* $0 fataly needs $arg_num_limit arguments\n\n" }

  if(  $option_tb_found == 1){
	 #########  Printing the OPTION table contents <<<<<<<<<<<<
	 print "  Press \"Return\" key to see what options $logname\'s \n\n    \"$0\" take... \n";
		 $key_press=getc();
	 print @option_tb, "\n"x2 if(@option_tb > 0);
  }
format HEADER_HELP  =
_____________________________________________________________________
		  __  __      ______     __          _____
		 /\ \/\ \    /\  ___\   /\ \        /\  _ `\
		 \ \ \_\ \   \ \ \__/   \ \ \       \ \ \L\ \
		  \ \  _  \   \ \  _\    \ \ \       \ \ ,__/
		   \ \ \ \ \   \ \ \/___  \ \ \_____  \ \ \/
		    \ \_\ \_\   \ \_____\  \ \______\  \ \_\
		     \/_/\/_/    \/_____/   \/______/   \/_/ V 3.1`
_____________________________________________________________________
.
format DEFAULT_HELP_FORM =
 @<<<<<<<<<: @*
 $_        $entries{$_}
.
}



#________________________________________________________________________
# Title     : default_help_old
# Usage     : &default_help;  usually with 'parse_arguments' sub.
# Function  : prints usage information and others when invoked. You need to have
#             sections like this explanation box in your perl code. When invoked,
#             default_help routine reads the running perl code (self reading) and
#             displays what you have typed in this box.
#             After one entry names like # Function :, the following lines without
#             entry name (like this very line) are attached to the previous entry.
#             In this example, to # Function : entry.
# Example   : &default_help; &default_help(\$arg_num_limit);   &default_help( '3' );
# Warning   : this uses format and references
# Keywords  :
# Options   :
# Returns   : formated information
# Argument  : 1 scalar digit for the minimum number of arg (optional),
#             or its ref. If this defined, it will produce exit the program
#             telling the minimum arguments.
# Version   : 2.0
#--------------------------------------------------------------------
sub default_help_old{
  my($i, $perl_dir, $title_found,$usage, $prog_title);
  my($version, $author, $package, $warning, $tips, @option_tb);
  my($arg_num_limit, $option, $pwd,$function,$argument );
  my($file_to_read, $returns, $end_found,$example, $head);
  my($logname)=getlogin(); my($pwd)=`pwd`; my($date)=`date`; chomp($date,$pwd);
  my($not_provided)="--- not provided ---\n";

  for($i=0; $i < @_; $i ++){
	  if((ref($_[$i]) eq 'SCALAR')&&(${$_[$i]} =~ /^\d$/)){
		  $arg_num_limit = ${$_[$i]};  }
	  elsif( (!(ref($_[$i]))) && ($_[$i] =~ /^\d$/)){
		  $arg_num_limit = $_[$i];     }
  }

  #################################################################
  ##########       SElf reading part       ########################
  #################################################################

  $file_to_read = $0;
  &read_head_box_old(\$file_to_read);
  sub read_head_box_old{
	 my(@entry_list, $help_item, $temp, $usa, $exa, $war,
		 $opt, $arg, $ret, $fun, $tip, $aut, $ver, $pac);
	 open(SELF, "${$_[0]}");
	 while(<SELF>){
		if(/\#\![\/]*([\w\/]+) *([-\w]*) *([-\w]*) *([-\w]*) *([-\w]*)/){ ## matching #!/nfs/ind5/usr/bin/perl
			$perl_dir = $1;  $perl_options=join(" ", ($2,$3,$4,$5) );
			push(@entry_list, \$perl_options); next; }
		elsif( ($title_found != 1)&&(/ *\# *[Tt]*itle *\: *(.*)$/) ){
			$prog_title=$1; $title_found=1  }
																		 #          :
		elsif( ($end_found != 1)&&($title_found==1)&&(/^\#         +[:]* +(.+)$/) ){
			$temp ="\n                $1";         # this is to handle explanation without names.
			${$entry_list[$#entry_list]} .=$temp; }# attaches to the last @entry_list element(ref)

		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Ff]unction *\: *(.*)/)){
			$fun++; if($fun > 1){ $function.="\n                $1";}else{ $function.=$1  }
			push (@entry_list, \$function)  }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Uu]sage *\: *(.*)/)){
			$usa++; if($usa > 1){ $usage.="\n                $1";}else{ $usage.=$1  }
			push (@entry_list, \$usage)     }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Ee]xample *\: *(.*)/)){
			$exa++; if($exa > 1){ $example.="\n                $1";}else{ $example.=$1  }
			push (@entry_list, \$example )  }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Aa]rgument[s]* *\: *(.*)/)){
			$arg++; if($arg > 1){ $argument.="\n                $1";}else{ $argument.=$1 }
			push (@entry_list, \$argument)  }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Oo]ption[s]* *\: *(.*)/)){
			$opt++; if($opt > 1){ $option.="\n                $1";}else{ $option.=$1 }
			push (@entry_list, \$option)  }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Rr]eturn[s]* *\: *(.*)/)){
			$ret++; if($ret > 1){ $returns.="\n                $1";}else{ $returns.=$1  }
			push (@entry_list, \$returns)   }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Tt]ips *\: *(.*)/)){
			$tip++; if($tip > 1){ $tips.="\n                $1";}else{ $tips.=$1 }
			push (@entry_list, \$tips)      }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Aa]uthor *\: *(.*)/)){
			$aut++; if($aut > 1){ $author.="\n                $1";}else{ $author.=$1 }
			push (@entry_list, \$author)      }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Vv]ersion *\: *(.*)/)){
			$ver++; if($ver > 1){ $version.="\n                $1";}else{ $version.=$1 }
			push (@entry_list, \$version)      }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Pp]ackage *\: *(.*)/)){
			$pac++; if($pac > 1){ $package.="\n                $1";}else{ $package.=$1 }
			push (@entry_list, \$package)      }
		elsif( ($end_found != 1)&&($title_found==1)&&(/ *\# *[Ww]arning *\: *(.*)/)){
			$war++; if($war > 1){ $warning.="\n                $1";}else{ $warning.=$1  }
			push (@entry_list, \$warning)   }
		elsif( ($end_found != 1)&&($title_found==1)&&(/^ *[\#]+------------------------*/)){
			$end_found = 1;    }

		if( (/^ *#+/) && ( $table_found== 1) ){ push (@option_tb, "$_"); }
		elsif( ($table_found !=1)&&(/^ *\#+ *[Oo]ption *[Tt]able/) ){
			$table_found=1; $head="############## Option Table for $logname\'s \"$0\"\n"; ##
			push(@option_tb, $head); }

		if( ($table_found==1)&&(/^ *###################+ *$/)){  ### to find the end point of reading
			$table_found =0; last;}
	 }
  } ##<<--- ENd of the sub read_head_box


  foreach $help_item (@entry_list)  ## substituing with 'Not provided' message when there is no info
  {
	  ${$help_item}= $not_provided if( (${$help_item}=~/^[\W]*$/)||( !defined(${$help_item})) );
  }
  @argus = @ARGV;
  shift(@argus) if (-f $argus[0]);
  $options = join(" ", @argus); #### to show what option you put, later.

  #########################################
  #########  Writing the format <<<<<<<<<<<
  #########################################
  $~ =HEADER_HELP; write;   ## <<--  $~ is the selection operator
  $~ =DEFAULT_HELP; write;
  print chr(7); print "\n\n";

  if(@ARGV < $arg_num_limit){ print "\* $0 fataly needs $arg_num_limit arguments\n\n" }

  #########  Printing the OPTION table contents <<<<<<<<<<<<
  print "  Press \"Return\" key to see what options $logname\'s \n\n    \"$0\" take... \n";
	  $key_press=getc();
  print @option_tb, "\n"x2 if(@option_tb > 0);

format HEADER_HELP  =
_____________________________________________________________________________
				  __  __      ______      __           _____
				 /\ \/\ \    /\  ___\    /\ \         /\  _ `\
				 \ \ \_\ \   \ \ \__/    \ \ \        \ \ \L\ \
				  \ \  _  \   \ \  _\     \ \ \        \ \ ,__/
					\ \ \ \ \   \ \ \/___   \ \ \_____   \ \ \/
					 \ \_\ \_\   \ \_____