#!/usr/bin/perl
# Last Update by /gn0/jong/Perl/update_subroutines.pl: Tue May  6 23:09:13 BST 1997
#_________________________________________________________________________
# Title     : write_pdbg_files.pl
# Usage     : write_pdbg_files.pl PDB40D_FASTA.fa
# Function  : gets the classificaiton of scop in pdb40d.fa like file and
#             produces pdb40d.pdbg file.
#
#             1.1.1.1.4  means: Class.Fold.Superfamily.Family.Protein
#
# Example   :
# Input>
#   >d2sn3__ 7.3.6.1.1 scorpion toxin [Centruroides sculpturatus ewing, variant 3]
#   KEGYLVKKSDGCKYGCLKLGENEGCDTECKAKNQGGSYGYCYAFACWCEGLPESTPTYPL
#
# OUTPUT>
#   >d2cmd_1 3.18.1.5.2 (1-145) Malate dehydrogenase [Escherichia coli]
#   >d2naca2 3.18.1.4.1 (148-335) Formate dehydrogenase [Pseudomonas sp. 101]
#
# Keywords  : make_groups_from_pdb40d, make_groups_from_scop, make_superfamilies
#             write_pdbg_files, make_pdbg_files, make_pdb_group_files,
#             write_pdbg, make_groups_from_pdb_entries
# Options   :
#    $write_non_redundant_seq_file_version=n by n
# Returns   :
# Argument  :
# Version   : 1.0
#------------------------------------------------------------------------

@files=@{&parse_arguments(1)};
print "\n# (i) Input files were @files\n";
&write_pdbg_files(\@files, $write_non_redundant_seq_file_version);

#______________________________________________________________________
# Title     : write_pdbg_files
# Usage     : &write_pdbg_files(\@files);
# Function  : gets the classificaiton of scop in pdb40d.fa like file and
#             produces pdb40d.pdbg file.
#
#             1.1.1.1.4  means: Class.Fold.Superfamily.Family.Protein
#
# Example   :
# Input>
#   >d2sn3__ 7.3.6.1.1 scorpion toxin [Centruroides sculpturatus ewing, variant 3]
#   KEGYLVKKSDGCKYGCLKLGENEGCDTECKAKNQGGSYGYCYAFACWCEGLPESTPTYPL
#
# OUTPUT>
#   >d2cmd_1 3.18.1.5.2 (1-145) Malate dehydrogenase [Escherichia coli]
#   >d2naca2 3.18.1.4.1 (148-335) Formate dehydrogenase [Pseudomonas sp. 101]
#
# Keywords  : make_groups_from_pdb40d, make_groups_from_scop, make_superfamilies
#             write_pdbg_files, make_pdbg_files, make_pdb_group_files,
#             write_pdbg, make_groups_from_pdb_entries
# Options   :
#    $write_non_redundant_seq_file_version=n by n
#
# Returns   :
# Argument  :
# Version   : 1.3
#--------------------------------------------------------------------------
sub write_pdbg_files{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($write_non_redundant_seq_file_version);

    if($char_opt=~/n/){ $write_non_redundant_seq_file_version='n' }

    for($i=0; $i< @file; $i++){
       my(%superfamily_groups, $seq, $classification, $class, $fold, $superfamily,
          $family, $protein, $description, $group_num_counter, @keys, @values,
          $all_seq_in_input_classification, $num_of_singlets, $all_possible_pair_combination,
          $alpha_class, $beta_class, $alpha_and_beta_class, $alpha_plus_beta_class,
          %non_redundant_seqs);

       $base=${&get_base_names($file[$i])};
       $out_pdbg_file="$base\.pdbg";
       print "\n# (i) \$out_pdbg_file is $out_pdbg_file from $base $file[$i]\n";
       $non_redundant_seq_fasta_outputfile="$base\_non_redun\.mpfa"; # mpfa: multi, protein fasta file
       open(PDB_SEQ_FILE, "$file[$i]");

       while(<PDB_SEQ_FILE>){  # matching  1.1.1.1.4  <-- classification,
          if(/\> {0,3}(\S+)[\t ]+((\d+)\.(\d+)\.(\d+)\.(\d+)\.(\d+))[\t ]*(.*)/){
              $seq=$1;
              $classification=$2;
              $class = $3;
              $fold  = $4;
              $seq_check{$seq}++;
              if($seq_check{$seq} > 1){ print "\n# (E) BUGG! $seq occurred $seq_check{$seq} TIMES!!";
                  $seq_name_with_classification='';
                  $write_non_redundant_seq_file_version='n'; # preventing writing non-redundant seq fasta file
                  next;
              }else{
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Following assigns seq to hash to make non-redundant seqname-> seq hash. To prevent Tim's bug of redundancy in pdb95 etc
                  #_______________________________________________________________________________________
                  $seq_name_with_classification="$seq $classification";
              }
              $superfamily=$5;
              $family=$6;
              $protein=$7;
              $description=$8;
              $fold_groups{"$class\.$fold"}.="\>$seq $classification $description\n";
              $superfamily_groups{"$class\.$fold\.$superfamily"}.="\>$seq $classification $description\n";
              $superfamily_member_count{"$class\.$fold\.$superfamily"}++;
              $all_seq_in_input_classification++;
           }elsif(/(\S+)/){
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # Assigns seq to hash to make non-redundant seqname-> seq hash. To prevent Tim's bug of redundancy in pdb95 etc
              #_______________________________________________________________________________________
              if($seq_name_with_classification){
                  $non_redundant_seqs{$seq_name_with_classification}.=$1;
              }
              print "\n# !! $0: This line is not suitable for pdbg conversion !!" if $verbose;
           }
        }
        close PDB_SEQ_FILE;

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Writing PDBG file
        #__________________________________________________________________________
        open(PDBG, ">$out_pdbg_file");
            @keys= sort keys %superfamily_groups;
            for($j=0; $j < @keys; $j++){
                $superfamily_member_count = $superfamily_member_count{$keys[$j]};

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~`
                # Calculates the possible combination here    (X*(X-1))/2
                #_____________________________________________________________
                $possible_combination=$superfamily_member_count*($superfamily_member_count-1)/2;
                $all_possible_pair_combination +=$possible_combination;
                print "\n# \$possible_combination for $keys[$j] family is $possible_combination";

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Following is to calculate the number of supfam members in each class
                #________________________________________________________________________
                if($keys[$j]=~/^((\d+)\.(\d+))\.\d+/){
                    $super_fam_num_in_class{$2}++;
                }

                if($keys[$j]=~/^1\./){  ## alfa class
                    $alpha_class+=$possible_combination;
                }elsif($keys[$j]=~/^2\./){  ## beta class
                    $beta_class+=$possible_combination;
                }elsif($keys[$j]=~/^3\./){  ## beta class
                    $alpha_and_beta_class+=$possible_combination;
                }elsif($keys[$j]=~/^4\./){  ## beta class
                    $alpha_plus_beta_class+=$possible_combination;
                }
                if($superfamily_member_count{$keys[$j]} < 2){
                      $num_of_singlets ++;   next; ## skipping singlets
                }else{
                      print PDBG $superfamily_groups{$keys[$j]}, "\n";
                      $group_num_counter++;
                }
          }
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Following for loop is to get numb of folds in each class
          #___________________________________________________________
          @keys= keys %fold_groups;
          for($j=0; $j < @keys; $j++){

          $fold_member_count = $fold_member_count{$keys[$j]};
          $possible_combination=$fold_member_count*($fold_member_count-1)/2;
          $all_possible_pair_combination +=$possible_combination;
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Following is to calculate the number of supfam members in each class
          #________________________________________________________________________
          if($keys[$j]=~/^(\d+)\.\d+ *$/){
              $fold_num_in_class{$1}++;
          }
       }

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Writing %non_redundant_seqs making NEW fasta file from input fasta
       #____________________________________________________________________
       if($write_non_redundant_seq_file_version){
          &write_fasta(\%non_redundant_seqs, \$non_redundant_seq_fasta_outputfile);
       }

       my $num_of_seq_in_group=$all_seq_in_input_classification-$num_of_singlets;

       print "\n# No of Groups                : $group_num_counter";
       print "\n# No of All seq in the input  : $all_seq_in_input_classification";
       print "\n# No of singlets              : $num_of_singlets";
       print "\n# No of seq in the $group_num_counter groups : $num_of_seq_in_group";
       print "\n# No of all poss pair combi   : $all_possible_pair_combination";
       print "\n# No of poss alpha class combi: $alpha_class";
       print "\n# No of poss beta  class combi: $beta_class";
       print "\n# No of poss a/b   class combi: $alpha_and_beta_class";
       print "\n# No of poss a+b   class combi: $alpha_plus_beta_class";
       print PDBG "\n# No of Groups: $group_num_counter";
       print PDBG "\n# No of All seq in the input: $all_seq_in_input_classification";
       print PDBG "\n# No of singlets: $num_of_singlets";
       print PDBG "\n# No of seq in the $group_num_counter groups: $num_of_seq_in_group";
       print PDBG "\n# No of all poss. pair combi  : $all_possible_pair_combination\n";
       print PDBG "\n# No of poss alpha class combi: $alpha_class";
       print PDBG "\n# No of poss beta  class combi: $beta_class";
       print PDBG "\n# No of poss a/b   class combi: $alpha_and_beta_class";
       print PDBG "\n# No of poss a+b   class combi: $alpha_plus_beta_class";
       @classes=keys %super_fam_num_in_class;
       for($j=0; $j< @classes; $j++){
          print "\n# No of superfam in  class $classes[$j]  : $super_fam_num_in_class{$classes[$j]}";
          print PDBG "\n# No of superfam in  class $classes[$j]  : $super_fam_num_in_class{$classes[$j]}";
       }
       #@classes=keys %fold_num_in_class;
       for($j=0; $j< @classes; $j++){
          print "\n# No of folds in     class $classes[$j]  : $fold_num_in_class{$classes[$j]}";
          print PDBG "\n# No of folds in     class $classes[$j]  : $fold_num_in_class{$classes[$j]}";
       }

       print  "\n# $out_pdbg_file has been written by $0\n\n";
       if($write_non_redundant_seq_file_version){
          print "\n# (i) $non_redundant_seq_fasta_outputfile is ALSO made\n";
       }else{
          print "\n# (i) write_pdbg_files: additional info: There was no redundancy in fasta file\n";
       }
       close(PDBG);
   }
}


#________________________________________________________________________
# Title     : write_fasta
# Usage     : many argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
# Function  : writes multiple seqs. in fasta format (takes one or more seq.!!)
#             This needs hash which have 'name' 'actual sequence as value'
#
#             To print out each fasta seq into each single file, use write_fasta_seq_by_seq
#             This can rename seq names
#     File Exts:
#             xxxx.fa   <--- generic FASTA,
#             xxxx.mfa  <--- multiple FASTA seqs format file extension
#             xxxx.pfa  <--- protein seq FASTA
#             xxxx.nfa  <--- Nuclueotide seq FASTA
#             xxxx.sfa  <--- single fasta seq FASTA
#             xxxx.mpfa <--- multiple protein. seq. FASTA
#             xxxx.mnfa <--- multiple Nucl. seq. FASTA
#
# Example   : &write_fasta(\%in1, \$out_file_name, \%in2, \%in3,..., );
#             << The order of the hash and scalar ref. doesn't matter. >>
# Warning   : The default output file name is 'default_out.fa' if you do not
#             specify output file name.
#             OUTput file should have xxxxx.fa or xxxx.any_ext NOT just 'xxxxx'
# Keywords  : write_fasta_file, print_fasta_file, write fasta file, fasta_write
#             show_fasta, write_sequence_fasta, write_fasta_files,
# Options   : v for STD out.
#             r for rename the sequences so that Clustalw would not complain with 10 char limit
#               so result wuld be:  0 ->ASDFASDF, 1->ASDFASFASF, 2->ADSFASDFA
#       $write_pure_seq_only=o by o -o  ## writing only the seq (no gap chars or space)
# Returns   :
# Argument  :
#   $sort_seq_names=s by s  ## in writing sorted sequences are written
#   $write_rv_seq_as_well=R by R  # write reverse seq as well as forward seq
#   $gap_char= by g=
#   $write_Protein_seq_only=P by P
# Category  :
# Version   : 3.5
#--------------------------------------------------------------------
sub write_fasta{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

    my($sort_seq_names, $string, $string_leng, $na,$out_file_name_provided,
       $write_pure_seq_only, $write_rv_seq_as_well, $output_file_rv,$hash_num,
       @files_made_with_rv, @files_made, %hash, $seq, $sec_str_hash_form_given,
       $write_Protein_seq_only);
    my($output_file) ='default_out.fa'; ### when no output file name is given, this is used
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    # Try and determin the output file name
    #________________________________________________________
    if(@file>0){
        if($file[0]=~/\S\.[mpsn]{0,2}fa[sta]?$/){ ## to handle .nfa and .fa files
            $output_file = $file[0];   $out_file_name_provided=1;
            if(-s $output_file){
                rename($output_file, "$output_file\.bak");
                print "\n# (INFO) $output_file is present. $output_file\.bak will be created for backup\n";
            }
        }elsif($file[0]=~/(\S+)\.\S+/){
            print "\n# (i) Your input filename for \"write_fasta\" did not match xxxxx.[mpsn]fa[sta] format\n";
            $output_file = "$1\.fa";   $out_file_name_provided=1;
        }
    }else{ $output_file='default_out.fa'; }

    if($vars{'g'}=~/(\S+)/){ $gap_char=$1;        }
    if($char_opt=~/s/){ $sort_seq_names='s';        }
    if($char_opt=~/o/){ $write_pure_seq_only='o' }
    if($char_opt=~/P/){ $write_Protein_seq_only='P' }
    if($char_opt=~/R/){ $write_rv_seq_as_well='R'; print "\n# (INFO) You wanted REVerse seq as well\n"; }
    $hash_num=@hash;

    print &__YELLOW__, &__BOLD__,"\n# (i) write_fasta => Number of HASH given to write_fasta is $hash_num\n", &__RESET__;

    for ($n=0 ; $n < @hash; $n ++){
           my($seq, %hash);
           my %hash_orig=%{$hash[$n]};
           my(@keys_orig)= sort %hash_orig;
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # (1) If sec. str. hash form is given
           #______________________________________________________
           if($keys_orig[0]=~/^\d+$/ and $hash_orig{$keys_orig[0]}->[0]=~/^\S$/){
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # $hash_orig{$keys_orig[0]}->[3] is seq_name, $hash_orig{$keys_orig[0]}->[0] has each residue
                #_____________________________________________________________________________________________
                for($i=0; $i< @keys_orig; $i++){
                    $seq .=$hash_orig{$keys_orig[$i]}->[0];  ## making seq string like 'ASDFADFAFA....'
                }
                %hash=($hash_orig{$keys_orig[0]}->[3], $seq);
                $sec_str_hash_form_given=1;
           }else{
                %hash=%hash_orig;
                %hash_orig=();
           }

	   if($sort_seq_names){  @keys=sort keys %hash;
	   }else{
	   	@keys= keys %hash;
	   	if($keys[0]=~/\S+[\t ]+(\d+\.\d+\.\d+)/){
                    @keys=@{&sort_by_column(\@keys, 2)};
                }
	   }

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # (2) When only one seq is given, use the seq name as output file
           #________________________________________________________________
           if( (@hash==1 and @keys==1 and @file < 1) or $sec_str_hash_form_given){
               $output_file="$keys[0]\.fa";
               $output_file_rv="$keys[0]\_rv\.fa";
           }elsif(@file < 1){                        $output_file="default_fa_$n\.fa";
               if($write_rv_seq_as_well){ $output_file_rv="default_fa_$n\_rv\.fa";  }
           }else{
               $output_file; # $output_file is already set when @file > 0
               if($write_rv_seq_as_well and !$base){
                   ($base, $ext)=split(/\./, $output_file);
                   $output_file_rv="$base\_rv\.$ext";
               }
           }

           open (FASTAS_WRITE,">>$output_file");      # $string is the seq string.
           open (FASTAS_WRITE_RV,">>$output_file_rv") if $write_rv_seq_as_well;
           push(@files_made, $output_file);
           push(@files_made_with_rv, $output_file_rv) if ($write_rv_seq_as_well);

           for ($i=0; $i < @keys; $i++){
                $na= $keys[$i];              $string = "\U$hash{$na}";
                if($write_rv_seq_as_well){  $string_rv=reverse($string);  $na_rv="$keys[$i]\_rv"; }

                if($write_pure_seq_only){
                     $string=~s/[\n \.-]//g;        # replaces all non-chars to null. '_' is used for stop codon
                     $string_rv=~s/[\n \.-]//g;        # replaces all non-chars to null. '_' is used for stop codon
                }
                if($write_Protein_seq_only and $string=~/[UBR0-9]/){
                     next;
                }

		if($char_opt=~/r/){ $na=$i;} # rename the seqeunces with '0, 1, 2, 3," etc for  clustalw

		if($debug == 1){         print ">$na\n";
                    print FASTAS_WRITE ">$na\n";
                    print FASTAS_WRITE_RV ">$na_rv\n" if $write_rv_seq_as_well;
	        }elsif($char_opt=~/v/){  print ">$na\n";
                    print FASTAS_WRITE ">$na\n";
                    print FASTAS_WRITE_RV ">$na_rv\n" if $write_rv_seq_as_well;
		}else{
		    print FASTAS_WRITE ">$na\n";
                    print FASTAS_WRITE_RV ">$na_rv\n" if $write_rv_seq_as_well;
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# (3) Main algorithm of writing in 60 char leng line
		#_____________________________________________________

                if($gap_char){ $string=~s/\W/$gap_char/g } ## <------------ Change the GAP char

		$string_leng=length($string);
		for($j=0; $j< $string_leng; $j+=60){
                    if($debug == 1){
                        printf "%.60s\n", substr($string,$j,60);
                        printf FASTAS_WRITE "%.60s\n", substr($string,$j,60);
                    }elsif($char_opt=~/v/i){
                        printf "%.60s\n", substr($string,$j,60);
                        printf FASTAS_WRITE "%.60s\n", substr($string,$j,60);
                    }else{
                        printf FASTAS_WRITE "%.60s\n", substr($string,$j,60);
                    }
		}
                if($write_rv_seq_as_well){
                    for($j=0; $j< $string_leng; $j+=60){
                        if($debug == 1){
                           printf "%.60s\n", substr($string_rv,$j,60);
                           printf FASTAS_WRITE_RV "%.60s\n", substr($string_rv,$j,60);
                        }elsif($char_opt=~/v/i){
                           printf "%.60s\n", substr($string_rv,$j,60);
                           printf FASTAS_WRITE_RV "%.60s\n", substr($string_rv,$j,60);
                        }else{
                           printf FASTAS_WRITE_RV "%.60s\n", substr($string_rv,$j,60);
                        }
                    }
                }
	  }
	  close FASTAS_WRITE;
	  close(FASTAS_WRITE_RV) if $write_rv_seq_as_well;

        }
	if( $out_file_name_provided != 1){
	   print "\n\n# You didnt give out file name, $output_file  used\n";
	}
	if( -s $output_file ){
           if($verbose=~/\S/){ ## if v option is given, mesg is omitted to prevent comments to a redirected output
               print "\n# Sequences were written in  $output_file ";
           }
	}else{
	  print "\n# The size of written outfile \"$output_file\" is 0, error \n\n"
	}
	if($write_rv_seq_as_well){
            return(\@files_made_with_rv);
	}else{
	    return(\@files_made);
	}
}



#________________________________________________________________________
# Title     : set_debug_option
# Usage     : &set_debug_option;
# Function  : If you put '#' or  '##' at the prompt of any program which uses
#             this sub you will get verbose printouts for the program if the program
#             has a lot of comments.
# Example   : set_debug_option #    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of verbose printouts
#             ##  for even more verbose printouts
# $debug  becomes 1 by '#'  or '_'
# $debug2 becomes 1 by '##'  or '__'
#
# Returns   :  $debug
# Argument  :
# Version   : 1.8
#             generalized debug var is added for more verbose printouts.
#--------------------------------------------------------------------
sub set_debug_option{
  my($j, $i, $level);
  unless( defined($debug) ){
	 for($j=0; $j < @ARGV; $j ++){
		 if( $ARGV[$j] =~/^(_+)$|^(#+)$/){ # in bash, '#' is a special var, so use '_'
			 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
			 $debug=1;
				  print chr(7);
			 print __LINE__," \$debug  is set to ", $debug, "\n";
			 splice(@ARGV,$j,1); $j-- ;
			 $level = length($1)+1;
			 for($i=0; $i < $level; $i++){
				 ${"debug$i"}=1;
				 print __LINE__," \$debug${i} is set to ", ${"debug$i"}, "\n";
			 }
		 }
	 }
  }
}
#________________________________________________________________________
# Title     : default_help
# Usage     : &default_help2;  usually with 'parse_arguments' sub.
# Function  : Prints usage information and others when invoked. You need to have
#             sections like this explanation box in your perl code. When invoked,
#             default_help routine reads the running perl code (SELF READING) and
#             displays what you have typed in this box.
#             After one entry names like # Function :, the following lines without
#             entry name (like this very line) are attached to the previous entry.
#             In this example, to # Function : entry.
# Example   : &default_help2; &default_help2(\$arg_num_limit);   &default_help2( '3' );
#             1 scalar digit for the minimum number of arg (optional),
#             or its ref. If this defined, it will produce exit the program
#             telling the minimum arguments.
# Warning   : this uses format and references
# Keywords  :
# Options   :
# Returns   : formated information
# Argument  :
# Version   : 3.3
#--------------------------------------------------------------------
sub default_help{
  my($i, $perl_dir, $arg_num_limit, $head ,$arg_num_limit,
	  @entries, @entries_I_want_write );
  my($logname)=getlogin();
  my($pwd)=`pwd`;
  my($date)=`date`;
  chomp($date,$pwd);
  my($not_provided)="--- not provided ---\n";
  my($file_to_read) = $0;

  for($i=0; $i < @_; $i ++){
	  if((ref($_[$i]) eq 'SCALAR')&&(${$_[$i]} =~ /^\d$/)){
		  $arg_num_limit = ${$_[$i]};  }
	  elsif( (!(ref($_[$i]))) && ($_[$i] =~ /^\d$/)){
		  $arg_num_limit = $_[$i];     }
  }
  my %entries = %{&read_head_box(\$file_to_read )};
  if($option_tb_found ==1){
	 @option_tb=@{&read_option_table(\$file_to_read)};
  }

  @entries = keys %entries;
  foreach $help_item (@entries){
	  ${$help_item}= $not_provided if( ${$help_item}=~/^[\W]*$/  and  !defined(${$help_item}) );
  }
  #""""""""""""""""""""""""""""""""""""""""
  #########  Writing the format <<<<<<<<<<<
  #""""""""""""""""""""""""""""""""""""""""
  $~ =HEADER_HELP;
  write;   ## <<--  $~ is the selection operator
  $~ =DEFAULT_HELP_FORM;

  @entries_I_want_write=sort keys %entries;

  for( @entries_I_want_write ){  write  }

  print chr(7);  print "_"x72,"\n\n";

  if(@ARGV < $arg_num_limit){ print "\* $0 fataly needs $arg_num_limit arguments\n\n" }

  if(  $option_tb_found == 1){
	 #########  Printing the OPTION table contents <<<<<<<<<<<<
	 print "  Press \"Return\" key to see what options $logname\'s \n\n    \"$0\" take... \n";
		 $key_press=getc();
	 print @option_tb, "\n"x2 if(@option_tb > 0);
  }
format HEADER_HELP  =
_____________________________________________________________________
		  __  __      ______     __          _____
		 /\ \/\ \    /\  ___\   /\ \        /\  _ `\
		 \ \ \_\ \   \ \ \__/   \ \ \       \ \ \L\ \
		  \ \  _  \   \ \  _\    \ \ \       \ \ ,__/
		   \ \ \ \ \   \ \ \/___  \ \ \_____  \ \ \/
		    \ \_\ \_\   \ \_____\  \ \______\  \ \_\
		     \/_/\/_/    \/_____/   \/______/   \/_/ V 3.1`
_____________________________________________________________________
.
format DEFAULT_HELP_FORM =
 @<<<<<<<<<: @*
 $_        $entries{$_}
.
}
#________________________________________________________________________
# Title     : handle_arguments
# Usage     : Just put the whole box delimited by the two '###..' lines below
#             to inside of your subroutines. It will call 'handle_arguments'
#             subroutine and parse all the given input arguments.
#             To use, claim the arguments, just use the variable in the box.
#             For example, if you had passed 2 file names for files existing
#             in your PWD(or if the string looks like this: xxxx.ext),
#             you can claim them by $file[0], $file[1] in
#             your subroutine.
# Function  : Sorts input arguments going into subroutines and returns default
#             arrays of references for various types (file, dir, hash, array,,,,)
#             If you give (\@out, @file), it will put @out into @array as a ref
#             and also the contents of @out will be dereferenced and put to
#             raw_string regardless what is in it).
#
# Example   : 'handle_arguments(\@array, $string, \%hash, 8, 'any_string')
# Warning   :
# Keywords  : handling arguments, parsing arguments,
# Options   :
# Returns   : Following GLOBAL variables
#
#             $num_opt,    @num_opt     @file          @dir
#             $char_opt,   @char_opt    %vars          @array,
#             @hash        @string,     @raw_string    @range,
#
#             $num_opt has 10,20
#             @num_opt has (10, 20)
#             @file has  xxxx.ext
#             @dir has  dir  or /my/dir
#             $char_opt has 'A,B'
#             @char_opt has (A, B)
#             @array has  (\@ar1, \@ar2)
#             @hash has (\%hash1, \%hash2)
#             @string  ('sdfasf', 'dfsf')
#             @raw_string (file.ext, dir_name, 'strings',,)
#             @range has values like  10-20
#             %vars deals with x=2, y=3 stuff.
#
# Argument  : any type, any amount
# Version   : 4.6
#             set_debug_option  is added.
#--------------------------------------------------------------------
sub handle_arguments{
	my($c, $d, $e, $f, $i, $j, $k, $l, $s, $t, $x, $y, $z, $char_opt, $dir, @hash,
		$file, $in_dir, $num_opt, @char_opt, @dir, @file, @string, @file_dir, @k,
		@num_opt, @raw_string,@string, @array, %vars, @range, @temp, $temp,
		@char_options);

  &set_debug_option;
  if(@_<1){ print chr(7),"\n This is handle_arguments. No args Passed, Error?\n"}
  elsif( (@_ ==1)&& (ref($_[0]) eq 'ARRAY') ){ # when there is only 1 argument
	  push(@array, $_[0]);
	  push(@k, $_[0]);
  }elsif( (@_==1)&&( !ref($_[0]) ) ){
	  if(-f $_[0]){ push(@file, $_[0]);   push(@string, $_[0]) }
	  elsif(-d $_[0]){ push(@dir, $_[0]); push(@string, $_[0]) }
	  elsif($_[0]=~/^\d+$/){ push(@num_opt, $_[0]); $num_opt.=$_[0] }
	  elsif($_[0]=~/^\w+$/){ push(@string, $_[0]); }
  }elsif(@_ >=1){ @k = @_ }

  #####______Start of  general argument handling______######
  for($k=0; $k < @k ;$k++){
	  if( !ref($k[$k]) ){
		  if($k[$k]=~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){  push(@char_opt, $1); $char_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\-([a-zA-Z]+)$/){          ## When multiple option is given,
			  @char_options = split(/\,|/, $1);  push(@char_opt, @char_options);
			  $char_opt .= join("\,", @char_options); ## '-' should be used. eg. '-HEGI'
		  }elsif($k[$k]=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
		  }elsif($k[$k]=~ /^(\-?\d+)$/){ push(@num_opt, $1);  $num_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\d+\.?\d*\-\d+\.?\d*$/){  push(@range,  $k[$k] );
		  }elsif(-f $k[$k]){                          push(@file,   $k[$k] );
		  }elsif(-d $k[$k]){                          push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /\/[\w\d\.\-]+[\/].+[\/]/){ push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^\/[\w\d\.\-]+[\/]*$/){    push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^[\/\w\d\-\.]+\.\w+$/){    push(@file,   $k[$k] );
		  }elsif($k[$k]=~/^\w+[\w\d\.\-]+$/){         push(@string, $k[$k] ); # string does not have space
		  }else{                                      push(@raw_string, $k[$k] );  }

	  }elsif( ref($k[$k]) ){
		  if( ref($k[$k]) eq "SCALAR"){
			 if(${$k[$k]} =~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){ push(@char_opt, $1); $char_opt  .= "$1\,";
				}elsif(${$k[$k]}=~ /^\-([a-zA-Z]+)$/){ push(@char_opt, @char_options);
					$char_opt  .= join("\,", @char_options);  ## as an option string.
				}elsif(${$k[$k]}=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
				}elsif(${$k[$k]}=~ /^(\-?\d+)$/){ $num_opt .= "$1\,";  push(@num_opt, $1);
			    }elsif(${$k[$k]}=~ /^\d+\.?\d*\-\d+\.?\d*$/){    push(@range,  $k[$k] );
				}elsif(-f ${$k[$k]}){                            push(@file,   ${$k[$k]} );
				}elsif(-d ${$k[$k]}){                            push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\/[\/\w\d\.\-]+[\/]*$/){     push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /^[\/\w\d\-\.]+\.\w+$/){      push(@file,   ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\w+[\w\d\.\-]+$/){           push(@string, ${$k[$k]} );
				}else{                                           push(@raw_string, ${$k[$k]}); }
		  }elsif(ref($k[$k]) eq "ARRAY"){ my @temp_arr = @{$k[$k]}; push(@array, $k[$k]);
			for ($i=0; $i<@temp_arr; $i++){
			   if(-f $temp_arr[$i]){                            push(@file, $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/^\d+\.?\d*\-\d+\.?\d*$/){ push(@range,$temp_arr[$i] );
			   }elsif(-d $temp_arr[$i]){                        push(@dir , $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\/[\/\w\d\.\-]+[\/]*$/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^[\/\w\d\-\.]+\.\w+$/){   push(@file,$temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\w+[\w\d\.\-]+$/){       push(@string,$temp_arr[$i]);
			   }else{                                           push(@raw_string, $temp_arr[$i]); }
			 }
		  }elsif(ref($k[$k]) eq "HASH"){                             push(@hash,   $k[$k] ); }
	  }
  }
  @raw_string=(@raw_string, @string);
  @file = @{&remove_dup_in_arrayH(\@file)};
  #-----------------------------------------------------
	 sub remove_dup_in_arrayH{  my($i, @out_ref, %duplicate, @orig, @out_ref);
		for($i=0; $i<@_; $i++){  undef(%duplicate);
	 if(ref($_[$i]) eq 'ARRAY'){    @orig = @{$_[$i]};    }
	 @nondup = grep { ! $duplicate{$_}++ } @orig; push(@out_ref, \@nondup);  }
		if(@out_ref ==1){ return($out_ref[0]);}
		elsif(@out_ref >1){  return(@out_ref);}
	 }
  #-----------------------------------------------------
  return(\@hash, \@array, \@string, \@dir, \@file, \@num_opt,
			\@char_opt, \$num_opt, \$char_opt, \@raw_string, \%vars, \@range );
}
#________________________________________________________________________
# Title     : parse_arguments
# Usage     : &parse_arguments; or  (file1, file2)=@{&parse_arguments};
# Function  : Parse and assign any types of arguments on prompt in UNIX to
#             the various variables inside of the running program.
#             This is more visual than getopt and easier.
#             just change the option table_example below for your own variable
#             setttings. This program reads itself and parse the arguments
#             according to the setting you made in this subroutine or
#             option table in anywhere in the program.
# Example   : &parse_arguments(1);
#             @files=@{&parse_arguments(1)};
# Warning   : HASH and ARRAY mustn't be like = (1, 2,3) or (1,2 ,3)
# Keywords  :
# Options   : '0'  to specify that there is no argument to sub, use
#              &parse_arguments(0);
#             parse_arguments itself does not have any specific option.
#             '#' at prompt will make a var  $debug set to 1. This is to
#              print out all the print lines to make debugging easier.
# Returns   : Filenames in a reference of array
#             and input files in an array (file1, file2)=@{&parse_arguments};
# Argument  : uses @ARGV
# Version   : 1.6
#--------------------------------------------------------------------
sub parse_arguments{
  my( $c, $d, $f, $arg_num, $option_table_seen, $n, $option_filtered,
		$option_table_example, $input_line, @input_files);
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Checks if there were arguments
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( @ARGV < 1 ){ #<-- If Argument is not given at prompt
	  for(@_){
		 if($_ eq '0'){
			 last;
		 }else{
			 print "\n \"$0\" requires at least one Argument, suiciding.\n\n";
			 print chr(7); #<-- This is beeping
			 print "  To get help type \"$0  h\"\n\n\n ";
			 exit;
		 }
	  }
  }
  #""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Some DEFAULT $debug variables for debugging purposes
  #""""""""""""""""""""""""""""""""""""""""""""""""""
  &set_debug_option;
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  If there is only one prompt arg. and is [-]*[hH][elp]*, it calls
  #   &default_help and exits
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( ( @ARGV == 1 ) && ($ARGV[0] =~ /^[\-]*[hH\?][elp ]*$/) ){
		&default_help;   exit;
  }
  for($f=0; $f < @ARGV; $f++){
	  if( ($ARGV[$f] =~ /\w+[\-\.\w]+$/)&&(-f $ARGV[$f]) ){
		 push(@input_files, $ARGV[$f] ); next;  }
  }
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #     Reading the running program script
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  &assign_options_to_variables;
  if($HELP == 1){ &default_help }
  return(\@input_files);
}
#________________________________________________________________________
# Title     : assign_options_to_variables
# Usage     : &assign_options_to_variables(\$input_line);
# Function  : Assigns the values set in head box to the variables used in
#             the programs according to the values given at prompt.
#             This produces global values.
#             When numbers are given at prompt, they go to @num_opt
#              global variable. %vars global option will be made
#
# Example   : When you want to set 'a' char to a variable called '$dummy' in
#             the program, you put a head box commented line
#             '#  $dummy    becomes  a  by  -a '
#             Then, the parse_arguments and this sub routine will read the head
#             box and assigns 'a' to $dummy IF you put an argument of '-a' in
#             the prompt.
# Warning   : This is a global vars generator!!!
# Keywords  :
# Options   : '#' at prompt will make a var  $debug set to 1. This is to
#              print out all the print lines to make debugging easier.
# Returns   : Some globaly used variables according to prompt options.
#             @num_opt,
#
# Argument  : None.
# Version   : 2.4
#--------------------------------------------------------------------
sub assign_options_to_variables{
  my($i, $j, $op, $z, $n, $symb, $value, $var, %val, @val, $option_table_example, @input_options);

  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #      Defining small variables for option table reading
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  my($g)='gets';                my($if)='if';
  my($is)='is';                 my(@input_files);
  my($o)='or';   my(@arguments) = sort @ARGV;

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  Assigning global arguments(@num_opt, %vars) variables
  #_______________________________________________________________
  for($i=0; $i< @arguments; $i++){
	 if(($arguments[$i]=~/^(\-?\d+[\.\d+]?)$/)&&   ### it mustn't be a file
		( !(-f $arguments[$i]) ) ){                ### getting NUM opt
		push(@num_opt, $1);
	 }elsif( $arguments[$i]=~/^(\S+)=(\S+)$/){
		$vars{$1}=$2;
	 }
  }

  #""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Some DEFAULT $debug variables for debugging purposes
  #""""""""""""""""""""""""""""""""""""""""""""""""""
  &set_debug_option;

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #   The main processing of self
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  open(SELF, "$0");    ## opens the program you ran to get the options table.
  while(<SELF>){

	  if( $first_border_and_title > 6 ){  ## This is to make it read only the first headbox.
		  last;                            #  $first_border_and_title is an incremental counter.
	  }elsif( /^ *#[_\*\-]{15,}$/ and /^ *# *[Tt][itle]*[ :]*/ ){
		  $first_border_and_title++;
		  print __LINE__, "# assign_options_to_variables : Title line found\n" if $debug eq 1;
	  }elsif(/^ {0,5}# {1,50}[\$\%\@].+$/){
		  $op = $&;  ## $op is for the whole input option line which has $xxxx, @xxx, %xxxx format
		  $op =~ s/^( *\# *)(\W\w+.+)$/$2/;  ## This is removing '#  ' in the line.
		  $op =~ s/^(\W\w+.+)(\s+\#.*)$/$1/;  ## This is removing any comments in the line.
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 ## matching the following line input format.
			 ## $av_sc_segment     becomes    a  by  a  # To smooth the SC rates. Gets the averages of
			 ## $ARG_REG is for arguments regular expression variable.
			 ##  This reg. exp. matches = 'a or A or E or e' part
			 ##  which represents alternative prompt arguments possibilities. \=$b$g$is$e$set
			 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
			 $ARG_REG ='(\S*) *[or=\,]* *(\S*) *[=or\,]* *(\S*) *[=or\,]* *(\S*) *[=or\,]* *(\S*)';
			 if($op=~/^([\$\@\%])([\w\-]+) {0,20}[=|$g|$is] *[\$\@\%]*([\- \w\.\d]+) *[bB]y +$ARG_REG/){
							 ## $sym     $var        becomes          a [$a...]       by       a -a -A
				  my $sym = $1;  #### The symbols like ($, @, %), '$' in the above.
				  my $var = $2;  #### Actual variable name 'var' from $var, 'av_sc_segment' in the above.
				  my $val = $3;  #### The becoming value  first 'a' in the above.
				  my @arg = ($4, $5, $6, $7, $8);  ## The alternative prompt arguments, second 'a' in the above..
			      print "\n $sym $var $val \n" if $debug==1;
			      print "\n \@arg are @arg \n" if $debug==1;

				  #""""""""""""""""""""""""""""""""""""""""""""""""""""
				  #  Going through the PROMPT args.
				  #""""""""""""""""""""""""""""""""""""""""""""""""""""
				  for($z=0; $z < @arguments; $z++){     ## $arguments[$z]  is from @ARGV
					  $arguments[$z] =~ s/\-//;
					  for ($i=0; $i < @arg; $i ++ ){
						 if( ("$arg[$i]" eq "$arguments[$z]" )&& ($arg[$i] !~ /\=/)
							 && ($sym eq '$') ){
							 ${"$var"}="$val";
							 if($debug == 1){
								 print __LINE__," \$${var} is set to \"$1\"\n";
							 }

						 }#'''''''''''''''' $arg = by s=  syntax ~~~~~~~~~~~~~~~~~~~~~~~~~~~
						 elsif( ( $arg[$i] =~ /^(\w+) *\=/ ) &&
							( $arguments[$z] =~ /^${1}= *([\w\.*\-*]+)$/) &&
							( $sym eq '$') ){
							  ${"$var"}="$1";
							  if($debug eq 1){ print __LINE__,"\$${var} is set to \"$1\"\n";  }
						 }
					  }
				  }
			  }
		}
	}
}

#________________________________________________________________________
# Title     : get_base_names
# Usage     : $base =${&get_base_names(\$file_name)};
#             :   or @bases = &get_base_names(\@files);  # <-- uses `pwd` for abs directory
# Function  : produces the file base name(eg, "evalign"  out of "evalign.pl" ).
#              when xxxx.xx.gz form file is given, it removes gz as well
#
# Example   : $base => 'test'  with 'test.txt' or '/home/dir/of/mine/text.txt'
# Warning   :
# Keywords  : get_base_name{, base_name, file_base_name ,  get_file_base_name
#             get_basename, basename, get_root_name, base , root, get_file_root
# Options   :
# Returns   :
# Argument  : handles both ref and non-ref.
# Category  :
# Version   : 1.5
#--------------------------------------------------------------------
sub get_base_names{
    my($x, $pos, $pos1, @out_file, $file_only, $file, @file, $base, @base);
    @file=@{$_[0]} if (ref($_[0]) eq 'ARRAY');
    @file=@_ if !(ref($_[0]) eq 'ARRAY');
    for($x=0; $x < @file; $x ++){
        if( ref($file[$x]) ){
           $file = ${$file[$x]};
           $pos1=rindex($file, "/");
           $file_only=substr($file, ($pos1+1));
           if($file_only=~/(\S+\.\S+)\.gz$/){
                $pos = rindex($1, ".");
                $base= substr($1, 0, $pos);
           }elsif($file_only=~/^[^\.]+$/){ ## when file does not have '.' in its name
                $base= $file_only;
           }else{
                $pos = rindex($file_only, ".");
                $base= substr($file_only, 0, $pos);
           }
        }else{
           $file = $file[$x];
           $pos1=rindex($file, "/");
           $file_only=substr($file, ($pos1+1));
           if($file_only=~/(\S+\.\S+)\.gz$/){
                                            $pos = rindex($1, ".");
              $base= substr($1, 0, $pos);
                                   }elsif($file_only=~/^[^\.]+$/){ ## when file does not have '.' in its name
                                            $base= $file_only;
           }else{
                    $pos = rindex($file_only, ".");
                    $base= substr($file_only, 0, $pos);
           }
        }
        push(@base, $base);
    }
    if(@base == 1 ){ \$base[0] }else{ \@base }
}


#________________________________________________________________________
# Title     : read_option_table
# Usage     :
# Function  : Reads the option table made by Jong in any perl script. The
#             option table is a box with separators.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub read_option_table{
	my($table_found, @option_tb, $head);
	 open(SELF, "${$_[0]}");
	 while(<SELF>){
		if( (/^ *#+/) && ( $table_found== 1) ){
		  push (@option_tb, "$_");
		}elsif( ($table_found != 1)&&(/^ *\#+ *[Oo]ption *[Tt]able */) ){
			$table_found=1; $head="############## Option Table for $logname\'s \"$0\"\n"; ##
			push(@option_tb, $head);
		}
		if( ($table_found==1)&&(/^ *###################+ *$/)){  ### to find the end point of reading
			$table_found =0; last; }
	 }
	 return(\@option_tb);
}
#________________________________________________________________________
# Title     : read_head_box
# Usage     : %entries = %{&read_head_box([\$file_to_read, \@BOXED ] )};
# Function  : Reads the introductory header box(the one you see on top of sub routines of
#             Jong's programs.). Make a hash(associative array) to put entries
#             and descriptions of the items. The hash values have new lines '\n' are
#             attached, so that later write_head_box just sorts Title to the top
#             and prints without much calculation.
#             This is similar to read_head_box, but
#             This has one long straight string as value(no \n inside)
#             There are two types of ending line one is Jong's #---------- ...
#             the other is Astrid's  #*************** ...
# Example   : Output is something like
#             ('Title', 'read_head_box', 'Tips', 'Use to parse doc', ...)
# Warning   :
# Keywords  : open_head_box, open_headbox, read_headbox
# Options   : 'b' for remove blank lines. This will remove all the entries
#             with no descriptions
# Returns   : A hash ref.
# Argument  : One or None. If you give an argu. it should be a ref. of an ARRAY
#              or a filename, or ref. of a filename.
#             If no arg is given, it reads SELF, ie. the program itself.
# Version   : 2.7
#--------------------------------------------------------------------
sub read_head_box{
  my($i, $c, $d, $j, $s, $z, @whole_file, $title_found, %Final_out,
	  $variable_string, $TITLE, $title, @keys, $end_found, $line, $entry,
	  $entry_match, $End_line_num, $remove_blank,  $title_entry_null,
	  $end_found, $Enclosed_entry, $Enclosed_var, $blank_counter,
	  $title_entry_exist, $entry_value, $temp_W, $Warning_part
	);

  if(ref($_[0]) eq 'ARRAY'){ ## When array is given
	  @whole_file = @{$_[0]};
  }elsif(-e ${$_[0]}){       ## When filename is given in a ref
	  open(FILE, "${$_[0]}");
	  @whole_file=(<FILE>);
  }elsif(-e $_[0]){          ## When filename is given
	  open(FILE, "$_[0]");
	  @whole_file=(<FILE>);
  }elsif( $_[0] eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }elsif( ${$_[0]} eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }else{
	  open(SELF, "$0");
	  @whole_file=(<SELF>);
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  for($i=0; $i<@whole_file; $i++){
	 $whole_file[$i] =~ tr/\t/ {7}/;  ## This is quite important to some parsing!!!
	 #########################################
	 ##  The first and second line of box 1 ##
	 #########################################
	 if( ($whole_file[$i]=~/^#[_\*\~\-\=]{20,}$/)&&    ##  '#______' is discarded
		 ($whole_file[$i+1]=~/ *\# {0,3}([TitlNam]+e) {0,8}: {1,10}([\w\.:]*) *(Copyright.*)/i) ){
		 $TITLE = $1;      $title = "$2\n";   $Final_out{'Warning'}.="$3\n";
		 $entry_match=$TITLE; ## The very first $entry_match is set to 'Title' to prevent null entry
		 if($TITLE =~ /^Title|Name$/i){   #
			  if( ($title=~/^\s+$/)||( $title eq "\n") ){
				  $title_entry_null =1;  $title = '';  }    }
		 $Final_out{$TITLE}=$title;
		 $title_found ++ ;   $i++;  ## << this is essential to prevent reading the same line again.
		 last if $title_found > 1;    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ## The first and second line of box 2, #__________ or #**************
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($whole_file[$i]=~/^#[_\*]{20,}$/)&&
		 ($whole_file[$i+1]=~/^# {1,3}(\w{1,6}\s{0,2}\w+) {0,7}: {1,5}(.*) */i) ){
		 $title_found ++ ;        $i++;
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;  ## Capitalize words
		 $Final_out{$entry_match}.= "$entry_value\n";
		 last if $title_found > 1;  next;   }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  'Enclosed' : section. After this, everything is read without discrimination ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($Enclosed_entry == 1)&&($whole_file[$i] =~/^#{1,} {1,}(.*)$/) ){
		 $Final_out{$Enclosed_var}.= "$1\n";    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  With proper entry 1 : for  'eg)'
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,12}(eg ?\)) {0,8}(.*)/i)){
		 $entry_match='Example';
		 $Final_out{$entry_match}.= "$2\n";
	 }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  With PROPER entry 2 : descriptins like. 'Ussage : ssssssxxjkk  kj'
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)] {0,6}(.*) */i)){
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= "$entry_value\n";
		 if($entry_match=~/^(Enclosed?)$/i){
			  $Enclosed_entry = 1;  $Enclosed_var=$1;        }    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  With proper entry 3 : descriptins like. 'Ussage :', But blank description ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)]( {0,})$/i)){
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= " $entry_value\n";
		 if($entry_match=~/^(Enclosed?)$/i){
			  $Enclosed_entry = 1;  $Enclosed_var=$1;      }    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  $option variable matching                ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1) && ($title_found==1) &&
		 ($whole_file[$i]=~ /^# {1,15}([\$\@]\w+ +[\w=\>]+ +\S+ \w+ \S+ *.*)/ )){
		 $Final_out{$entry_match} .= "$1\n";  }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  all space line matching                 ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&  ##<----- If blank line is matched. Take the line
		 ($title_found==1)&&($whole_file[$i]=~/^# {0,}$/) ){
		 $blank_counter++;
		 if($blank_counter > 2){ $blank_counter--; }
		 else{ $Final_out{$entry_match}.= " \n";  }     }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  Anything after 3 space to 12 positions  ##
	 ###  To match 'examples' etc. INC. ':'       ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^#( {2,12})(.+)/) ){
		 $Final_out{$entry_match}.= "$1$2\n"; $blank_counter=0; }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  Anything after 1 space to 11 positions  ##
	 ###  To match 'examples' etc. EXC. ':'       ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^# {1,12}([^:.]+)/) ){
		 $Final_out{$entry_match}.= "$1\n"; $blank_counter=0;}

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###-------End of the read_box reading--------##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($title_found==1)&&
		 ($whole_file[$i]=~ /^#[\~\=\*\-]{15,}/)){  ## to match '#-----..' or '#******..'(Astrid's)
		 $End_line_num = $i;       $end_found++;
		 last;      }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  <<<<  Check if there is option table >>>>  #
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
  } ## < End of for loop


  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### If title is not there at all     ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  @keys=keys %Final_out;
  for(@keys){
	  if(/^Title$/i){    ## No Entry of Title at all??
		  $TITLE =$&;
		  $title_entry_exist = 1;
		  if($Final_out{$_}=~/^ *$/){   ## if Title => Null or just space
			  $title_entry_null = 1;    }  }  }

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### When title entry is not there    ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( $title_entry_exist != 1){
		for($s=$End_line_num+1; $s < $End_line_num+20; $s++){
			if( $whole_file[$s] =~ /^sub {1,5}([\w\.]+) {0,6}\{/){
				$Final_out{'Title'} = "$1\n";   last;       }
			elsif( $whole_file[$s] =~/^#________________________+/){
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{'Title'} = "$0";     last;
			}else{
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{'Title'} = "$0";
			}
		}
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### When title is blank              ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  elsif($title_entry_null ==1){  ## It looks for 'sub xxxxx{ ' line to get title
		### $End_line_num is the last line read.
		for($s = $End_line_num+1; $s < $End_line_num+20; $s++){
			if( $whole_file[$s] =~ /^sub {1,5}(\w+\.*\w*) {0,7}{/){
				$Final_out{$TITLE} = "$1\n";    last;     }
			elsif( $whole_file[$s] =~/^#________________________+/){
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{$TITLE} = "$0";     last;
			}else{
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{$TITLE} = "$0";
			}
		}
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ## Error handling, if no head box is found   ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if($title_found < 1){ print "\nFatal: No headbox found by read_head_box2 sub.\n";  }
  \%Final_out;
}               ##<<--- ENd of the sub read_head_box




#__________________________________________________________________________
# Title     : sort_by_column
# Usage     : @out=@{&sort_by_column(\@input_line_array, <column num>)};
# Function  : it sorts by the given column , small comes top. Unless number is
#             is given, it sorts by the first column.
#
#             It can handle gzipped file. It called gunzip to open and sort.
#
# Example   : sort_by_column.pl 3 xxxx.msp
#               Above will sort the file xxxx.msp by its 3rd column(numerically)
#               small numbers will come to the top.
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues,
# Options   :
#      s  for sorting stringwise
#      d  for sorting by digit
#      n  for sorting by digit(numerically)
# Version   : 1.6
#----------------------------------------------------------------------------
sub sort_by_column{
   my (@in, @M, $col, $sort_numerically, $sort_non_numerically, $sort_by_string);
   unless(@_ > 1  ){ print "\n# FATAL: sort_by_column needs 2 arguments\n"; exit }
   $sort_numerically='n';
   for (@_){
      if(ref $_ eq 'ARRAY'){ @in =@{$_}; }
      elsif( ref $_ eq 'SCALAR'){ $col=${$_}; }
      elsif(/^\d+$/){ $col=$_ }
      elsif(/^ *[nd] *$/i){ $sort_numerically=1; $sort_non_numerically=0; }
      elsif(/^ *s *$/i){ $sort_non_numerically=$sort_by_string='s'; $sort_numerically=0; }
   }
   $col--;
   if($sort_numerically ){ ## if the first and last elements are digits?
       if($in[0]=~/\d+\.\d+/ and $in[$#in]=~/\d+\.\d+/){ ## when the column number contains things like:  10.10.2.1
           print "\n# (i) sort_by_column: The column $col contains $in[0], sorting reasonably\n";
           @in= map {$_->[0]} sort { $a->[1] <=> $b->[1]   ## this is good for SCOP classification
                                  or $a->[2] <=> $b->[2]
                                  or $a->[3] <=> $b->[3]
                                  or $a->[4] <=> $b->[4]
                                  or $a->[5] <=> $b->[5]
                                  or $a->[6] <=> $b->[6]
                                  or $a->[7] <=> $b->[7]
                                  or $a->[8] <=> $b->[8]
                                  or $a->[9] <=> $b->[9]
                                  or $a->[10] <=> $b->[10]
                                  or $a->[11] <=> $b->[11]
                                   }
                                   map { [$_->[0], ($_->[1])=~/(\d+)/g] }
                                   map { [$_,  ($_=~/(\S+)/g)[$col] ] } @in;
       }else{
           @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
       }
   }elsif($sort_by_string){ # here let's do the sring sort
       @in= map {$_->[0]} sort { $a->[1] cmp $b->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
   }
   return(\@in);
}

#__________________________________________________________________________
# Title     : sort_by_scop_classification
# Usage     : @out=@{&sort_by_scop_classification(\@input_line_array, <classification column no>)};
# Function  : it sorts by the given column , small comes top. Unless number is
#             is given, it sorts by the first column.
#
#             It can handle gzipped file. It called gunzip to open and sort.
#
# Example   : sort_by_scop_classification.pl 3 xxxx.mpfa
#               Above will sort the file xxxx.mpfa by its 3rd column(numerically)
#               small numbers will come to the top.
#  sorts things like:
#     >dsfsf 1.2.3.1.4
#     >dsfsa 1.2.10.1.5
#
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues,
# Options   :
#      s  for sorting stringwise
#      d  for sorting by digit
#      n  for sorting by digit(numerically)
# Version   : 1.1
#----------------------------------------------------------------------------
sub sort_by_scop_classification{
   my (@in, @M, $col, $sort_numerically, $sort_non_numerically, $sort_by_string);
   unless(@_ > 1  ){ print "\n# FATAL: sort_by_scop_classification needs 2 arguments\n"; exit }
   $sort_numerically='n'; # for SCOP it is n
   for (@_){
      if(ref $_ eq 'ARRAY'){ @in =@{$_}; }
      elsif( ref $_ eq 'SCALAR'){ $col=${$_}; }
      elsif(/^\d+$/){ $col=$_ }
      elsif(/^ *[nd] *$/i){ $sort_numerically=1; $sort_non_numerically=0; }
      elsif(/^ *s *$/i){ $sort_non_numerically=$sort_by_string='s'; $sort_numerically=0; }
   }
   $col--;
   if($sort_numerically ){ ## if the first and last elements are digits?
      @in= map {$_->[0]} sort{ $a->[1] <=> $b->[1]   ## this is good for SCOP classification
                             or $a->[2] <=> $b->[2]
                             or $a->[3] <=> $b->[3]
                             or $a->[4] <=> $b->[4]
                             or $a->[5] <=> $b->[5]
                             or $a->[6] <=> $b->[6]
                             or $a->[7] <=> $b->[7]
                             or $a->[8] <=> $b->[8]
                             or $a->[9] <=> $b->[9]
                             or $a->[10] <=> $b->[10]
                             or $a->[11] <=> $b->[11]
                             }
                             map { [$_->[0], ($_->[1])=~/(\d+)/g] }
                             map { [$_,  ($_=~/(\S+)/g)[$col] ] } @in;
   }elsif($sort_by_string){ # here let's do the sring sort
       @in= map {$_->[0]} sort { $a->[1] cmp $b->[1] } map { [$_, ($_=~/(\d+)/g)[$col] ] } @in;
   }
   return(\@in);
}


#______________________________________________________________________________
# Title     : __YELLOW__
# Usage     : &__YELLOW__
# Function  : changes the color of following text, to reset, use, &__RESET__
# Example   :  print &__RED__, "This should be RED\n";
# Keywords  : __yellow__, __YELL__, yellow_text,
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub __YELLOW__{
    return("\e[33m");
}

#______________________________________________________________________________
# Title     : __RESET__
# Usage     : print &__RESET__, "This should be RESET\n";
# Function  : changes the color of following text, to reset, use, &__RESET__
# Example   :
# Keywords  : __red__, red_text,
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub __RESET__{
    return("\e[0m");
}
#______________________________________________________________________________
# Title     : __BOLD__
# Usage     : print &__BOLD__, "This should be BOLD\n";
# Function  : changes the color of following text, to reset, use, &__BOLD__
# Example   :
# Keywords  : __red__, red_text,
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub __BOLD__{
    return("\e[1m");
}

