#!/usr/bin/perl -w

#phase_count_fast.pl

use strict;
use vars qw ($opt_c $opt_I $opt_J $opt_G $opt_H $opt_g $opt_l $opt_m $opt_n $opt_N $opt_o $opt_q $opt_r $opt_s $opt_t $opt_u $opt_v $opt_w $opt_x $opt_z);  # required if strict used
use Getopt::Std;
use constant GNUPLOT => '/usr/bin/gnuplot';

getopts ('tI:J:G:H:c:g:l:m:Nn:o:q:rs:u:v:w:x:z:');     # ('aci:p:o:') means 'ac' are flags, 'i:p:o:' gets following scalar.


# Print a helpful message if the user provides no input file.
if (!@ARGV) { 
        print "usage:  phase_count_fast.pl [options] file.qrna org1 org2\n\n";
	print "options:\n";
        print "-c <case>          :  cases (default is case = 1)\n";
        print "                        possible cases are:\n";
        print "                        0=GLOBAL\n";  
        print "                        1=LOCAL_DIAG_VITERBI 2=LOCAL_DIAG_FORWARD\n";
        print "                        3=LOCAL_SEMI_VITERBI 4=LOCAL_SEMI_FORWARD\n";
        print "                        5=LOCAL_FULL_VITERBI 6=LOCAL_FULL_FORWARD\n";
	print "-I <min_id>        : min ID for analysis                      [default min_id = 0]\n";
	print "-J <max_id>        : max ID for analysis                      [default max_id = 100]\n";
	print "-G <min_gc>        : min GC for analysis                      [default min_gc = 0]\n";
	print "-H <max_gc>        : max GC for analysis                      [default max_gc = 100]\n";
	print "-g <typetarget>    :  which type of loci you want to analyze (default is all three)\n";
        print "                        possible types of loci are:\n";
        print "                        OTH | COD | RNA \n";
        print "-l <lambda>        : lambda parameter of an EVD fit\n";
        print "-m <mu>            : mu parameter of an EVD fit\n";
        print "-n <size>          : size of database\n";
        print "-N                 : ignore the loci clustering, just give stats on aligments and windows\n";
	print "-o <output>        : output file [default = ]\n";
	print "-q <file.q>        : include qfile to get the actual ends of the qrna call\n";
	print "-r                 : file is an rnaz file                     [default qrna]\n";
        print "-s <type_of_score> : type of score (sigmoidal | simple)       [default = sigmoidal]\n";
        print "-t                 : towhomness -- obtains corresponding loci in the other organism\n";
	print "-u <cutoff>        : default is cutoff = 5\n";
	print "-v <loci_overlap>  :  minimun overlap required to build loci (default is loci_overlap = -1)\n";
	print "-w <whichorg>      : default is whichorg = 1  (use 1-for-org1 2-for-org2 12-for-both)\n";
	print "-x <name>          : ignore given name, use this one for gff outputs\n\n";
	print "-z <file>          : give a table of cutoffs for %gc ranges\n\n";
       exit;
}
my $file    = shift;
my $org1    = shift;
my $org2    = shift;
my $tag;
my $type;

if (!$org1) { $org1 = "query"; }
if (!$org2) { $org2 = "DB"; }

my $dir;
my $filename; 
if ($file =~ /^(\S+)\/([^\/]+)$/) {
    $dir  = $1;
    $filename = $2;
}
else {
    $dir  = "";
    $filename = $file;
}
#print "file: $filename\n";
#print "dir:  $dir/\n";

my $output   = "$dir";

# examples of how a non-flag option might be used 
my $towhomness = $opt_t;

my $n_in_ali = 0;
my $file_q     = $opt_q;
if ($file_q) { check_with_qfile(); }


my $fix_lambda = -1;
if ($opt_l) { $fix_lambda = $opt_l; }

my $fix_mu = -1;
if ($opt_m) { $fix_mu = $opt_m; }

my $fix_size;
if ($opt_n) { $fix_size = $opt_n; }

my $typetarget;
if ($opt_g) { $typetarget = $opt_g; }
else        { $typetarget = "all";  }

my $overlap; #minimum overlap required when calculating loci
if (defined($opt_v)) { $overlap = $opt_v; }
else                 { $overlap = -1;     }

my $whichorg ;
if ($opt_w) { $whichorg = $opt_w; }
else        { $whichorg = 1;      }
if ($whichorg != 1 && $whichorg != 2 && $whichorg != 12 ) { print "organism has to be '1' or '2' or '12'\n"; die }

my $fix_cutoff;
if (defined($opt_u)) { $fix_cutoff = $opt_u; }
else                 { $fix_cutoff = 5;     }

my @cutoff;
my %gc_min_cutoff;
my %gc_max_cutoff;
my %lambda;
my %mu;
my %size;

read_gc_cutoff_file("$opt_z", \@cutoff, \%gc_min_cutoff, \%gc_max_cutoff, \%lambda, \%mu, \%size);

my $id_min;
if ($opt_I) { $id_min = $opt_I; }
else        { $id_min = 0;      }
my $id_max;
if ($opt_J) { $id_max = $opt_J; }
else        { $id_max = 100;    }

my $gc_min;
if ($opt_G) { $gc_min = $opt_G; }
else        { $gc_min = 0;      }
my $gc_max;
if ($opt_H) { $gc_max = $opt_H; }
else        { $gc_max = 100;    }

my $spec = "ID[$id_max:$id_min].GC[$gc_max:$gc_min]";

my $outputfile;
if    ($opt_o) { $outputfile = $opt_o; } 
elsif ($opt_z) { $outputfile = "$file.$typetarget"."loci".".CUTOFFvar.$spec";          }
else           { $outputfile = "$file.$typetarget"."loci".".CUTOFF$fix_cutoff.$spec";  }

my $usename;
if ($opt_x) { $usename = $opt_x; }

my $outputfile1gff = "$outputfile.$org1.gff";
my $outputfile2gff = "$outputfile.$org2.gff";

system("rm $outputfile.$org1.gff\n");
system("rm $outputfile.$org2.gff\n");
system("touch $outputfile.$org1.gff\n");
system("touch $outputfile.$org2.gff\n");

my $type_of_score;
if ($opt_s) { $type_of_score = $opt_s;      }
else        { $type_of_score = "sigmoidal"; }
if ($type_of_score =~ /^simple$/ || $type_of_score =~ /^sigmoidal$/) { ;}
else { print "wrong type of score. options are: 'simple' or 'sigmoidal'"; die; }

my $case;
if ($opt_c) { $case = $opt_c; }
else        { $case = 1;      }

if   ($case==0) { $tag = "GLOBAL";        }
elsif($case==1) { $tag = "LOCAL_DIAG_VITERBI"; }
elsif($case==2) { $tag = "LOCAL_DIAG_FORWARD"; }
elsif($case==3) { $tag = "LOCAL_SEMI_VITERBI"; }
elsif($case==4) { $tag = "LOCAL_SEMI_FORWARD"; }
elsif($case==5) { $tag = "LOCAL_FULL_VITERBI"; }
elsif($case==6) { $tag = "LOCAL_FULL_FORWARD"; }

my $sqrt2 = sqrt(2.0);

open (OUT, ">$outputfile") || die;

my $qrna;
if (!$opt_r) {
    $qrna = "QRNA";

    if ($typetarget =~ /^all$/) {
	phase_count_target ("RNA");
	phase_count_target ("COD");
	phase_count_target ("OTH"); 
    }
    else {
	phase_count_target ($typetarget);
    }
}
else {
    $qrna = "RNAz";
    phase_count_target_rnaz("RNA");
}

close (OUT);


#######################################


sub analyze_window {

    my ($target, $ntseq, $overlap, 
	$noseq_ref, $ncseq_ref, $nrseq_ref, $ncoseq_ref, $nroseq_ref, $nrcseq_ref, $n3seq_ref, $codpos_ref, $rnapos_ref, $codcut_ref, $rnacut_ref,
	$id, $id_min, $id_max, $gc, $gc_min, $gc_max,
	$startblast1, $startfrag1, $name1, $coor1, $ali1,
	$startblast2, $startfrag2, $name2, $coor2, $ali2,
	$type_of_score, $motif_lend, $motif_rend, $othsc, $codsc, $rnasc, 
	$nloci1_ref, $name1_ref, $lloci1_ref, $rloci1_ref, $type1_ref, $towhom1_ref, $howmany1_ref, $othsc1_ref, $codsc1_ref, $rnasc1_ref,
	$nloci2_ref, $name2_ref, $lloci2_ref, $rloci2_ref, $type2_ref, $towhom2_ref, $howmany2_ref, $othsc2_ref, $codsc2_ref, $rnasc2_ref
	)  = @_;
    
    my $realtype;

    my $rnalod;
    my $codlod;
    
    my $othlodsigm;
    my $rnalodsigm;
    my $codlodsigm;
    
    my $rna;
    my $cod;
    my $oth;
    
    $codlod = $codsc - $othsc;
    $rnalod = $rnasc - $othsc;
    
    if ($othsc < - 5000.0 || $codsc < - 5000.0 || $rnasc < - 5000.0) { 
	$othlodsigm = -5000.0;
	$codlodsigm = -5000.0;
	$rnalodsigm = -5000.0;
    }
    else {
	$othlodsigm = -log(exp(log(2.0)*($codsc-$othsc)) + exp(log(2.0)*($rnasc-$othsc)))/log(2.0);
	$codlodsigm = -log(exp(log(2.0)*($othsc-$codsc)) + exp(log(2.0)*($rnasc-$codsc)))/log(2.0);
	$rnalodsigm = -log(exp(log(2.0)*($othsc-$rnasc)) + exp(log(2.0)*($codsc-$rnasc)))/log(2.0);
    }   
                 
    if ($ntseq > 0 && $id >= $id_min && $id <= $id_max && $gc >= $gc_min && $gc <= $gc_max) {
	
	#
	#Identify type of hit, and add them up to generate some statistics at the end.
	#
	if ($type_of_score =~  /^simple$/) { 
	    is_above_cutoff($fix_cutoff, \$realtype, $codlod, $rnalod, $noseq_ref, $ncseq_ref, $nrseq_ref, $ncoseq_ref, $nroseq_ref, $nrcseq_ref, $n3seq_ref);                                
	    $oth = 0; $cod = $codlod; $rna = $rnalod;    
	}
	elsif ($type_of_score =~  /^rnaz$/) { 
	    is_above_cutoff_rnaz ($fix_cutoff, \$realtype, $rnasc, $nrseq_ref);               
	    $oth = 0.0; $cod = 0.0; $rna = $rnasc;    
	}
	else                               { 
	    is_above_cutoff_sigmoidal(\@cutoff, \%gc_min_cutoff, \%gc_max_cutoff, \$realtype, $gc, $othlodsigm, $codlodsigm, $rnalodsigm, $noseq_ref, $ncseq_ref, $nrseq_ref); 
	    $oth = $othlodsigm; $cod = $codlodsigm; $rna = $rnalodsigm; 
	}
	
	if ($cod > 0.0)     { $$codpos_ref ++; }
	if ($rna > 0.0)     { $$rnapos_ref ++; }

	foreach my $cut (@cutoff) {
	    if ($gc >= $gc_min_cutoff{$cut} && $gc < $gc_max_cutoff{$cut}) {
		if ($cod > $cut) { $$codcut_ref ++; }
		if ($rna > $cut) { $$rnacut_ref ++; }
	    }
	}

	if (!$opt_N) {
	    identify_loci($target, $ntseq, $overlap, 
			  $startblast1, $startfrag1, $name1, $coor1, $ali1,
			  $startblast2, $startfrag2, $name2, $coor2, $ali2,
			  $realtype, $motif_lend, $motif_rend, $oth, $cod, $rna,
			  $nloci1_ref, $name1_ref, $lloci1_ref, $rloci1_ref, $type1_ref, $towhom1_ref, $howmany1_ref, $othsc1_ref, $codsc1_ref, $rnasc1_ref);
	    identify_loci($target, $ntseq, $overlap, 
			  $startblast2, $startfrag2, $name2, $coor2, $ali2,
			  $startblast1, $startfrag1, $name1, $coor1, $ali1,
			  $realtype, $motif_lend, $motif_rend, $oth, $cod, $rna,
			  $nloci2_ref, $name2_ref, $lloci2_ref, $rloci2_ref, $type2_ref, $towhom2_ref, $howmany2_ref, $othsc2_ref, $codsc2_ref, $rnasc2_ref);
	}
    }
}

sub arrange_loci {
    my ($org, $nloci, $name_ref, $lloci_ref, $rloci_ref, $type_ref, $towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;
    my $l;
    my $n = 0;
    my $k;
    my $lend;
    my $lstart = 0;
    my $lstart_n;
    my $cur_name;
    my $cur_name_quote;
    my $name_ref_quote;
    my @list;
    my @newlist;
    my %rend;
    my %type;
    my %towhom;
    my %howmany;
    my %othsc;
    my %codsc;
    my %rnasc;
    my $howmany_tot_bf = 0;
    my $howmany_tot_af = 0;
    my $count = 0;

    my $verbose = 0;

    $cur_name       =           $name_ref->[0];
    $cur_name_quote = quotemeta $name_ref->[0];
    
    if ($verbose) {
	print "\n before arrange_loci\n"; 
	for ($l = 0; $l < $nloci; $l++) { 
	    print "BLOCI:$l $name_ref->[$l] $lloci_ref->[$l] $rloci_ref->[$l]\n"; 
	}
    }

    for ($l = 0; $l < $nloci; $l++) { $howmany_tot_bf += $howmany_ref->[$l]; }

    for ($l = 1; $l < $nloci; $l++) { 
	$name_ref_quote = quotemeta $name_ref->[$l];

   	if ($cur_name_quote eq $name_ref_quote) {
	    $n++;
	}
	else { $cur_name = $name_ref->[$l]; $cur_name_quote = quotemeta $name_ref->[$l]; $n = 0; $lstart_n = $l; }
	if ($n == 0) { 
	    undef @list; 
	    undef @newlist;
	    undef %rend; 
	    undef %type; 
	    undef %towhom; 
	    undef %howmany; 
	    undef %othsc; 
	    undef %codsc; 
	    undef %rnasc; 
	    for ($k = 0; $k < $lstart_n - $lstart; $k++) { 
		$list[$k] = $lloci_ref->[$k+$lstart]."-".$rloci_ref->[$k+$lstart].$towhom_ref->[$k+$lstart]; 
		$lend = $list[$k];

		$rend{$lend}    = $rloci_ref->[$k+$lstart];
		$type{$lend}    = $type_ref->[$k+$lstart];
		$towhom{$lend}  = $towhom_ref->[$k+$lstart];
		$howmany{$lend} = $howmany_ref->[$k+$lstart];
		$othsc{$lend}   = $othsc_ref->[$k+$lstart];
		$codsc{$lend}   = $codsc_ref->[$k+$lstart];
		$rnasc{$lend}   = $rnasc_ref->[$k+$lstart];
		
	    }
	    
	    @newlist = sort loci_left_to_right @list;

	    for ($k = 0; $k < $lstart_n - $lstart; $k++) {
		$lend = $newlist[$k];
		if ($lend) {
		    $count++;
		    if ($lend =~ /^(\d+)-/) { $lloci_ref->[$k+$lstart] = $1; }
		    $rloci_ref->[$k+$lstart]   = $rend{$lend};
		    $type_ref->[$k+$lstart]    = $type{$lend};
		    $towhom_ref->[$k+$lstart]  = $towhom{$lend};
		    $howmany_ref->[$k+$lstart] = $howmany{$lend};
		    $othsc_ref->[$k+$lstart]   = $othsc{$lend};
		    $codsc_ref->[$k+$lstart]   = $codsc{$lend};
		    $rnasc_ref->[$k+$lstart]   = $rnasc{$lend};
		}
	    }
	    $lstart = $lstart_n;
	}
    }
    
#last case
    $lstart_n = $lstart + $n + 1;
    undef @list;
    undef @newlist;
    undef %rend;
    undef %type;
    undef %towhom;
    undef %howmany;
    undef %othsc;
    undef %codsc;
    undef %rnasc;
    
   for ($k = 0; $k < $lstart_n - $lstart; $k++) {
 	$lend = $lloci_ref->[$k+$lstart]."-".$rloci_ref->[$k+$lstart].$towhom_ref->[$k+$lstart];
	
	$list[$k] = $lend;
	$rend{$lend}    = $rloci_ref->[$k+$lstart];
	$type{$lend}    = $type_ref->[$k+$lstart];
	$towhom{$lend}  = $towhom_ref->[$k+$lstart];
	$howmany{$lend} = $howmany_ref->[$k+$lstart];
	$othsc{$lend}   = $othsc_ref->[$k+$lstart];
	$codsc{$lend}   = $codsc_ref->[$k+$lstart];
	$rnasc{$lend}   = $rnasc_ref->[$k+$lstart];
    }
    
    @newlist = sort loci_left_to_right  @list;

    for ($k = 0; $k < $lstart_n - $lstart; $k++) {
	$lend = $newlist[$k];
	
	if ($lend) {
	    $count++;
	    
	    if ($lend =~ /^(\d+)-/) { $lloci_ref->[$k+$lstart] = $1; }
	    $rloci_ref->[$k+$lstart]   = $rend{$lend};
	    $type_ref->[$k+$lstart]    = $type{$lend};
	    $towhom_ref->[$k+$lstart]  = $towhom{$lend};
	    $howmany_ref->[$k+$lstart] = $howmany{$lend};
	    $othsc_ref->[$k+$lstart]   = $othsc{$lend};
	    $codsc_ref->[$k+$lstart]   = $codsc{$lend};
	    $rnasc_ref->[$k+$lstart]   = $rnasc{$lend};

	}
    }
    
    for ($l = 0; $l < $nloci; $l++) { $howmany_tot_af += $howmany_ref->[$l]; }
    
    #paranoia
    if ($count != $nloci) { die "bad loci count in $org: $count $nloci"; }
    if ($howmany_tot_bf != $howmany_tot_af) { die "bad blasthit count in $org: $howmany_tot_bf $howmany_tot_af"; }
    
    if ($verbose) {
	print "\n after arrange_loci\n"; 
	for ($l = 0; $l < $nloci; $l++) { 
	    print "ALOCI:$l $name_ref->[$l] $lloci_ref->[$l] $rloci_ref->[$l]\n"; 
	}
    }

}

sub by_mostly_numeric {
    ($a <=> $b) || ($a cmp $b);
}

sub check_with_qfile {
    my $ali = 0;

    open (INFILE,"$file_q") || die;
    while (<INFILE>) {
	if (/^>(\S+)-(\d+[><]\d+)-/ && $ali == 0) { 
	    $n_in_ali ++;
	    $ali = 1;
	}
	elsif (/^>(\S+)-(\d+[><]\d+)-/ && $ali == 1) { 
	    $ali = 0;
	}
    }
    close (INFILE);
}



sub class_stat {
    my ($outgff, $target, $towhomness, $org, $nloci, $name_ref, $lloci_ref, $rloci_ref, $type_ref, 
	$towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;
    my $l;
    my $n = 0;
    my $ave_length = 0;
    my $nloci_type = 0;
   
    my $evalue;

    open (GFF,">>$outgff") || die;

    if ($target !~ /NULL/) {
	
	print OUT "\n---------------Statistics for $target loci ($org):-------------------\n";
	
	for ($l = 0; $l < $nloci; $l++) { 
	    if($type_ref->[$l] =~ /^$target$/) { 
		$nloci_type ++; 
		$ave_length += $rloci_ref->[$l] - $lloci_ref->[$l]; 
	    }
	}
	

	if ($nloci_type == 0) { print OUT "sorry no loci of type $target\n"; }
	else {
	    print OUT "# loci:\t", "$nloci_type\n";
	    printf OUT "ave_length:\t %.2f\n\n", $ave_length / $nloci_type;
	    
	    for ($l = 0; $l < $nloci; $l++) {
		if($type_ref->[$l] =~ /^$target$/) { 
		    $n ++; 
		    print OUT "$n-loci $name_ref->[$l] $lloci_ref->[$l] $rloci_ref->[$l] (", 
		    $rloci_ref->[$l]-$lloci_ref->[$l]+1,") $howmany_ref->[$l] $type_ref->[$l] "; 
		    printf OUT "%.2f %.2f\n", $codsc_ref->[$l], $rnasc_ref->[$l]; 

		    my $score;
		    if    ($target =~ /^RNA$/) { $score = $rnasc_ref->[$l]; }
		    elsif ($target =~ /^COD$/) { $score = $codsc_ref->[$l]; }
		    elsif ($target =~ /^OTH$/) { $score = $othsc_ref->[$l]; }
		    elsif ($target =~ /^rnaz$/){ $score = $rnasc_ref->[$l]; }
		    else                       { print "wrong type\n"; die; }

		    $evalue = evalue ($score, $fix_size, $fix_lambda, $fix_mu);

		    if (defined($opt_x)) { 
			print GFF "$usename\t$qrna\_loci\t$type_ref->[$l]\t$lloci_ref->[$l]\t$rloci_ref->[$l]\t$score\t\.\t\.\tgene \"$name_ref->[$l]\" eval '$evalue'\n";        }
		    else {
			print GFF "$name_ref->[$l]\t$qrna\_loci\t$type_ref->[$l]\t$lloci_ref->[$l]\t$rloci_ref->[$l]\t$score\t\.\t\.\tgene \"$name_ref->[$l]\" eval '$evalue'\n"; }


		    if ($towhomness) {print OUT "$towhom_ref->[$l]"; }
		}
	    }
	}
    }
    close (GFF);
}

sub cleanup_towhom {
    my ($nloci, $towhom_ref, $nlocib, $name_loci_ref, $lloci_ref, $rloci_ref, $type_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;

    my $k;
    my $l;
    my $name;
    my $lend;
    my $rend;
    my @towhom;
    my $towhom;
    my $bit_total;

    $bit_total = ""; for ($k = 0; $k < $nlocib; $k++) { $bit_total .= "0"; }

    for ($l = 0; $l < $nloci; $l++) { 
	
	@towhom = split (/:/, $towhom_ref->[$l]);
	
	$towhom_ref->[$l] = "";
	
	my $bit = ""; for ($k = 0; $k < $nlocib; $k++) { $bit .= "0"; }
	    
	foreach $towhom (@towhom) {
	    $towhom =~ /^(\S+)\-(\d+)\-(\d+)$/;
	    
	    $name = $1;
	    $lend = $2;
	    $rend = $3;
	    for ($k = 0; $k < $nlocib; $k++) { 
		
		if (is_hit_in_locus($name, $lend, $rend, $name_loci_ref->[$k], $lloci_ref->[$k], $rloci_ref->[$k])) 
		{  
		    if (substr($bit, $k, 1) == 0) 
		    { 
			$towhom_ref->[$l] .= "\t\[".($k+1)."\] ".$name_loci_ref->[$k]." ".$lloci_ref->[$k]." ".$rloci_ref->[$k]." \(".($rloci_ref->[$k]-$lloci_ref->[$k]+1);
			$towhom_ref->[$l] .= "\) ".$howmany_ref->[$k]." ".$type_ref->[$k]." ".(int($codsc_ref->[$k]*100)/100)." ".(int($rnasc_ref->[$k]*100)/100)."\n"; 
			
			substr($bit, $k, 1) = "1";       # well, now, it is.
			substr($bit_total, $k, 1) = "1"; # for total count
		    }		   
		}
	    }
	}
    }
    #paranoia
    my $total = ($bit_total =~ tr/1/1/);
    if ($total != $nlocib) { print "you did not account for all loci ($total $nlocib)\n"; die; }
    
}

sub cutoff_curve {
    my ($param_curve, $param_file, $N, $k) = @_;

    my $m1;
    my $m1_err;
    my $m2;
    my $m2_err;

    my $a1_exp;
    my $a1_err;
    my $a2_exp;
    my $a2_err;

    my $keye1;    
    my $keye2;    
    
    # do exp fit
    #
    #
    my $abs_id  = 100;
    my $max_id  = 99;
    my $half_id = 55;   # change of fitting curve point
    my $min_id  = 0;

    my $asyntote_id  = 25;   #change into an asyntote

    my $abs_lod  =   8.0;
    my $max_lod  =  15.0;

    my $id;
    my $lod;

    my $lod_half_id = fit_half_exp ("$param_file", \$a1_exp, \$a1_err, \$m1, \$m1_err, $max_id,  $half_id, $max_lod);
    my $lod_min_id  = fit_half_exp ("$param_file", \$a2_exp, \$a2_err, \$m2, \$m2_err, $half_id, $min_id,  $lod_half_id);

    my $lod_asyntote_id = $lod_half_id - $a2_exp * ($half_id - $asyntote_id) ** $m2;

    $keye1 = "fit\[$max_lod - $a1_exp$a1_err * ($max_id -x) ^ $m1$m1_err\]"; 
    $keye2 = "fit\[$lod_half_id - $a2_exp$a2_err * ($half_id-x) ^ $m2$m2_err\]"; 
    
    open (PARAM,">$param_curve") || die;
    print PARAM "#file: $param_curve\n";
    print PARAM "#fit1: $keye1\n";
    print PARAM "#fit2: $keye2\n";
    
    my $fun;
    my $dim = $N*$k;
    my $x;
    for (my $i = 0; $i <= $dim; $i++) {
	$x = $i/$k;
	 
	if    ($x >= $max_id)                        { $fun = $abs_lod;                                        print PARAM "$x $fun\n"; }
	elsif ($x <  $max_id  && $x >= $half_id)     { $fun = $max_lod     - $a1_exp * ($max_id  - $x) ** $m1; print PARAM "$x $fun\n"; }
	elsif ($x <  $half_id && $x >= $asyntote_id) { $fun = $lod_half_id - $a2_exp * ($half_id - $x) ** $m2; print PARAM "$x $fun\n"; }
	else                                         { $fun = $lod_asyntote_id;                                print PARAM "$x $fun\n"; }
	
    }
    close(PARAM);


}

sub cutoff_for_id {

    my ($param_curve, $id, $Nid, $kid) = @_;
    
    my $cutoff;
    my $x;
    my $y;

    open (PARAM, "$param_curve") || die;
    while(<PARAM>) {
	if (/^\#/) {
	    next;
	}
	if (/^(\S+)\s+(\S+)/) {
	    
	    $x = $1;
	    $y = $2;
	    
	    if ($id/$kid <= $x && $x < ($id+1)/$kid) { $cutoff = $y; last; }

	}
    }
    close(PARAM);
    

    return $cutoff;

}

sub extract_exp_fit_info {

    my ($fitlog, $plotfile, $m_ref, $a_ref) = @_;

    my $read = 0;

    open (FIT,"$fitlog") || die;
    while (<FIT>) {
	
	if (/^FIT:.+$plotfile/) { $read = 1; }
	elsif (/^FIT:/)         { $read = 0; }

	if    (/^m\s+=\s+(\S+)\s+\+\/\-\s+\S+\s+(\(\S+\%\))/)  { if ($read == 1) { $$m_ref  = $1.$2; } }
	elsif (/^a\s+=\s+(\S+)\s+\+\/\-\s+\S+\s+(\(\S+\%\))/)  { if ($read == 1) { $$a_ref  = $1.$2; } }
    }
    close (FIT);
 
}

sub evalue {

    my ($score, $size, $lambda, $mu) = @_; 

    my $evalue = -1;

    if ($lambda > 0) { 

	$evalue = 1.0 - exp(-exp(-$lambda*($score-$mu)));

	$evalue *= $size; 
    }

    return $evalue;
}

sub  fit_half_exp {

    my ($param_file, $a_exp_ref, $a_err_ref, $m_ref, $m_err_ref, $max_x, $min_x, $max_y) = @_;
    
    my $m;
    my $m_err;
    my $a;
    my $a_err;

    my $x;
    my $y;
    
    my $y_min_x;

    open (FO,">foo") || die;
    open (FILE,"$param_file") || die;
    while(<FILE>) {
	
	if (/^(\S+)\s+(\S+)/) {
	    $x = $1;
	    $y = $2;
	    
	    if ($y <= $max_y && $x < $max_x && $x >= $min_x) {
		printf(FO "%.4f %.4f\n", log($max_x-$x), log($max_y-$y));
	    }
	    
	}
    }
    close (FILE);
    close(FO);
    
    open(GP,'|'.GNUPLOT) || die "Gnuplot: $!";
    print GP "fe(x) = m*x + a\n"; 
    print GP "fit fe(x) 'foo' using 1:2  via m,a \n";
    close (GP);
    
    extract_exp_fit_info ("fit.log", "foo", \$m, \$a);
    
    $m =~ /(\S+)(\(\S+\%\))/;  $m = $1; $m_err = $2;
    $a =~ /(\S+)(\(\S+\%\))/;  $a = $1; $a_err = $2;
    
    my $a_exp = exp ($a);
    
    $a_exp *= 1000;
    $a_exp = int $a_exp;
    $a_exp /= 1000;

    $m *= 1000;
    $m = int $m;
    $m /= 1000;

    $$a_exp_ref = $a_exp;
    $$a_err_ref = $a_err;

    $$m_ref = $m;
    $$m_err_ref = $m_err;

    system ("rm 'fit.log'\n"); 
    system("rm foo\n");

    $y_min_x = $max_y - $a_exp*($max_x-$min_x)**$m;
    $y_min_x *= 1000;
    $y_min_x = int $y_min_x;
    $y_min_x /= 1000;
     
   return $y_min_x
}


sub get_ali_from_qfile {

    my ($qfile, $ali_n, $win_n, $name1, $name2, $ali1_ref, $ali2_ref) = @_; 

    my $idx = 0;
    my $ali1;
    my $ali2;

    my $flag1 = 0;
    my $flag2 = 0;

    my $name;

    open (QFILE,"$qfile") || die;
    while (<QFILE>) {

	if (/^>(\S+)/ && $idx == 0) 
	{ 
	    $name = $1;

	    if ($flag1 == 1 && $flag2 == 1) { last; }

	    $ali1 = "";
	    $ali2 = "";
	    $flag1 = 0;
	    $flag2 = 0;

	    if ($name1 =~ $name) { $flag1 = 1; }
	    $idx = 1;
	}

	elsif (/^[^\>]/ && $idx == 1) 
	{
	    if ($flag1 == 1) { $ali1 .= $_; }

	}

	elsif (/^>(\S+)/ && $idx == 1) 
	{ 
	    $name = $1;
	    if ($name2 =~ $name) { $flag2 = 1; }
	    else                 { $flag1 = 0; $ali1 = ""; }
	    $idx = 0;
	}
	
	elsif (/^[^\>]/ && $idx == 0) 
	{
	    if ($flag1 == 1 && $flag2 == 1) { $ali2 .= $_; }
	}

    }
    close (QFILE);
   
    if ($flag1 == 0 || $flag2 == 0) {
	print "I coudnt identify this alignment  ($flag1,$flag2) ali_n $ali_n win_n $win_n.\n$name1\n$name2\n"; die; 
    }

    $$ali1_ref = $ali1;
    $$ali2_ref = $ali2;
}

sub get_gc_from_ali {
    my ($gc_ref, $motif_lend, $motif_rend, $ali1, $ali2) = @_;
    
    my $gc = 0;
    
    my $seq1 = $ali1;
    my $seq2 = $ali2;
    
    $seq1 =~ s/\n//g; $seq2 =~ s/\n//g;
    my $len_ali = length($seq1);
    if (length($seq2) != $len_ali) { print "bad len ali $len_ali but len_ali1", length($seq2), " \n"; die; }
    #print "len_ali $len_ali\n";

    my $len_frag = $motif_rend - $motif_lend + 1;
    #print "motif_lend $motif_lend motif_rend $motif_rend len_frag $len_frag\n";
	     
    #print "seq1$seq1\n";
    my $remove_left  = $motif_lend;
    my $remove_right = $len_ali-$motif_rend-1;

    $seq1 =~ s/^.{$remove_left}//;
    $seq2 =~ s/^.{$remove_left}//;
    
    $seq1 =~ s/(.{$remove_right})$//;
    $seq2 =~ s/.{$remove_right}$//;
    
    if (length($seq1) != $len_frag) { print "bad len $len_frag but len_seq1 ", length($seq1), " \n"; die; }
    if (length($seq2) != $len_frag) { print "bad len $len_frag but len_seq2 ", length($seq2), " \n"; die; }
   
    $seq1 =~ s/\-//g; $seq2 =~ s/\-//g;
    $seq1 =~ s/\.//g; $seq2 =~ s/\.//g;
    $seq1 =~ s/\_//g; $seq2 =~ s/\_//g;

    my $len = length($seq1) + length($seq2);
    
    $gc += ($seq1 =~ tr/GC/GC/);
    $gc += ($seq2 =~ tr/GC/GC/);

    $$gc_ref = 100*$gc/$len;

}

sub get_coords_from_ali {

    my ($nt_seq, $name, $startblast, $startfrag, $ali, $ali_lend, $ali_rend, $coorl_ref, $coorr_ref) = @_;

    my $cali = $ali;
    my $coorl = -1;
    my $coorr = -1;

    my $prev_ali;
    my $prev;
    my $post;

    if ($ali_lend == $ali_rend) { $$coorl_ref = 0; $$coorr_ref = 0; return; }

    $cali =~ s/\s+//g; $cali =~ s/\n//g;

    my $len_ali = length($cali);

    #paranoia test
    #
    if ($ali_lend >= $len_ali ||
	$ali_rend >= $len_ali   ) { 
	print "bad motif? lend=$ali_lend rend=$ali_rend total=$len_ali\n$nt_seq>ali=$name startblast $startblast startfraf $startfrag\n$ali\n"; 
	die; 
    }

    $prev = $ali_lend;
    $post = $len_ali - $ali_rend - 1;

    if ($post < 0) { print "wrong ends? len_ali=$len_ali $ali_lend $ali_rend \n"; die; }

    if ($prev > 0) {
	$cali =~ s/^(.{$prev})//;
	
	$prev_ali = $1;
	$prev_ali =~ s/\-//g;
	$prev_ali =~ s/\.//g;
	$coorl= length($prev_ali);
    }
    else {
	$coorl = 0;
    }


    $cali =~ s/.{$post}$//;
    $cali =~ s/\-//g;
    $cali =~ s/\.//g;

    if (length($cali) == 0) { $coorr = $coorl;                     }
    else                    { $coorr = $coorl + length($cali) - 1; }

    $$coorl_ref = $coorl;
    $$coorr_ref = $coorr;
}



sub identify_loci {
    my ($target, $ntseq, $overlap, 
	$startblast1, $startfrag1, $name1, $coor1, $ali1,
	$startblast2, $startfrag2, $name2, $coor2, $ali2,
	$type, $motif_lend, $motif_rend, $oth, $cod, $rna, 
	$nloci_ref, $name_ref, $lloci_ref, $rloci_ref, 
	$type_ref, $towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref)  = @_;

    my $l;
    my $coor1l;
    my $coor1r;
    my $coor2l;
    my $coor2r;
    my $startwin1;
    my $startwin2;
    my $endwin1;
    my $endwin2;
    my $new = 1;
    my $nloci;
    my $abs;
    my $name1_quote;
    my $name2_quote;
    my $rest;
    
    $nloci = $$nloci_ref;

    #print "\n>>$ntseq $name1\nCOOR1:$coor1\nSTARTBLAST1:$startblast1\nSTARTFRAG1:$startfrag1\n$motif_lend $motif_rend\n\n";
    #print "$ali1\n$ali2\n\n";

    if ($opt_q) {
	get_coords_from_ali ($ntseq, $name1, $startblast1, $startfrag1, $ali1, $motif_lend, $motif_rend, \$coor1l, \$coor1r);
	get_coords_from_ali ($ntseq, $name2, $startblast2, $startfrag2, $ali2, $motif_lend, $motif_rend, \$coor2l, \$coor2r);
    }
    else {
	#remember conventions for qrna output:
	#
	#      posX: 0-62 [0-59](60) 
	#
	# is an alignment of 63 positions with 3 gaps. 
	# So the actual positions are from 0 to 59 not to 62.
	#
	#
	if ($coor1 =~ /^\d+-\d+\s+\[(\d+)-(\d+)\]\((\d+)\)/) { $coor1l = $1; $coor1r = $2; }
	if ($coor2 =~ /^\d+-\d+\s+\[(\d+)-(\d+)\]\((\d+)\)/) { $coor2l = $1; $coor2r = $2; }
    }
    #print "coor1l $coor1l coor1r $coor1r\n";
    #print "coor2l $coor2l coor2r $coor2r\n";

    #get the ENDS depending on the strand
    #

    if ($startblast1 =~ /(\d+)>(\d+)/) {
	$startwin1 = $1 + $coor1l;
	$endwin1   = $1 + $coor1r;
    }
    elsif ($startblast1 =~ /(\d+)<(\d+)/) {
	$startwin1 = $2 - $coor1r;
	$endwin1   = $2 - $coor1l;
    }
    else { print "identify_loci(): fasta name has to indicate strand (1): $startblast1\n"; die; }

    if ($startblast2 =~ /(\d+)>(\d+)/) {
	$startwin2 = $1 + $coor2l;
	$endwin2   = $1 + $coor2r;
    }
    elsif ($startblast2 =~ /(\d+)<(\d+)/) {
	$startwin2 = $2 - $coor2r;
	$endwin2   = $2 - $coor2l;
    }
    else { print "identify_loci(): fasta name has to indicate strand (2): $startblast2\n"; die; }

    $startwin1 += $startfrag1;
    $startwin2 += $startfrag2;
    $endwin1   += $startfrag1;
    $endwin2   += $startfrag2;

    if (0) {
	print "\name  $name1\n";
	print "startblast  $startblast1\n";
	print "startfrag  $startfrag1\n";
	print "coor  $coor1\n";
	print "motif  $motif_lend $motif_rend\n";
	#print "ali  $ali1\n";
	print "startwin1 $startwin1 endwin1 $endwin1\n";
    }
    
   #paranoia
    if ( ($startwin1 > $endwin1) || $startwin1 < 0 || $endwin1 < 0 ) { 
	print "1:got ends of the window wrong $coor1l $coor1r $startblast1 $startwin1 $endwin1 \n"; die; 
    }
    if ( ($startwin2 > $endwin2) || $startwin2 < 0 || $endwin2 < 0 ) { 
	print "2:got ends of the window wrong $coor2l $coor2r $startblast2 $startwin2 $endwin2 \n"; die; 
    }
    
    # regex metacharacters: \ | ( ) [ { ^ $ * + ? .
    #
    $name1_quote = quotemeta $name1;
    $name2_quote = quotemeta $name2;
   
    $name1 =~ s/\\//g;
    if ($name1 =~ /(\S+)[\-\:](\d+)\-(\d+)/ || $name1 =~ /(\S+)\/frag.+\/(\d+)\-(\d+)/) { 
	$name1 = $1;
	if ($2 < $3) { $abs = $2-1; }
	else         { $abs = $3-1; }
	$startwin1 += $abs; $endwin1 += $abs;

    }

    $name2 =~ s/\\//g;
    if ($name2 =~ /(\S+)[\-\:](\d+)\-(\d+)/ || $name2 =~ /(\S+)\/frag.+\/(\d+)\-(\d+)/) { 
	$name2 = $1;
	if ($2 < $3) { $abs = $2-1; }
	else         { $abs = $3-1; }
	$startwin2 += $abs; $endwin2 += $abs;

    }

    # check
    #
    if ($name1 =~ /\S+[\-\:](\d+)[\>\<](\d+)/) {
	my $x1 = $1;
	my $x2 = $2;
	if ($endwin1-$startwin1 > $x2-$x1) { print "bad locus $name1 $startwin1 $endwin1\n"; die; }

    }
    if ($name2 =~ /\S+[\-\:](\d+)[\>\<](\d+)/) {
	my $x1 = $1;
	my $x2 = $2;
	if ($endwin2-$startwin2 > $x2-$x1) { print "bad locus $name2 $startwin2 $endwin2\n"; die; }

    }

   #start the first locus
    if ($nloci == 0 && $type =~ /^$target$/) {
	$name_ref->[$nloci]    = $name1;
	$lloci_ref->[$nloci]   = $startwin1;
	$rloci_ref->[$nloci]   = $endwin1;
	$type_ref->[$nloci]    = $type;
	$towhom_ref->[$nloci]  = $name2."-".$startwin2."-".$endwin2.":";
	$howmany_ref->[$nloci] = 1;
	$othsc_ref->[$nloci]   = $oth;
	$codsc_ref->[$nloci]   = $cod;
	$rnasc_ref->[$nloci]   = $rna;
	$nloci++;

    }
    
    elsif ($nloci > 0) {

	#identify if this is a new locus
	for ($l = 0; $l < $nloci; $l++) {
	    my $name_ref_quote = quotemeta $name_ref->[$l];
	    
	    #an already existing locus, modify the ends if convenient  ---  
	    if (
		#$name1_quote eq $name_ref_quote             &&
		$name1 eq $name_ref->[$l]             &&
		$type       =~ /^$type_ref->[$l]$/          && 
		overlap ($lloci_ref->[$l], $rloci_ref->[$l], $startwin1, $endwin1, $overlap) == 1)
	    {
		#modify ends
		if($startwin1 < $lloci_ref->[$l]) { $lloci_ref->[$l] = $startwin1; }
		if($endwin1   > $rloci_ref->[$l]) { $rloci_ref->[$l] = $endwin1;   }
		
		#
		#check
		#
		if ($name1 =~ /\S+[\-\:](\d+)[\>\<](\d+)/) {
		    my $x1 = $1;
		    my $x2 = $2;
		    if ($rloci_ref->[$l]-$lloci_ref->[$l] > $x2-$x1) { print "bad locus $name1_quote $lloci_ref->[$l] $rloci_ref->[$l]\n"; die; }
		    
		}

		$towhom_ref->[$l]  .= $name2."-".$startwin2."-".$endwin2.":";
		$howmany_ref->[$l] ++;
		$othsc_ref->[$l]   += $oth;
		$codsc_ref->[$l]   += $cod;
		$rnasc_ref->[$l]   += $rna;
		$new = 0;

		last;

	    }

	}
	
	if ($new == 1 && $type =~ /^$target$/) {
	    $name_ref->[$nloci]    = $name1;
	    $lloci_ref->[$nloci]   = $startwin1;
	    $rloci_ref->[$nloci]   = $endwin1;
	    $type_ref->[$nloci]    = $type;
	    $towhom_ref->[$nloci]  = $name2."-".$startwin2."-".$endwin2.":";
	    $howmany_ref->[$nloci] = 1;
	    $othsc_ref->[$nloci]   = $oth;
	    $codsc_ref->[$nloci]   = $cod;
	    $rnasc_ref->[$nloci]   = $rna;

	    $nloci++;
	}
    }

    $$nloci_ref = $nloci;
}

sub is_above_cutoff {
    my ($cutoff, $type_ref, $codlod, $rnalod, $noseq_ref, $ncseq_ref, $nrseq_ref, $ncoseq_ref, $nroseq_ref, $nrcseq_ref, $n3seq_ref) = @_;

    my $above;
    my $sqrt2 = sqrt(2.0);
    my $log2 = 0.69314718056;

   if ($codlod <= $cutoff && $codlod >= -$cutoff &&
	$rnalod <= $cutoff && $rnalod >= -$cutoff) { 
	$$n3seq_ref++; $$type_ref = 'RNA/COD/OTH'; $above = 0; 
    }
    elsif ($codlod < -$cutoff &&
	   $rnalod <= $cutoff && $rnalod >= -$cutoff) { 
	$$nroseq_ref++; $$type_ref = 'RNA/OTH'; $above = 0; 
    }
    elsif ($rnalod < -$cutoff &&
	   $codlod <= $cutoff && $codlod >= -$cutoff) { 
	$$ncoseq_ref++; $$type_ref = 'COD/OTH'; $above = 0; 
    }
    elsif ($rnalod <= $codlod+$sqrt2*$cutoff && $rnalod >= $codlod-$sqrt2*$cutoff) { 
	$$nrcseq_ref++; $$type_ref = 'RNA/COD'; $above = 0; 
    }
    elsif ($rnalod < -$cutoff && $codlod < -$cutoff) { 
	$$noseq_ref++; $$type_ref = 'OTH';     $above = 1; 
    } 
    elsif ($codlod > $cutoff && $rnalod < $codlod-$sqrt2*$cutoff) { 
	$$ncseq_ref++; $$type_ref = 'COD';     $above = 1; 
    } 
    elsif ($rnalod > $cutoff && $codlod < $rnalod-$sqrt2*$cutoff) { 
	$$nrseq_ref++; $$type_ref = 'RNA';     $above = 1; 
    } 
    else { print "unknown type for (rnasc, codsc) = (", $rnalod, ", $codlod)\n"; }

    return $above;
}

sub is_above_cutoff_sigmoidal {
    my ($cutoff_ref, $gc_min_cutoff_ref, $gc_max_cutoff_ref, $type_ref, $gc, $othlodsigm, $codlodsigm, $rnalodsigm, $noseq_ref, $ncseq_ref, $nrseq_ref) = @_;

    my $above;
  
    foreach my $cut (@$cutoff_ref) {

	if ($gc >= $gc_min_cutoff_ref->{$cut} && $gc < $gc_max_cutoff_ref->{$cut}) {
	    if    ($codlodsigm >= $cut) { $$ncseq_ref++; $$type_ref = 'COD'; $above = 1; }
	    elsif ($rnalodsigm >= $cut) { $$nrseq_ref++; $$type_ref = 'RNA'; $above = 1; }
	    elsif ($othlodsigm >= $cut) { $$noseq_ref++; $$type_ref = 'OTH'; $above = 1; }
	    else                        { $$type_ref = '';    $above = 0; }
	}
	else  { $$type_ref = '';    $above = 0; }
    }
    
    return $above;
}

sub is_above_cutoff_rnaz {
    my ($cutoff, $type_ref, $rnasc, $nrseq_ref) = @_;

    my $above;

    if ($rnasc >= $cutoff) { $$nrseq_ref++;   $above = 1; $$type_ref = 'RNA'; }
    else                   { $$type_ref = ''; $above = 0; }

    return $above;
}

sub is_hit_in_locus {
    my ($name_hit, $start_hit, $end_hit, $name_loci, $start_loci, $end_loci) = @_;

    my $is = 0;

    if ($name_hit  eq $name_loci  && 
	$start_hit >= $start_loci && 
	$end_hit   <= $end_loci     )        { $is = 1; }
    
    return $is;
}

sub loci_left_to_right {

    my $la;
    my $lb;
    my $ra;
    my $rb;
    my $ta;
    my $tb;

    $a =~ /(\d+)\-(\d+)(\S+)/;
    $la = $1; $ra = $2; $ta = $3;

    $b =~ /(\d+)\-(\d+)(\S+)/;
    $lb = $1; $rb = $2; $tb = $3;

    ($la <=> $lb) || ($ra <=> $rb) || ($ta cmp $tb);
}



sub overlap {
    my ($lend_loci, $rend_loci, $lend, $rend, $overlap)  = @_;

    my $is_same_loci = 0;

    if ($lend >= $lend_loci && $rend <= $rend_loci) { $is_same_loci = 1; } #is included
    if ($lend <= $lend_loci && $rend >= $rend_loci) { $is_same_loci = 1; } #extends over

    if ($rend < $rend_loci && $rend >= $lend_loci+$overlap)  { $is_same_loci = 1; } #left-end overlap
    if ($lend > $lend_loci && $lend <= $rend_loci-$overlap)  { $is_same_loci = 1; } #right-end overlap


    return $is_same_loci;

}

sub phase_count_target_rnaz {

    my ($target) = @_;

    my $coor1;
    my $coor2;
    
    my $othsc;
    my $rnasc;
    my $codsc;
    
    my $name1;
    my $name2;
    
    my $whole_name1;
    my $whole_name2;

    my $ali1;
    my $ali2;

    my $seq1;
    my $seq2;
    
    my @name1;
    my @name2;
    my $rest;
    
    my @lloci1;
    my @lloci2;
    
    my @rloci1;
    my @rloci2;
    
    my @type1;
    my @type2;
    
    my @howmany1;
    my @howmany2;
    
    my @othsc1;
    my @othsc2;
    
    my @codsc1;
    my @codsc2;
    
    my @rnasc1;
    my @rnasc2;
    
    my @towhom1;
    my @towhom2;
    
    my $startblast1;
    my $startblast2;

    my $startfrag1;
    my $startfrag2;
        
    my $nnamesseq = 0;   # total number of blast hits 
    my $ntseq = 0;       # total number of windows
    my $nloci1 = 0;      # total number of independent loci
    my $nloci2 = 0;      # total number of independent loci
    
    my $nrcseq  = 0; # number of seq in transition RNA/COD
    my $nroseq  = 0; # number of seq in transition RNA/OTH
    my $ncoseq  = 0; # number of seq in transition COD/OTH
    my $n3seq   = 0; # number of seq in transition RNA/COD/OTH
    my $nundseq;     # number of seqs in transitions
    # $nundseq = nrcseq + $nroseq + $ncoseq + $n3seq
    
    my $nrseq     = 0; # number of seq in RNA phase
    my $ncseq     = 0; # number of seq in COD phase
    my $noseq     = 0; # number of seq in OTH phase
    my $nphaseseq;     # number of seq that belong to full phases
    # $nphaseseq = $nundseq + $nrseq + $ncseq + $noseq
    
    my $codpos = 0; # number of windows with cod_lodsc > 0
    my $rnapos = 0; # number of windows with rna_lodsc > 0
    
    my $codcut = 0; # number of windows with cod_lodsc > cutoff
    my $rnacut = 0; # number of windows with rna_lodsc > cutoff
    
    my $seq = 0;
    
    my $idx = 0;
    
    my $len;
    my $len_new;
    my $id;
    my $id_new;
    my $gc;
    my $gc_new;
    
    my $time = -1;
    
    my $Nid = 100;
    my $kid = 1/1;
    
    my $motif_lend;
    my $motif_rend;

    my $total_len;

    my $new = 0;

    open (FILE,"$file") || die;
    while (<FILE>) {
	if  (/^\:\:\:/)
	{
	    $new = 1;
	}
	elsif  (/^\>(\S+)/ && $seq == 0 && $new == 1)
	{
	    $whole_name1 = $1;
	    $whole_name1 =~ s/\\//g;
	    
	    if ($whole_name1 =~ /^(\S+)[\-\:](\d+[><]\d+)\-(.*)$/) { 
		$name1 = $1;
		$startblast1 = $2; 
		$rest = $3;
		
		$startfrag1 = 0;
		if ($name1 =~ /^(\S+)\/frag\d+(.+)$/) {
		    $name1  = $1; 
		    $startfrag1 = $2;
		    $name1 =~ s/\\//g;
		    
		    if ($startfrag1 =~ /(\S+)\-\S+/) {
			$startfrag1 = $1;
			$startfrag1 =~ s/\///g; $startfrag1 =~ s/\\//g; 
			$startfrag1 --;
			
		    }
		}
		if ($rest =~ /^\-(\S+)\-(\S+)/) { $motif_lend = $1-1; $motif_rend = $2-1; $coor1 = "$1\-$2"; }
	    }
	    $seq = 1; 
	    $nnamesseq++; 
	    
	}
	elsif  (/^\>(\S+)/ && $seq == 1 && $new == 1)
	{
	    
	    $whole_name2 = $1;
	    $whole_name2 =~ s/\\//g;
	    
	    if ($whole_name2 =~ /^(\S+)[\-\:](\d+[><]\d+)\-(.*)$/) { 
		$name2 = $1;
		$startblast2 = $2;
		$rest = $3;
		
		$startfrag2 = 0;
		if ($name2 =~ /^(\S+)\/frag\d+(.+)$/) {
		    $name2 = $1; 
		    $startfrag2 = $2;
		    $name2 =~ s/\\//g;
		    
		    if ($startfrag2 =~ /(\S+)\-\S+/) {
			$startfrag2 = $1;
			$startfrag2 =~ s/\///g; $startfrag2 =~ s/\\//g; 
			$startfrag2 --;
			
		    }
		}
		if ($rest =~ /^\-(\S+\-\S+)/) { $coor2 = $1; }
	    }
	    
	    $seq = 0; 
	    $nnamesseq++; 
	    
	    if ($opt_q) { 
		get_ali_from_qfile ($file_q, $nnamesseq*0.5, $ntseq, $whole_name1, $whole_name2, \$ali1, \$ali2); 
		get_gc_from_ali (\$gc, $motif_lend, $motif_rend, $ali1, $ali2);
	    }
	    else        { 
		$ali1 = ""; $ali2 = ""; 
	    }
	    
	    $new = 0;
	}
	elsif  (/Mean pairwise identity:\s+(\S+)/)
	{
	    $id  = $1;
	    if ($id >= $id_min && $id <= $id_max && $gc >= $gc_min && $gc <= $gc_max) { $ntseq ++; }
	}
	elsif (/SVM RNA-class probability:\s+(\S+)/) {
	    $rnasc = $1;
	    $othsc = 0.0; 
	    $codsc = 0.0;  
	    
	    if (0) {
		print "\nwhole name  $whole_name1\n";
		print "name  $name1\n";
		print "startblast  $startblast1\n";
		print "startfrag  $startfrag1\n";
		print "coor  $coor1\n";
		print "motif  $motif_lend $motif_rend\n";
		print "id  $id gc $gc\n";
	    }

	    analyze_window ($target, $ntseq, $overlap, 
			    \$noseq, \$ncseq, \$nrseq, \$ncoseq, \$nroseq, \$nrcseq, \$n3seq, \$codpos, \$rnapos, \$codcut, \$rnacut,
			    $id, $id_min, $id_max, $gc, $gc_min, $gc_max,
			    $startblast1, $startfrag1, $name1, $coor1, $ali1,
			    $startblast2, $startfrag2, $name2, $coor2, $ali2,
			    "rnaz", $motif_lend, $motif_rend, $othsc, $codsc, $rnasc,
			    \$nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1,
			    \$nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	    
	    $motif_lend = 0;
	    $motif_rend = 0;
	}
	
	else  { next; }
    }

    close (FILE);
    
    print OUT "\n---------------Some General Statistics-------------------\n";
    print OUT "FILE:            \t", "$file\n";
    if (!$opt_z) { print OUT "Cutoff:          \t", "$fix_cutoff\n\n"; }
    else         { print OUT "Cutoff:          \t", "cutoff variable\n\n"; }
    foreach my $cut (@cutoff) {
	print OUT "gc:[$gc_min_cutoff{$cut}:$gc_max_cutoff{$cut}) ** cutoff = $cut\n";
    }
    print OUT "min id:          \t", "$id_min\n";
    print OUT "max id:          \t", "$id_max\n";
    print OUT "min gc:          \t", "$gc_min\n";
    print OUT "max gc:          \t", "$gc_max\n\n";
    if ($file_q) { print OUT "# inblastn hits: \t",  $n_in_ali ,"\n"; }
    print OUT "# blastn hits:   \t",  $nnamesseq/2.0 ,"\n";
    print OUT "# windows:       \t", "$ntseq\n";
    print OUT "---------------------------------------------------------\n";
    
    print OUT "\n---------------Statistics by Windows---------------------\n";
    print OUT "# windows:            \t", "$ntseq\n";
    print OUT "\nRNA>0:           \t", $rnapos,    "/", "$ntseq\n";
    print OUT "RNA>cutoff:       \t", $rnacut,    "/", "$ntseq\n\n";
    
    
    if (!$opt_N) {
#
#for QUERY
#
	if ($whichorg == 1 || $whichorg == 12 || $towhomness) {
	    if ($nloci1 > 0) {
		arrange_loci($org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
		recheck_loci(      \$nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    }
	
	}
#
#for SUBJ
#
    if ($whichorg == 2 || $whichorg == 12 || $towhomness) {
	if ($nloci2 > 0) {
	    arrange_loci($org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	    recheck_loci(      \$nloci2, \@name2, \@lloci2, \@rloci2, \@type2,\ @towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	}
    }
	
	if ($whichorg == 1) 
	{
	    class_stat("$outputfile1gff", $target, $towhomness, $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    class_stat("$outputfile2gff", $target, 0,           $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	}
	elsif ($whichorg == 2) 
	{
	    class_stat("$outputfile1gff", $target, 0,           $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    class_stat("$outputfile2gff", $target, $towhomness, $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	}
	elsif ($whichorg == 12) 
	{
	    class_stat("$outputfile1gff", $target, $towhomness, $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    class_stat("$outputfile2gff", $target, $towhomness, $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	}
    }
    
}


sub phase_count_target {
    
    my ($target) = @_;
    
    my $num = 0;
    
    my $coor1;
    my $coor2;
    
    my $othsc;
    my $rnasc;
    my $codsc;
    
    my $name1;
    my $name2;
    
    my $whole_name1;
    my $whole_name2;

    my $ali1;
    my $ali2;

    my $seq1;
    my $seq2;
    
    my @name1;
    my @name2;
    my $rest;
    
    my @lloci1;
    my @lloci2;
    
    my @rloci1;
    my @rloci2;
    
    my @type1;
    my @type2;
    
    my @howmany1;
    my @howmany2;
    
    my @othsc1;
    my @othsc2;
    
    my @codsc1;
    my @codsc2;
    
    my @rnasc1;
    my @rnasc2;
    
    my @towhom1;
    my @towhom2;
    
    my $startblast1;
    my $startblast2;

    my $startfrag1;
    my $startfrag2;
        
    my $nnamesseq = 0;   # total number of blast hits 
    my $ntseq = 0;       # total number of windows
    my $nloci1 = 0;      # total number of independent loci
    my $nloci2 = 0;      # total number of independent loci
    
    my $nrcseq  = 0; # number of seq in transition RNA/COD
    my $nroseq  = 0; # number of seq in transition RNA/OTH
    my $ncoseq  = 0; # number of seq in transition COD/OTH
    my $n3seq   = 0; # number of seq in transition RNA/COD/OTH
    my $nundseq;     # number of seqs in transitions
    # $nundseq = nrcseq + $nroseq + $ncoseq + $n3seq
    
    my $nrseq     = 0; # number of seq in RNA phase
    my $ncseq     = 0; # number of seq in COD phase
    my $noseq     = 0; # number of seq in OTH phase
    my $nphaseseq;     # number of seq that belong to full phases
    # $nphaseseq = $nundseq + $nrseq + $ncseq + $noseq
    
    my $codpos = 0; # number of windows with cod_lodsc > 0
    my $rnapos = 0; # number of windows with rna_lodsc > 0
    
    my $codcut = 0; # number of windows with cod_lodsc > cutoff
    my $rnacut = 0; # number of windows with rna_lodsc > cutoff
    
    my $seq = 0;
    
    my $idx = 0;
    
    my $len;
    my $len_new;
    my $id;
    my $id_new;
    my $gc;
    my $gc_new;
    
    my $time = -1;
    
    my $Nid = 100;
    my $kid = 1/1;
    
    my $motif_lend;
    my $motif_rend;

    my $total_len;

    open (FILE,"$file") || die;
    while (<FILE>) {
	
	if (/^Divergence time \(\S+\):\s+(\S+)/) {
	    $time = $1;
	}

	elsif (/^length of whole alignment after removing common gaps: (\d+)/) { 
	    $total_len = $1;
	}

	elsif (/^>(\S+)/ && $seq == 0) { 

	    $whole_name1 = $1;
	    $whole_name1 =~ s/\\//g;

	    if ($whole_name1 =~ /^(\S+)[\-\:](\d+[><]\d+)\-(.*)$/) { 
		$name1 = $1;
		$startblast1 = $2; 
		$rest = $3;
		
		$startfrag1 = 0;
		if ($name1 =~ /^(\S+)\/frag\d+(.+)$/) {
		    $name1  = $1; 
		    $startfrag1 = $2;
		    $name1 =~ s/\\//g;
		    
		if ($startfrag1 =~ /(\S+)\-\S+/) {
		    $startfrag1 = $1;
		    $startfrag1 =~ s/\///g; $startfrag1 =~ s/\\//g; 
		    $startfrag1 --;
		    
		}
		}
		if ($rest =~ /^(\S+)/) { $name1 .= "\-$1"; }
	    }
	    $seq = 1; 
	    $nnamesseq++; 

	}
	
	elsif (/^>(\S+)/ && $seq == 1) { 

	    $whole_name2 = $1;
	    $whole_name2 =~ s/\\//g;

	    if ($whole_name2 =~ /^(\S+)[\-\:](\d+[><]\d+)\-(.*)$/) { 
		$name2 = $1;
		$startblast2 = $2;
		$rest = $3;
		
		$startfrag2 = 0;
		if ($name2 =~ /^(\S+)\/frag\d+(.+)$/) {
		    $name2 = $1; 
		    $startfrag2 = $2;
		    $name2 =~ s/\\//g;
		    
		    if ($startfrag2 =~ /(\S+)\-\S+/) {
			$startfrag2 = $1;
			$startfrag2 =~ s/\///g; $startfrag2 =~ s/\\//g; 
			$startfrag2 --;
			
		    }
		}
		if ($rest =~ /^(\S+)/) { $name2 .= "\-$1"; }
	    }
	    
	    $seq = 0; 
	    $nnamesseq++; 

	    if ($opt_q) { get_ali_from_qfile ($file_q, $nnamesseq*0.5, $ntseq, $whole_name1, $whole_name2, \$ali1, \$ali2); }
	    else        { $ali1 = ""; $ali2 = ""; }
	} 
	
	elsif (/^length alignment:\s+(\S+) \(id=(\d+\.\d+)\)/) { 
	    $len = $1;
	    $id  = $2;
	}
		
	elsif (/^posX: (.+)$/ ) { 
	    $coor1 = $1;

	    if ($coor1 =~ /\(\S+\s(\S+)\s(\S+)\s\S+\)/) { $gc = $1 + $2; }
	} 
	
	elsif (/^posY: (.+)$/) { 
	    $coor2 = $1;

	    if ($coor2 =~ /\(\S+\s(\S+)\s(\S+)\s\S+\)/) { $gc += $1 + $2; $gc *= 50; }
	    	    
	    if ($id >= $id_min && $id <= $id_max && $gc >= $gc_min && $gc <= $gc_max) { $ntseq ++; }
	    
	} 
	
	elsif (/^$tag/) { $num = 1; }
	
	elsif (/^$target ends \*\([\-\+]\)\s+=\s+\((\d+)\.\.\[\d+\]\.\.(\d+)\)/) { 
	    $motif_lend = $1; 
	    $motif_rend = $2; 

	}

	#this is to accomodate version 1.2b now obsolete
	elsif (/^$target\s+ends = (\d+)\s+(\d+)/) { 
	    $motif_lend = $1; 
	    $motif_rend = $2; 

	    if ($motif_lend > $motif_rend) { switch(\$motif_lend, \$motif_rend); }

	    # there is a bug in reporting bugs of version 1.2b sometimes they reach the end of the
	    # sequence. At this stage, I'm not going to fix the bug, but deal with it.
	    if ($motif_lend <  0         ) { print "laggg $motif_lend $ntseq\n"; die; }
	    if ($motif_lend >= $total_len) { $motif_lend = $total_len-1; }
	    if ($motif_rend >= $total_len) { $motif_rend = $total_len-1; }
	}

	elsif (/^\s+ OTH = \s+(\S+)\s+ COD = \s+(\S+)\s+ RNA = \s+(\S+)/ && $num == 1) { 
	    $othsc = $1; 
	    $codsc = $2; 
	    $rnasc = $3; 
	    
	    if (0) {
		print "\nwhole name  $whole_name1\n";
		print "name  $name1\n";
		print "startblast  $startblast1\n";
		print "startfrag  $startfrag1\n";
		print "coor  $coor1\n";
		print "motif  $motif_lend $motif_rend\n";
	    }
	    
	    analyze_window ($target, $ntseq, $overlap, 
			    \$noseq, \$ncseq, \$nrseq, \$ncoseq, \$nroseq, \$nrcseq, \$n3seq, \$codpos, \$rnapos, \$codcut, \$rnacut,
			    $id, $id_min, $id_max, $gc, $gc_min, $gc_max,
			    $startblast1, $startfrag1, $name1, $coor1, $ali1,
			    $startblast2, $startfrag2, $name2, $coor2, $ali2,
			    $type_of_score, $motif_lend, $motif_rend, $othsc, $codsc, $rnasc,
			    \$nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1,
			    \$nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);	    
	    
	    $num = 0;
	    $motif_lend = 0;
	    $motif_rend = 0;
	}
	
	else  { next; }
	
    }
    
    close (FILE);

    print OUT "\n---------------Some General Statistics-------------------\n";
    print OUT "FILE:            \t", "$file\n";
    print OUT "method:          \t", "$tag\n";
    if (!$opt_z ) { print OUT "Cutoff:          \t", "$fix_cutoff\n\n"; }
    else          { print OUT "Cutoff:          \t", "cutoff variable\n\n"; }
    foreach my $cut (@cutoff) {
	print OUT "gc:[$gc_min_cutoff{$cut}:$gc_max_cutoff{$cut}) ** cutoff = $cut\n";
	
    }
    print OUT "min id:          \t", "$id_min\n";
    print OUT "max id:          \t", "$id_max\n";
    print OUT "min gc:          \t", "$gc_min\n";
    print OUT "max gc:          \t", "$gc_max\n\n";
    if ($file_q) { print OUT "# inblastn hits: \t",  $n_in_ali ,"\n"; }
    print OUT "# blastn hits:   \t",  $nnamesseq/2.0 ,"\n";
    print OUT "# windows:       \t", "$ntseq\n";
    print OUT "---------------------------------------------------------\n";
    
    print OUT "\n---------------Statistics by Windows---------------------\n";
    print OUT "# windows:            \t", "$ntseq\n";
    print OUT "\nRNA>0:           \t", $rnapos,    "/", "$ntseq\n";
    print OUT "RNA>cutoff:       \t", $rnacut,    "/", "$ntseq\n\n";
    
    print OUT "COD>0:            \t", $codpos,    "/", "$ntseq\n";
    print OUT "COD>cutoff:       \t", $codcut,    "/", "$ntseq\n\n";
    
    $nphaseseq = $nrseq + $ncseq + $noseq;
    print OUT " in phases:        \t", $nphaseseq, "/", "$ntseq\n";
    print OUT "\tRNA:             \t", $nrseq,     "/", "$nphaseseq\n";
    print OUT "\tCOD:             \t", $ncseq,     "/", "$nphaseseq\n";
    print OUT "\tOTH:             \t", $noseq,     "/", "$nphaseseq\n\n";
    
    $nundseq   = $nrcseq + $nroseq + $ncoseq + $n3seq;
    print OUT " in transitions:   \t", $nundseq, "/", "$ntseq\n";
    print OUT "\tRNA/COD:         \t", $nrcseq,  "/", "$nundseq\n";
    print OUT "\tRNA/OTH:         \t", $nroseq,  "/", "$nundseq\n";
    print OUT "\tCOD/OTH:         \t", $ncoseq,  "/", "$nundseq\n";
    print OUT "\tRNA/COD/OTH:     \t", $n3seq,   "/", "$nundseq\n";
    print OUT "---------------------------------------------------------\n";
    

    if (!$opt_N) {
#
#for QUERY
#
	if ($whichorg == 1 || $whichorg == 12 || $towhomness) {
	    if ($nloci1 > 0) {
		arrange_loci($org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
		recheck_loci(      \$nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    }
	    
	}
#
#for SUBJ
#
	if ($whichorg == 2 || $whichorg == 12 || $towhomness) {
	    if ($nloci2 > 0) {
		arrange_loci($org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
		recheck_loci(      \$nloci2, \@name2, \@lloci2, \@rloci2, \@type2,\ @towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	    }
	}
	
#clean up towhom after both sets of loci have been obtained
#
	if ($towhomness) 
	{
	    if ($whichorg == 1 || $whichorg == 12) { cleanup_towhom($nloci1, \@towhom1, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2); }
	    if ($whichorg == 2 || $whichorg == 12) { cleanup_towhom($nloci2, \@towhom2, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1); }
	}
	
	if ($whichorg == 1) 
	{
	    class_stat("$outputfile1gff", $target, $towhomness, $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    class_stat("$outputfile2gff", $target, 0,           $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	}
	elsif ($whichorg == 2) 
	{
	    class_stat("$outputfile1gff", $target, 0,           $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    class_stat("$outputfile2gff", $target, $towhomness, $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	}
	elsif ($whichorg == 12) 
	{
	    class_stat("$outputfile1gff", $target, $towhomness, $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    class_stat("$outputfile2gff", $target, $towhomness, $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	}
	
    }

}

sub read_gc_cutoff_file {
    my ($gc_cutoff_file, $cutoff_ref, $gc_min_cutoff_ref, $gc_max_cutoff_ref, $lambda_ref, $mu_ref, $size_ref) = @_;

    my $x = 0;
    my $cut;
    my $min_gc;
    my $max_gc;

    my $lambda;
    my $mu;
    my $size;

    my $verbose = 0;

    if ($gc_cutoff_file) {
	open (GC,"$gc_cutoff_file") || die;
	while (<GC>) {
	    if (/^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
		$cut = $1;
		$min_gc = $2;
		$max_gc = $3;
		$lambda = $4;
		$mu     = $5;
		$size   = $6;

		$cutoff_ref->[$x] = $cut;
		$gc_min_cutoff_ref->{$cut} = $min_gc;
		$gc_max_cutoff_ref->{$cut} = $max_gc;

		$lambda_ref->{$cut} = $lambda;
		$mu_ref->{$cut}     = $mu;
		$size_ref->{$cut}   = $size;

		$x++;
	    }
	}
	close (GC);
    }
    else {
	$cutoff_ref->[0] = $fix_cutoff;
	$gc_min_cutoff_ref->{$fix_cutoff} = 0;
	$gc_max_cutoff_ref->{$fix_cutoff} = 100;

	$lambda_ref->{$fix_cutoff} = $fix_lambda;
	$mu_ref->{$fix_cutoff}     = $fix_mu;
	$size_ref->{$fix_cutoff}   = $fix_size;
    }
   
    if ($verbose) {
	foreach my $cut (@cutoff) {
	    print "gc:[$gc_min_cutoff_ref->{$cut}:$gc_max_cutoff_ref->{$cut}) ** cutoff = $cut\n";
	    
	}
    }
}

# the reason for this subroutine is that ones we have identified the independent loci,
# we have to go back to check that those loci (which may have expanded their limits after being defined)
# do not overlap.

sub recheck_loci {
    my ($nloci_ref, $name_ref, $lloci_ref, $rloci_ref, $type_ref, $towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;
    my $l;
    my $k;
    my $sh;
    my $name;
    my $name_ref_quote;
    my $startloci;
    my $endloci;
    my $type;
    my $towhom;
    my $howmany;
    my $othsc;
    my $codsc;
    my $rnasc;
    my $nloci;

    my $verbose = 0;
    
    $nloci = $$nloci_ref;
    
    if ($verbose) {
	print "\n before arrange_loci\n"; 
	for ($l = 0; $l < $nloci; $l++) { 
	    print "BBLOCI:$l $name_ref->[$l] $lloci_ref->[$l] $rloci_ref->[$l]\n"; 
	}
    }

    for ($k = 0; $k < $nloci; $k++) {
	$name_ref_quote = quotemeta $name_ref->[$k];
	
	for ($l = $k+1; $l < $nloci; $l++) {
	    $name      = quotemeta $name_ref->[$l];
	    $startloci = $lloci_ref->[$l];
	    $endloci   = $rloci_ref->[$l];
	    $type      = $type_ref->[$l];
	    $towhom    = $towhom_ref->[$l];
	    $howmany   = $howmany_ref->[$l];
	    $othsc     = $othsc_ref->[$l];
	    $codsc     = $codsc_ref->[$l];
	    $rnasc     = $rnasc_ref->[$l];
	    
	    #an already existing locus, modify the ends if convenient
	    if ($name      eq $name_ref_quote            && 
		$type      =~ /^$type_ref->[$k]$/        &&		
		overlap ($lloci_ref->[$k], $rloci_ref->[$k], $startloci, $endloci, $overlap) == 1)
	    {
		#modify ends
		if($startloci < $lloci_ref->[$k]) { $lloci_ref->[$k] = $startloci; }
		if($endloci   > $rloci_ref->[$k]) { $rloci_ref->[$k] = $endloci;   }

		$towhom_ref->[$k]  .= $towhom;
		$howmany_ref->[$k] += $howmany;
		$othsc_ref->[$k]   += $othsc;
		$codsc_ref->[$k]   += $codsc;
		$rnasc_ref->[$k]   += $rnasc;
		
		for ($sh = $l; $sh < $nloci-1; $sh++) {
		    $name_ref->[$sh]    = $name_ref->[$sh+1];
		    $lloci_ref->[$sh]   = $lloci_ref->[$sh+1];
		    $rloci_ref->[$sh]   = $rloci_ref->[$sh+1];
		    $type_ref->[$sh]    = $type_ref->[$sh+1];
		    $towhom_ref->[$sh]  = $towhom_ref->[$sh+1];
		    $howmany_ref->[$sh] = $howmany_ref->[$sh+1];
		    $othsc_ref->[$sh]   = $othsc_ref->[$sh+1];
		    $codsc_ref->[$sh]   = $codsc_ref->[$sh+1];
		    $rnasc_ref->[$sh]   = $rnasc_ref->[$sh+1];

		}
		
		if ($l>0)        { $l --; }
		if ($k>0)        { $k --; }
		if ($k<$nloci-1) { $nloci --; }
	    }
	}
    }
    
    #finallly, normalize mean scores and calculate types.
    for ($l = 0; $l < $nloci; $l++) {	
	$codsc_ref->[$l] /= $howmany_ref->[$l];
	$rnasc_ref->[$l] /= $howmany_ref->[$l];

    }

    if ($verbose) {
	print "\n after arrange_loci\n"; 
	for ($l = 0; $l < $nloci; $l++) { 
	    print "AALOCI:$l $name_ref->[$l] $lloci_ref->[$l] $rloci_ref->[$l]\n"; 
	}
    }
    
    $$nloci_ref = $nloci;

}

sub switch {

    my ($x1_ref, $x2_ref) = @_;

    my $x1 = $$x1_ref;
    my $x2 = $$x2_ref;
    my $x;

    $x  = $x1;
    $x1 = $x2;
    $x2 = $x;

    $$x1_ref = $x1;
    $$x2_ref = $x2;

    if ($$x1_ref > $$x2_ref) { print "bad switch\n"; die; }
}
