#include "../../peptide_spectrum_score.h"
#include <math.h>
#include <time.h>

// Rebuilds protein_aas: for every protein, the residues of all_aa_seqs that
// lie in the [Ala, Val] code range are copied in order, followed by a single
// trailing 0 entry. Residues outside that range are skipped.
void FastaDB::update_protein_aas() {
  protein_aas.resize(protein_length.size());
  for(int ind = 0; ind < protein_length.size(); ind++) {
    // Start from an empty entry: without this, calling the function a second
    // time (for instance after the database has grown) would append a second
    // copy of the residues to entries that already exist.
    protein_aas[ind].clear();
    for(int loc_ind = 0; loc_ind < protein_length[ind]; loc_ind++) {
      if(all_aa_seqs[loc_ind+protein_start_location[ind]]>=Ala && all_aa_seqs[loc_ind+protein_start_location[ind]]<=Val)
        protein_aas[ind].push_back(all_aa_seqs[loc_ind+protein_start_location[ind]]);
    }
    // Trailing 0 entry appended after the residues of each protein.
    protein_aas[ind].push_back(0);
  }
}

// Rebuilds protein_seqs: for every protein, the residues of all_aa_seqs that
// lie in the [Ala, Val] code range are translated through the config's
// aa2char table and appended in order. Residues outside that range are skipped.
void FastaDB::update_protein_seqs() {
  protein_seqs.resize(protein_length.size());
  for(int ind = 0; ind < protein_length.size(); ind++) {
    // Start from an empty entry so that a repeated call does not append a
    // second copy of the sequence to entries that already exist.
    protein_seqs[ind].clear();
    for(int loc_ind = 0; loc_ind < protein_length[ind]; loc_ind++) {
      if(all_aa_seqs[loc_ind+protein_start_location[ind]]>=Ala && all_aa_seqs[loc_ind+protein_start_location[ind]]<=Val)
        protein_seqs[ind].push_back(config->get_aa2char()[all_aa_seqs[loc_ind+protein_start_location[ind]]]);
    }
  }
}

// Scans every position of every protein in protein_aas for the pattern `pat`.
// A position matches when, for every pattern element, the residue at
// (position + element.pos) exists, has an index greater than 0, and equals
// element.aa. Each match is appended to `res` (cleared first) with its protein
// index and name, location, running match index and the slice of protein_seqs
// between (location + res.min_pos) and (location + res.max_pos), clamped to
// [1, size-1) as in the code below.
void FastaDB::search_pattern(SearchPattern pat, SearchPatternRes& res) {
  res.clear();
  cout << "num proteins : " << protein_aas.size() << endl;
  cout << "pattern size : " << pat.size() << endl;

  int match_count = 0;
  for(int prot_ind = 0; prot_ind < protein_aas.size(); prot_ind++) {
    for(int loc_ind = 0; loc_ind < protein_aas[prot_ind].size(); loc_ind++) {
      // Test every pattern element; stop at the first one that fails.
      bool matched = true;
      for(int pat_ind = 0; pat_ind < pat.size(); pat_ind++) {
        if(loc_ind+pat[pat_ind].pos>=protein_aas[prot_ind].size() || loc_ind+pat[pat_ind].pos <= 0 || protein_aas[prot_ind][loc_ind+pat[pat_ind].pos] != pat[pat_ind].aa) {
          matched = false;
          break;
        }
      }
      if(!matched)
        continue;

      SearchPatternResSingle hit;
      hit.prot_ind = prot_ind;
      hit.prot_name = protein_names[prot_ind];
      hit.loc = loc_ind;

      // Window of the printable sequence reported with the match.
      int first = loc_ind + res.min_pos;
      if(first <= 0)
        first = 1;
      int last = loc_ind + res.max_pos;
      if(last >= protein_seqs[prot_ind].size())
        last = protein_seqs[prot_ind].size()-1;
      for(int seq_ind = first; seq_ind < last; seq_ind++)
        hit.str.push_back(protein_seqs[prot_ind][seq_ind]);

      hit.ind = match_count;
      res.push_back(hit);
      match_count++;
    }
  }
}

// Writes this match as a single line to `out_file`. The file is opened with
// the default ofstream mode, so existing content is replaced.
void SearchPatternResSingle::print(string out_file) {
  ofstream report(out_file.c_str());
  report << ind
         << " protein name : " << prot_name
         << " location : " << loc
         << " peptide : " << str
         << endl;
  report.close();
}

// Writes this match as a single line to standard output.
void SearchPatternResSingle::print() {
  cout << ind
       << " protein name : " << prot_name
       << " location : " << loc
       << " peptide : " << str
       << endl;
}

// Writes the match count followed by one line per match to `out_file`.
//
// Everything goes through one stream. Delegating each record to
// SearchPatternResSingle::print(out_file) reopens the file in truncating mode
// for every record, so only the last record would remain in the file.
// The per-record line format is the same one SearchPatternResSingle::print uses.
void SearchPatternRes::print(string out_file) {
  ofstream out;
  out.open(out_file.c_str());
  out << "num match : " << size() << endl;
  for(int ind = 0; ind < size(); ind++) {
    const SearchPatternResSingle& hit = (*this)[ind];
    out << hit.ind << " protein name : " << hit.prot_name << " location : " << hit.loc << " peptide : " << hit.str << endl;
  }
  out.close();
}

// Prints the match count and then every match, one per line, to standard output.
void SearchPatternRes::print() {
  cout << "num match : " << size() << endl;
  int hit_ind = 0;
  while(hit_ind < size()) {
    (*this)[hit_ind].print();
    ++hit_ind;
  }
}


// Assigns an estimated p-value (pval_est) to every peptide hit.
//
// For each entry of top_hits:
//   1. find the shortest and longest peptide length among its hits;
//   2. for every (length, scoring method) pair record the largest and the
//      smallest hit score (the smallest is tracked but not used further);
//   3. for every length in [shortest, longest] and every scoring method,
//      configure a StatVec (score range 0 .. largest score) and run its
//      est_pval on the spectrum the hits belong to;
//   4. look up each hit's score in the matching StatVec's pv table.
void PeptideCounterList::est_pval(AnnotatedSpectrumCounterList& spectrum_vector) {
  typedef list<PeptideCounter>::iterator HitIter;

  for(int hits_ind = 0; hits_ind < top_hits.size(); hits_ind++){
    top_hits[hits_ind].set_config(config);

    // Step 1: length range covered by the hits (shortest starts at 100).
    int longest = 0;
    int shortest = 100;
    for(HitIter hit = top_hits[hits_ind].begin(); hit != top_hits[hits_ind].end(); hit++) {
      if(hit->get_length() > longest)
        longest = hit->get_length();
      if(hit->get_length() < shortest)
        shortest = hit->get_length();
    }

    // Step 2: score extremes per (length, scoring method).
    vector<vector<int> > top_score(longest+1);
    vector<vector<int> > low_score(longest+1);
    for(int len = shortest; len <= longest; len++) {
      top_score[len].resize(scoring_methods.size());
      low_score[len].resize(scoring_methods.size());
      for(int method = 0; method < scoring_methods.size(); method++) {
        top_score[len][method] = 0;
        low_score[len][method] = 1000;
      }
    }
    for(HitIter hit = top_hits[hits_ind].begin(); hit != top_hits[hits_ind].end(); hit++) {
      if(hit->score > top_score[hit->get_length()][hit->get_scoring_method_index()]) {
        top_score[hit->get_length()][hit->get_scoring_method_index()] = hit->score;
      }
      if(hit->score < low_score[hit->get_length()][hit->get_scoring_method_index()]) {
        low_score[hit->get_length()][hit->get_scoring_method_index()] = hit->score;
      }
    }

    // Step 3: one configured StatVec per (length, scoring method).
    vector<vector<StatVec> > stats(longest+1);
    for(int len = shortest; len <= longest; len++) {
      stats[len].resize(scoring_methods.size());
      for(int method = 0; method < scoring_methods.size(); method++) {
        StatVec& stat = stats[len][method];
        stat.set_length(len);
        stat.set_method(scoring_methods[method]);
        stat.set_acc_thresh(acc_thresh);
        stat.set_offset(offset);
        stat.set_max_score(top_score[len][method]);
        // The lower bound is fixed at 0 rather than the observed minimum.
        stat.set_min_score(0);
        stat.est_pval(spectrum_vector[top_hits[hits_ind].spectrum_ind]);
      }
    }

    // Step 4: read each hit's p-value from the table built for its pair.
    for(HitIter hit = top_hits[hits_ind].begin(); hit != top_hits[hits_ind].end(); hit++) {
      if(hit->score <= top_score[hit->get_length()][hit->get_scoring_method_index()]) {
        hit->pval_est = stats[hit->get_length()][hit->get_scoring_method_index()].pv[hit->score];
      }
    }
  }
}

// Scores `peptide` against `spectrum` with this StatVec's accuracy threshold,
// offset and scoring method, always using score type 6 and the spectrum's own
// charge. The original note on this call reads "integer, divided by 0.9995".
int StatVec::bring_score(AnnotatedSpectrumCounter& spectrum, Non_Linear_Peptide& peptide) {
  const int score_type = 6;
  const int result = peptide_spectrum_score(spectrum, peptide, spectrum.getCharge(),
                                            acc_thresh, offset, scoring_method, score_type);
  return result;
}

// Produces `mutated_peptide` from `peptide` by moving a random amount of mass
// from one randomly chosen residue to the residue that follows it, so the
// total of seq is unchanged.
//
// peptide          source peptide (taken by value, not modified)
// mutated_peptide  receives the mutated copy; it is an unmodified copy of
//                  `peptide` when no valid pair of positions exists
// returns          the amount of mass moved (0 when nothing was mutated)
//
// structure == 0 is treated as linear: the donor is drawn from all positions
// but the last, so that its successor exists. structure == 1 wraps the
// successor of the last position around to position 0.
int mutate_peptide(Non_Linear_Peptide peptide, Non_Linear_Peptide& mutated_peptide) {
  const int min_aa_mass = 57;
  mutated_peptide = peptide;

  const int num_res = peptide.seq.size();
  const bool is_linear = (peptide.structure == 0);
  // A linear peptide needs at least two residues to form a donor/receiver pair.
  if(num_res == 0 || (is_linear && num_res < 2))
    return 0;

  int mut_pos, mut_pos_next;
  if(is_linear)
    // Parenthesized on purpose: `myRandom()*size-1` subtracts after the
    // multiplication and can yield position -1.
    mut_pos = floor(myRandom()*(num_res-1));
  else
    mut_pos = floor(myRandom()*num_res);
  if(peptide.structure == 1 && mut_pos == num_res-1)
    mut_pos_next = 0;
  else
    mut_pos_next = mut_pos+1;

  // Refuse to index outside seq (for example if myRandom() ever returns 1.0,
  // or for a structure code whose last position has no successor here).
  if(mut_pos < 0 || mut_pos >= num_res || mut_pos_next >= num_res)
    return 0;

  // The donor keeps more than min_aa_mass whenever it started above it.
  int delta = floor(myRandom()*(peptide.seq[mut_pos]-min_aa_mass));
  mutated_peptide.seq[mut_pos] = mutated_peptide.seq[mut_pos] - delta;
  mutated_peptide.seq[mut_pos_next] = mutated_peptide.seq[mut_pos_next] + delta;
  // The function is declared int; falling off the end without a return is
  // undefined behaviour.
  return delta;
}

// Computes m = m_1 * m_2 for row-major matrices stored as vectors of rows.
//
// The column count of each operand is taken from its first row. When the
// column count of m_1 differs from the row count of m_2, "dimension mismatch!"
// is printed and m is left untouched. Both operands are taken by value, so m
// may be the same object as either of them.
void matrix_mult(vector<vector<float> > m_1, vector<vector <float> > m_2, vector<vector<float> >& m) {
  const int rows = m_1.size();
  int inner = 0;
  if(rows > 0)
    inner = m_1[0].size();

  const int rhs_rows = m_2.size();
  int cols = 0;
  if(rhs_rows > 0)
    cols = m_2[0].size();

  if(inner != rhs_rows) {
    cout << "dimension mismatch!" << endl;
    return;
  }

  m.resize(rows);
  for(int r = 0; r < rows; r++) {
    vector<float>& out_row = m[r];
    const vector<float>& lhs_row = m_1[r];
    out_row.resize(cols);
    for(int c = 0; c < cols; c++) {
      // Dot product of row r of m_1 with column c of m_2.
      float acc = 0;
      for(int k = 0; k < inner; k++)
        acc += lhs_row[k]*m_2[k][c];
      out_row[c] = acc;
    }
  }
}


// Stores m raised to `power` in pow_m by repeated right-multiplication with m.
// For power <= 1 no multiplication happens and pow_m is simply a copy of m.
void matrix_pow(vector<vector<float> > m, vector<vector<float> >& pow_m, int power) {
  pow_m = m;
  int exponent_reached = 1;
  while(exponent_reached < power) {
    matrix_mult(pow_m, m, pow_m);
    ++exponent_reached;
  }
}


// Stores m squared `pow_2` times in pow_m, i.e. m raised to 2^pow_2.
// For pow_2 <= 0 pow_m is simply a copy of m.
void matrix_pow_pow(vector<vector<float> > m, vector<vector<float> >& pow_m, int pow_2) {
  pow_m = m;
  int squarings_done = 0;
  while(squarings_done < pow_2) {
    matrix_mult(pow_m, pow_m, pow_m);
    ++squarings_done;
  }
}


// Prints one line per score in [min_score, max_score] to standard output,
// showing the p-value (when pv_flag is set) and the empirical p-value (when
// pve_flag is set) stored for that score.
void StatVec::print_pv() {
  // Inclusive upper bound: pv and pv_e are sized max_score+1 and filled for
  // every score up to and including max_score, so the top score must be
  // printed as well.
  for(int ind = min_score; ind <= max_score; ind++) {
    cout << "score : " << ind;
    if(pv_flag)
      cout << " p-value : " << pv[ind];
    if(pve_flag)
      cout << " empirical p-value : " << pv_e[ind];
    cout << endl;
  }
}

// Currently has an empty body: nothing is computed and `spectrum` is unused.
void StatVec::est_empirical_prob_trans(AnnotatedSpectrumCounter& spectrum) {
	
}

// Fills pv_e with the empirical score distribution of random peptides.
//
// Random peptides of length `len` and the spectrum's scaled parent mass are
// generated and scored until either num_start of them scored inside
// [min_score, max_score] or max_num_try_start peptides have been tried.
// pv_e[s] is the fraction of accepted peptides that scored exactly s.
// Sets pve_flag.
void StatVec::est_empirical_pval(AnnotatedSpectrumCounter& spectrum) {
  pve_flag = true;
  const float scale_factor = 0.9995;
  const int max_num_try_start = 1000000;
  const int num_start = 100000;

  const int mass = int((spectrum.get_org_pm() - MASS_PROTON)*scale_factor);

  // Per-score count of accepted random peptides.
  vector<int> total_num;
  total_num.resize(max_score+1);
  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
    total_num[score_ind] = 0;
  }

  // Only the counts are needed, so the sampled peptides are not stored.
  Non_Linear_Peptide nlp;
  int num_accepted = 0;
  int num_try_start = 0;
  while(num_accepted < num_start && num_try_start < max_num_try_start) {
    nlp.generate_random_nlp(len, mass);
    nlp.score = bring_score(spectrum, nlp);
    if(nlp.score <= max_score && nlp.score >= min_score) {
      total_num[nlp.score]++;
      num_accepted++;
    }
    num_try_start++;
  }

  pv_e.resize(max_score+1);
  for(int ind = min_score; ind <= max_score; ind++) {
    // With no accepted sample the ratio would be 0/0 (NaN); report 0 instead.
    if(num_accepted > 0)
      pv_e[ind] = float(total_num[ind])/num_accepted;
    else
      pv_e[ind] = 0;
  }
}

// Estimates a p-value table pv[score] for score in [min_score, max_score].
//
// Outline, as implemented below:
//   1. Seed generation 0 with num_start random peptides of length `len` and
//      the spectrum's scaled parent mass, bucketed by score.
//   2. For each later generation, mutate every parent of the previous
//      generation, count the score transition parent -> child in `trans`, and
//      keep the child unless its score bucket has been closed. Passes over the
//      parents repeat until the generation holds num_start children.
//   3. After each generation, close every score bucket whose parents have
//      produced more than child_limit counted transitions.
//   4. Normalize the transition counts into prob_trans, restricted to scores
//      with more than trans_limit counted transitions on both sides.
//   5. Square prob_trans matrix_power times and copy one row of the result
//      into pv.
// Sets pv_flag and overwrites the member prob_trans.
void StatVec::est_pval(AnnotatedSpectrumCounter& spectrum) {
  pv_flag = true;
  float child_death_ratio = 0.01;
  float scale_factor = 0.9995;
  int child_limit = 1000;
  int trans_limit = 50;
  int num_rep = 10;
  int num_start = 10000;
  int matrix_power = 10;
  // Consecutive passes over the parents that may add no child before a
  // generation is abandoned. Heuristic bound; without it the generation loop
  // never terminates once no child can be accepted any more.
  int max_idle_passes = 1000;

  vector<float> prob_trans_power_sum;
  vector< vector< vector <Non_Linear_Peptide> > > nlp_vec_vec;
  vector<bool> parent_death_flag;
  vector<bool> child_death_flag;
  vector<vector<int> > trans;
  vector<int> total_trans;
  vector<vector<float> > prob_trans_power;

  nlp_vec_vec.resize(num_rep);
  trans.resize(max_score+1);
  total_trans.resize(max_score+1);
  prob_trans.resize(max_score+1);
  parent_death_flag.resize(max_score+1);
  child_death_flag.resize(max_score+1);
  int mass = int((spectrum.get_org_pm() - MASS_PROTON)*scale_factor);

  nlp_vec_vec[0].resize(max_score+1);
  for(int score = min_score; score <= max_score; score++) {
    nlp_vec_vec[0][score].clear();
    total_trans[score] = 0;
    trans[score].resize(max_score+1);
    prob_trans[score].resize(max_score+1);
    for(int score_second = min_score; score_second <= max_score; score_second++) {
      trans[score][score_second] = 0;
    }
    parent_death_flag[score] = false;
    child_death_flag[score] = false;
  }

  // Step 1: random seed generation.
  for(int ind = 0; ind < num_start; ind++) {
    Non_Linear_Peptide nlp;
    nlp.generate_random_nlp(len, mass);
    nlp.score = bring_score(spectrum, nlp);
    if(nlp.score <= max_score && nlp.score >= min_score) {
      nlp_vec_vec[0][nlp.score].push_back(nlp);
    }
  }

  vector<int> num_child;
  vector<int> generation_population;
  num_child.resize(max_score+1);
  // Indexed by generation, not by score: it must hold num_rep entries.
  // Sizing it by max_score+1 writes out of bounds when max_score < num_rep-1.
  generation_population.resize(num_rep);

  for(int iter = 1; iter<num_rep; iter++) {
    cout << "iteration : " << iter << endl;
    generation_population[iter] = 0;
    nlp_vec_vec[iter].resize(max_score+1);
    for(int score = min_score; score <= max_score; score++) {
      nlp_vec_vec[iter][score].clear();
      num_child[score] = 0;
    }

    // Step 2: mutate the previous generation until this one is full.
    int idle_passes = 0;
    while(generation_population[iter]<num_start) {
      const int population_before = generation_population[iter];
      for(int score = min_score; score <= max_score; score++) {
        if(parent_death_flag[score] == false) {
          for(int nlp_ind = 0; nlp_ind < nlp_vec_vec[iter-1][score].size(); nlp_ind++) {
            Non_Linear_Peptide child_nlp;
            mutate_peptide(nlp_vec_vec[iter-1][score][nlp_ind], child_nlp);
            child_nlp.score = bring_score(spectrum, child_nlp);
            if(child_nlp.score >= min_score && child_nlp.score <= max_score) {
              trans[score][child_nlp.score]++;
              total_trans[score]++;
              if(child_death_flag[child_nlp.score] == false) {
                num_child[score]++;
                nlp_vec_vec[iter][child_nlp.score].push_back(child_nlp);
                generation_population[iter]++;
              }
            }
          }
        }
      }
      // Stall guard: with no parents, or with every reachable child bucket
      // closed, the population can no longer grow.
      if(generation_population[iter] == population_before) {
        idle_passes++;
        if(idle_passes >= max_idle_passes)
          break;
      } else {
        idle_passes = 0;
      }
    }
    cout << "generation population : " << generation_population[iter] << endl;

    // Step 3: close buckets that have been sampled enough.
    for(int score = min_score; score <= max_score; score++) {
      if(child_death_flag[score] == false && total_trans[score] > child_limit) {
        child_death_flag[score] = true;
        cout << "child death : " << score << endl;
      }
      if(parent_death_flag[score] == false && child_death_flag[score] == true  && num_child[score] < child_death_ratio*nlp_vec_vec[iter-1][score].size()) {
        // Parent retirement is disabled: parent_death_flag is never set.
        // parent_death_flag[score] = true;
        // cout << "parent death : " << score << endl;
      }
    }
  }

  // Step 4: transition probabilities over sufficiently sampled scores.
  vector<int> total_trans_fixed;
  total_trans_fixed.resize(max_score+1);

  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
    total_trans_fixed[score_ind] = 0;
    for(int score_ind_second = min_score; score_ind_second <= max_score; score_ind_second++) {
      if(total_trans[score_ind] > trans_limit && total_trans[score_ind_second] > trans_limit)
        total_trans_fixed[score_ind] += trans[score_ind][score_ind_second];
    }

    for(int score_ind_second = min_score; score_ind_second <= max_score; score_ind_second++) {
      // The total_trans_fixed check avoids 0/0 (NaN) when every transition
      // from score_ind went to scores that were filtered out; a NaN here
      // would spread through the whole matrix power.
      if(total_trans[score_ind] > trans_limit && total_trans[score_ind_second] > trans_limit && total_trans_fixed[score_ind] > 0)
        prob_trans[score_ind][score_ind_second] = float(trans[score_ind][score_ind_second])/total_trans_fixed[score_ind];
      else
        prob_trans[score_ind][score_ind_second] = 0;
    }
  }

  // Step 5: prob_trans squared matrix_power times.
  matrix_pow_pow(prob_trans, prob_trans_power, matrix_power);

  int max_ind = 0;
  float max_val = 0;

  prob_trans_power_sum.resize(max_score+1);
  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
    prob_trans_power_sum[score_ind] = 0;
    for(int score_ind_second = min_score; score_ind_second <= max_score; score_ind_second++) {
      prob_trans_power_sum[score_ind] += prob_trans_power[score_ind][score_ind_second];
    }
    // NOTE(review): max_val is never updated, so this selects the LAST row
    // whose sum is positive rather than the row with the largest sum. Kept
    // as is because changing the selected row changes the reported
    // p-values; confirm the intended rule before altering it.
    if(prob_trans_power_sum[score_ind]>max_val)
      max_ind = score_ind;
  }
  pv.resize(max_score+1);
  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
    pv[score_ind] = prob_trans_power[max_ind][score_ind];
  }
}

/* void StatVec::est_pval(AnnotatedSpectrumCounter& spectrum) {
  pv_flag = true;
  int num_sequential_failed_child = 1000000;
  float scale_factor = 0.9995;
  int max_num_try_start = 100000;
  int max_num_try = 1000000;
  int num_start = 100000;
  int lim = 1000;
  int matrix_power = 10;
  vector<int> num_match;
  num_match.resize(max_score+1);
  vector<int> failed_child;
  vector<bool> score_flag_parent;
  vector<bool> score_flag_child;
  failed_child.resize(max_score+1);
  score_flag_child.resize(max_score+1);
  score_flag_parent.resize(max_score+1);
  bool flag = false;
  NLP_list nlp_vec;
  nlp_vec.clear();
  Non_Linear_Peptide nlp;
  int mass = int((spectrum.get_org_pm() - MASS_PROTON)*scale_factor);
  int nlp_ind = 0;
  int total_total_trans;
  vector<int> total_trans;
  vector<vector<int> > trans;
  vector<vector<float> > prob_trans_power;
  vector<float> prob_trans_power_sum;

  total_trans.resize(max_score+1);
  trans.resize(max_score+1);
  prob_trans.resize(max_score+1);
  int num_try_start = 0;

  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
	failed_child[score_ind] = 0;
  	total_trans[score_ind] = 0;
        num_match[score_ind] = 0;
  	score_flag_child[score_ind] = false; 
  	score_flag_parent[score_ind] = false; 
  }

  while(nlp_ind < num_start && num_try_start < max_num_try_start) {
	  nlp.generate_random_nlp(len, mass);
	  nlp.score = bring_score(spectrum, nlp);
	  if(nlp.score <= max_score & nlp.score >= min_score) {
		  nlp_vec.push_back(nlp);
	          num_match[nlp.score]++;
		  nlp_ind++;
	  }
	  num_try_start++;
  }

  if(num_try_start == max_num_try_start) {
    // cout << "Warning : maximum number of tries reached during initialization" << endl;
  }

  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
  	trans[score_ind].resize(max_score+1);
  	prob_trans[score_ind].resize(max_score+1);
        for(int score_ind_second = min_score; score_ind_second <= max_score; score_ind_second++) {
       	    trans[score_ind][score_ind_second] = 0;
        }
  }

  int num_try = 0;
  while(!flag && num_try<max_num_try) {
    // cout << "nlp size : " << nlp_vec.size() << endl;
    int ind = floor(myRandom()*nlp_vec.size());
    if(score_flag_parent[nlp_vec[ind].score] == false) {
	    Non_Linear_Peptide nlp;
	    mutate_peptide(nlp_vec[ind], nlp);
	    nlp.score = bring_score(spectrum, nlp);
	    if(nlp.score>=min_score && nlp.score<=max_score) {
	      total_total_trans++;
	      total_trans[nlp_vec[ind].score]++;
	      trans[nlp_vec[ind].score][nlp.score]++;
	      num_match[nlp.score]++;
	      if(score_flag_child[nlp.score] == false) {
		      nlp_vec.push_back(nlp);
	      	      failed_child[nlp_vec[ind].score] = 0;
	      } else if(score_flag_child[nlp_vec[ind].score] == true) {
		      failed_child[nlp_vec[ind].score]++;
	      }
	      if(score_flag_child[nlp.score] == false && total_trans[nlp.score] >= lim) {
		      score_flag_child[nlp.score] = true;
		      cout << "child death : " << nlp.score << endl;
	      }
	    }
	    if(score_flag_child[nlp_vec[ind].score] == true && failed_child[nlp_vec[ind].score] >= num_sequential_failed_child) {
		score_flag_parent[nlp_vec[ind].score] = true;
		cout << "parent death : " << nlp_vec[ind].score << endl;
	    }
    }
    num_try++;
  }
 
  if(num_try == max_num_try) {
    // cout << "Warning : maximum number of tries reached" << endl;
  }
  
  vector<int> total_trans_fixed;
  total_trans_fixed.resize(max_score+1);

  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
    total_trans_fixed[score_ind] = 0;
    for(int score_ind_second = min_score; score_ind_second <= max_score; score_ind_second++) {
      if(total_trans[score_ind] > 100 && total_trans[score_ind_second] > 100)
	total_trans_fixed[score_ind] += trans[score_ind][score_ind_second];
    }

    for(int score_ind_second = min_score; score_ind_second <= max_score; score_ind_second++) {
      if(total_trans[score_ind] > 100 && total_trans[score_ind_second] > 100)
      	prob_trans[score_ind][score_ind_second] = float(trans[score_ind][score_ind_second])/total_trans_fixed[score_ind];
      else
      	prob_trans[score_ind][score_ind_second] = 0;
      // cout << prob_trans[score_ind][score_ind_second] << " ";
      // cout << trans[score_ind][score_ind_second] << " ";
    }
    // cout << endl;
    // cout << total_trans[score_ind] << endl;
    // cout << endl;
  }
  // cout << "max score : " << max_score << endl;
  // cout << "min score : " << min_score << endl;
  // cout << "len : " << prob_trans.size() << endl;
  // cout << "width : " << prob_trans[0].size() << endl;
  matrix_pow_pow(prob_trans, prob_trans_power, matrix_power);

  int max_ind = 0;
  float max_val = 0;

  prob_trans_power_sum.resize(max_score+1);
  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
    prob_trans_power_sum[score_ind] = 0;
    for(int score_ind_second = min_score; score_ind_second <= max_score; score_ind_second++) {
      prob_trans_power_sum[score_ind] += prob_trans_power[score_ind][score_ind_second];
      // cout << prob_trans_power[score_ind][score_ind_second] << " ";
    }
    // cout << endl;
    if(prob_trans_power_sum[score_ind]>max_val)
      max_ind = score_ind;
  }
  pv.resize(max_score+1);
  for(int score_ind = min_score; score_ind <= max_score; score_ind++) {
    pv[score_ind] = prob_trans_power[max_ind][score_ind];
  }
}*/

// Sorts the hits of every entry of top_hits with ComparePeptideCounterByEstPval,
// stores the pval_est of the resulting first hit in that entry's max_est_pval,
// and finally sorts top_hits itself with ComparePeptideSpectrumHitsByMaxEstPval.
void PeptideCounterList::sort_pval_est() {
	for(int scan_ind = 0; scan_ind < top_hits.size(); scan_ind++) {
		top_hits[scan_ind].sort(ComparePeptideCounterByEstPval);
		// front() on an empty hit list is undefined behaviour; an entry
		// without hits keeps whatever max_est_pval it already had.
		if(!top_hits[scan_ind].empty())
			top_hits[scan_ind].max_est_pval = top_hits[scan_ind].front().pval_est;
	}
	sort(top_hits.begin(), top_hits.end(), ComparePeptideSpectrumHitsByMaxEstPval);
}




// Doubles the database by appending one decoy per protein.
//
// The decoy of protein i is stored as protein (i + original protein count):
// same length, name suffixed with "_decoy", start location shifted by the
// original size of all_aa_seqs, and a sequence made by swapping each pair of
// neighbouring residues (0<->1, 2<->3, ...). A trailing unpaired residue is
// copied unchanged. protein_decoy marks originals false and decoys true.
void FastaDB::add_decoy() {
  decoy_flag = true;

  const int orig_db_len = all_aa_seqs.size();
  const int orig_num_prot = protein_length.size();

  protein_length.resize(2*orig_num_prot);
  protein_start_location.resize(2*orig_num_prot);
  protein_names.resize(2*orig_num_prot);
  all_aa_seqs.resize(2*orig_db_len);
  protein_decoy.resize(2*orig_num_prot);

  for(int target = 0; target < orig_num_prot; target++) {
    const int decoy = target + orig_num_prot;

    protein_decoy[target] = false;
    protein_decoy[decoy] = true;
    protein_length[decoy] = protein_length[target];
    protein_start_location[decoy] = protein_start_location[target] + orig_db_len;
    protein_names[decoy] = protein_names[target] + "_decoy";

    const int len = protein_length[target];
    const int src = protein_start_location[target];
    const int dst = protein_start_location[decoy];

    // Swap complete pairs of neighbours.
    int pos = 0;
    while(pos + 1 < len) {
      all_aa_seqs[dst + pos] = all_aa_seqs[src + pos + 1];
      all_aa_seqs[dst + pos + 1] = all_aa_seqs[src + pos];
      pos += 2;
    }
    // Odd length: the last residue has no partner and stays in place.
    if(pos < len)
      all_aa_seqs[dst + pos] = all_aa_seqs[src + pos];
  }
}

// Hands the config to every entry of top_hits and removes duplicate peptides
// from each of them.
void PeptideCounterList::unique() {
  int hits_ind = 0;
  while(hits_ind < top_hits.size()) {
    top_hits[hits_ind].set_config(config);
    top_hits[hits_ind].unique();
    ++hits_ind;
  }
}

void PeptideSpectrumHits::unique() {
    list<PeptideCounter>::iterator it;
    PeptideCounter old_peptide, temp_pep;
    for(it = begin(); it != end(); it++) {
      minimize(config, *it);
    }
    sort(ComparePeptideCounterByAmino);
    vector<bool> mark_removal;
    mark_removal.resize(size());
    int ind = 0;
    for(it = begin(); it != end(); it++) {
      if(it != begin() && ComparePeptideCounterByAminoEq(*it, old_peptide)) {
        // cout << (*it).getPeptideStr() << " " << (*it).get_length() << " " << (*it).get_mass() << endl;
        // cout << old_peptide.getPeptideStr() << " " << (*it).get_length() << " " << (*it).get_mass() << endl;
        mark_removal[ind] = true;
      } else {
        mark_removal[ind] = false;
      }
      old_peptide = *it;
      ind++;
    }
    ind = 0;
    for(it = begin(); it != end(); ) {
      // cout << ind << " " << size() << " " << mark_removal[ind] << endl;
      if(mark_removal[ind]) {
        erase(it++);
      } else {
        it++;
      }
      ind++;
    }
    sort(ComparePeptideCounterByNormScore);
}



// Replaces `pep` by its minimized form: minimize_linear is used when
// scoring_method is 0, minimize_cyclic for every other value.
void minimize(Config* config, PeptideCounter& pep) {
  if(pep.scoring_method != 0) {
    minimize_cyclic(config, pep);
    return;
  }
  minimize_linear(config, pep);
}

// Replaces `pep` by its reversed form when ComparePeptideCounterByAmino
// orders the reversed peptide before the original; otherwise leaves it as is.
void minimize_linear(Config* config, PeptideCounter& pep) {
  PeptideCounter best = pep;
  PeptideCounter reversed;
  reverse(config, pep, reversed);
  const bool reversed_first = ComparePeptideCounterByAmino(reversed, best);
  if(reversed_first)
    best = reversed;
  pep = best;
}

// Replaces `pep` by the first, under ComparePeptideCounterByAmino, of all
// rotations of the peptide and all rotations of its reversed form.
void minimize_cyclic(Config* config, PeptideCounter& pep) {
  PeptideCounter best = pep;
  PeptideCounter reversed;
  reverse(config, pep, reversed);

  // Forward orientation first, then the reversed one.
  PeptideCounter* orientations[2] = { &pep, &reversed };
  for(int orient = 0; orient < 2; orient++) {
    for(int shift = 0; shift < pep.get_length(); shift++) {
      PeptideCounter candidate;
      rotate(config, *orientations[orient], candidate, shift);
      if(ComparePeptideCounterByAmino(candidate, best))
        best = candidate;
    }
  }
  pep = best;
}

// Sets `rev_pep` to a copy of `pep` whose amino acids are in reverse order.
void reverse(Config* config, PeptideCounter& pep, PeptideCounter& rev_pep) {
  rev_pep = pep;
  const vector<int> forward = pep.get_amino_acids();
  vector<int> backward(forward.rbegin(), forward.rend());
  rev_pep.set_peptide_aas(config, backward);
}

// Sets `rot_pep` to a copy of `pep` whose amino acids are rotated left by
// `rot_ind`: residues rot_ind..end come first, followed by residues
// 0..rot_ind-1.
void rotate(Config* config, PeptideCounter& pep, PeptideCounter& rot_pep, int rot_ind) {
  rot_pep = pep;
  const vector<int> residues = pep.get_amino_acids();
  vector<int> rotated;

  // Tail of the sequence, starting at the rotation point.
  int src = rot_ind;
  while(src < residues.size()) {
    rotated.push_back(residues[src]);
    ++src;
  }
  // Head of the sequence, up to the rotation point.
  src = 0;
  while(src < rot_ind) {
    rotated.push_back(residues[src]);
    ++src;
  }

  rot_pep.set_peptide_aas(config, rotated);
}

/*void PeptideCounterList::normalize_score() {
  for(int ind = 0; ind < top_hits.size(); ind++){
    list<PeptideCounter>::iterator it;
    for(it = top_hits[ind].begin(); it != top_hits[ind].end(); it++) {
      it->normalize_score();
    }
  }
}*/

/*void PeptideCounterList::normalize_score() {
  for(int ind = 0; ind < top_hits.size(); ind++){
    list<PeptideCounter>::iterator it;
    for(it = top_hits[ind].begin(); it != top_hits[ind].end(); it++) {
      statistics.insert_psm(*it);
    }
  }
  statistics.finalize();
  for(int ind = 0; ind < top_hits.size(); ind++){
    list<PeptideCounter>::iterator it;
    for(it = top_hits[ind].begin(); it != top_hits[ind].end(); it++) {
      normalize_score(*it);
    }
  }
}*/

// Searches the whole of loc_vec for the first tag matching `pep`; returns its
// index, or -1 when the range search reports no match.
int FastaDB::searchFirstOccurence(PeptideAdd& pep) {
  const int range_begin = 0;
  return searchFirstOccurence(pep, range_begin, loc_vec.size());
}

// Binary search over loc_vec[start, end) for the first tag that matches `pep`
// according to ComparePeptidePepLocBySeqEq.
//
// pep    peptide to look up
// start  first index of the range (inclusive)
// end    end of the range (exclusive)
// returns the index of the match the search converges on, or -1 when the
//         range is empty or invalid or the final candidate does not match
//
// Relies on loc_vec being ordered consistently with ComparePeptidePepLocBySeq.
int FastaDB::searchFirstOccurence(PeptideAdd& pep, int start, int end) {
  // An empty range (for example an empty loc_vec) must not reach the
  // loc_vec[start] lookup below.
  if(start < 0 || end > static_cast<int>(loc_vec.size()) || end <= start)
    return -1;

  while(end > start+1) {
    const int mid = (start+end)/2-1;
    if(ComparePeptidePepLocBySeq(pep, loc_vec[mid])) {
      // The peptide sorts after loc_vec[mid]: a match lies to the right.
      start = mid+1;
    } else {
      // Keep mid in the range; it may be the first match.
      end = mid+1;
    }
  }

  if(ComparePeptidePepLocBySeqEq(pep, loc_vec[start]))
    return start;
  else
    return -1;
}

// Searches the whole of loc_vec for the last tag matching `pep`; returns its
// index, or -1 when the range search reports no match.
int  FastaDB::searchLastOccurence(PeptideAdd& pep) {
  const int range_begin = 0;
  return searchLastOccurence(pep, range_begin, loc_vec.size());
}

// Binary search over loc_vec[start, end) for the last tag that matches `pep`
// according to ComparePeptidePepLocBySeqEq.
//
// pep    peptide to look up
// start  first index of the range (inclusive)
// end    end of the range (exclusive)
// returns the index of the match the search converges on, or -1 when the
//         range is empty or invalid or the final candidate does not match
//
// Relies on loc_vec being ordered consistently with ComparePeptidePepLocBySeq.
int FastaDB::searchLastOccurence(PeptideAdd& pep, int start, int end) {
  // An empty range (for example an empty loc_vec) must not reach the
  // loc_vec[start] lookup below.
  if(start < 0 || end > static_cast<int>(loc_vec.size()) || end <= start)
    return -1;

  while(end > start+1) {
    const int mid = (start+end)/2;
    const bool is_eq = ComparePeptidePepLocBySeqEq(pep, loc_vec[mid]);
    const bool is_gr = ComparePeptidePepLocBySeq(pep, loc_vec[mid]);
    if(!is_gr && !is_eq) {
      // The peptide sorts before loc_vec[mid]: drop mid and everything after.
      end = mid;
    } else {
      // Keep mid in the range; it may be the last match.
      start = mid;
    }
  }

  if(ComparePeptidePepLocBySeqEq(pep, loc_vec[start]))
    return start;
  else
    return -1;
}

void PeptideLocation::print() {
  PeptideAdd temp_pep;
  if(fdb->load_pep_loc(*this, temp_pep))
    cout << temp_pep.getPeptideStr() << " ";
  cout << aa_loc << endl;
}

// Prints the number of tags followed by every tag, one per line.
void PepLocVector::print() {
  cout << "total of " << size() << " tags ..." << endl;
  int tag_ind = 0;
  while(tag_ind < size()) {
    (*this)[tag_ind].print();
    ++tag_ind;
  }
}

// Loads the peptide described by `pep_loc` into `pep`.
//
// pep_loc  location whose aa_loc and length select the residues
//          all_aa_seqs[aa_loc, aa_loc + length)
// pep      receives the residues via set_peptide_aas when they are valid
// returns  true when the residues lie inside all_aa_seqs and
//          pep.check_if_valid accepts them (returns 1); false otherwise, in
//          which case `pep` is not modified
bool FastaDB::load_pep_loc(PeptideLocation pep_loc, PeptideAdd& pep) const {
  const int first = pep_loc.aa_loc;
  const int last = pep_loc.aa_loc + pep_loc.length;
  // A tag that starts near the end of the database would otherwise be read
  // past the end of all_aa_seqs.
  if(first < 0 || pep_loc.length < 0 || last > static_cast<int>(all_aa_seqs.size()))
    return false;

  vector<int> aas;
  for(int ind = first; ind < last; ind++) {
    aas.push_back(all_aa_seqs[ind]);
  }
  if(pep.check_if_valid(aas) == 1) {
    pep.set_peptide_aas(config, aas, 1);
    return true;
  } else {
    return false;
  }
}

// Ordering predicate for sorting tag locations by the database suffix that
// starts at each location.
//
// Returns true when the suffix starting at pl_1.aa_loc sorts before the one
// starting at pl_2.aa_loc: at the first differing residue the smaller code
// wins, and a suffix that ends first sorts before the longer one. Both
// locations are read from pl_1's database.
//
// Two equal locations compare as "not less". std::sort requires
// comp(a, a) == false (strict weak ordering); returning true there is
// undefined behaviour inside the sort.
bool ComparePepLocBySeq(const PeptideLocation& pl_1, const PeptideLocation& pl_2) {
  int sz = (pl_1.fdb)->all_aa_seqs.size();
  int loc_1 = pl_1.aa_loc;
  int loc_2 = pl_2.aa_loc;
  if(loc_1 == loc_2)
    return false;
  while(loc_1<sz && loc_2<sz) {
    if((pl_1.fdb)->all_aa_seqs[loc_1]>(pl_1.fdb)->all_aa_seqs[loc_2])
      return false;
    if((pl_1.fdb)->all_aa_seqs[loc_1]<(pl_1.fdb)->all_aa_seqs[loc_2])
      return true;
    loc_1++;
    loc_2++;
  }
  // The locations differ, so exactly one suffix has been exhausted; the one
  // that ended first is the smaller.
  return loc_1 == sz;
}

// Compares `pep` residue by residue with the database sequence starting at
// pl.aa_loc.
//
// Returns true when, at the first differing position, the database residue
// is smaller than the peptide residue, or when the database ends before the
// whole peptide has been compared. Returns false when the database residue
// is larger at the first difference, or when the whole peptide was compared
// without finding a difference.
bool ComparePeptidePepLocBySeq(const PeptideAdd& pep, const PeptideLocation& pl) {
  const int db_len = (pl.fdb)->all_aa_seqs.size();
  const int pep_len = pep.get_length();

  int compared = 0;
  for(int db_pos = pl.aa_loc; db_pos < db_len && compared < pep_len; ++db_pos, ++compared) {
    if((pl.fdb)->all_aa_seqs[db_pos] > pep.get_amino_acids()[compared])
      return false;
    if((pl.fdb)->all_aa_seqs[db_pos] < pep.get_amino_acids()[compared])
      return true;
  }
  return compared != pep_len;
}


// Returns true when every residue of `pep` equals the database residue at
// the corresponding position starting from pl.aa_loc. Returns false on the
// first difference, or when the database ends before the whole peptide has
// been compared.
bool ComparePeptidePepLocBySeqEq(const PeptideAdd& pep, const PeptideLocation& pl) {
  const int db_len = (pl.fdb)->all_aa_seqs.size();
  const int pep_len = pep.get_length();

  int compared = 0;
  for(int db_pos = pl.aa_loc; db_pos < db_len && compared < pep_len; ++db_pos, ++compared) {
    const bool db_larger = (pl.fdb)->all_aa_seqs[db_pos] > pep.get_amino_acids()[compared];
    const bool db_smaller = (pl.fdb)->all_aa_seqs[db_pos] < pep.get_amino_acids()[compared];
    if(db_larger || db_smaller)
      return false;
  }
  return compared == pep_len;
}


void FastaDB::update_pep_loc_vector() {
  cout << "updating tags ... ";
  clock_t start, end;
  start = clock();
  loc_vec.clear();
  PeptideLocation temp_pep_loc;
  for(int protein_idx = 0; protein_idx < protein_length.size(); protein_idx++) {
    for(int position = 0; position < protein_length[protein_idx]; position++) {
      temp_pep_loc.protein_idx = protein_idx;
      temp_pep_loc.loc_in_protein = position;
      temp_pep_loc.aa_loc = protein_start_location[protein_idx] + position;
      temp_pep_loc.fdb = this;
      temp_pep_loc.length = default_tag_len;
      loc_vec.push_back(temp_pep_loc);
    }
  }
  sort(loc_vec.begin(), loc_vec.end(), ComparePepLocBySeq);
  end = clock();
  cout << float(end - start)/CLOCKS_PER_SEC << " seconds elapsed." << endl;

}

// Scores every entry of top_hits against the spectrum it refers to
// (spectrum_vector[entry.spectrum_ind]), forwarding the scoring parameters
// unchanged to the entry's own score().
void PeptideCounterList::score(vector<AnnotatedSpectrumCounter>& spectrum_vector, vector<float>acc_thresh, Offset offset, int scoring_method, int score_type) {
  int hit_ind = 0;
  while(hit_ind < top_hits.size()) {
    AnnotatedSpectrumCounter& spectrum = spectrum_vector[top_hits[hit_ind].spectrum_ind];
    top_hits[hit_ind].score(spectrum, acc_thresh, offset, scoring_method, score_type);
    ++hit_ind;
  }
}

// Stores, in each candidate's `score` field, the value returned by
// peptide_spectrum_score() for that candidate against `spectrum`, using the
// spectrum's own charge and the supplied scoring parameters.
void PeptideSpectrumHits::score(AnnotatedSpectrumCounter& spectrum, vector<float>acc_thresh, Offset offset, int scoring_method, int score_type) {
  for(list<PeptideCounter>::iterator hit = begin(); hit != end(); ++hit) {
    hit->score = peptide_spectrum_score(spectrum, *hit, spectrum.getCharge(), acc_thresh, offset, scoring_method, score_type);
  }
}


// Enumerates candidate peptides formed by concatenating a stretch of residues
// starting at pl_1.aa_loc with a stretch starting at pl_2.aa_loc, and appends
// every candidate accepted by check_if_valid() to psh.
//
// spectrum_vector : spectra; entry spectrum_ind supplies the precursor mass
//                   used for the stored mass error.
// pl_1, pl_2      : start locations. Both `length` fields are reset to 0 by
//                   this call (the arguments are modified).
// in_mass, thresh : target mass and tolerance.
// psh             : receives the candidates; its spectrum index is set to
//                   spectrum_ind.
// scr_ind         : stored in each candidate as scoring_method_index and
//                   scoring_method.
// Returns true when at least one length combination ended below
// in_mass + thresh, even if that candidate was then rejected as invalid.
//
// NOTE(review): the running mass is seeded with the residues covered by the
// incoming pl_1/pl_2 lengths, but the stretches are then rebuilt from length
// zero, so those residues look like they are counted twice - confirm intent.
// NOTE(review): when the second stretch stops at the end of the database or
// at a residue outside [Ala, Val], the candidate is accepted although its
// mass may still be below in_mass - thresh - confirm intent.
bool FastaDB::find_mass_match(vector<AnnotatedSpectrumCounter>& spectrum_vector, PeptideLocation& pl_1, PeptideLocation& pl_2, float in_mass, float thresh, PeptideSpectrumHits& psh, int spectrum_ind, int scr_ind) {
  bool flag = false;
  const vector<mass_t>& aa2mass = config->get_aa2mass();
  const int db_size = all_aa_seqs.size();
  psh.set_spectrum_ind(spectrum_ind);

  PeptideCounter temp_pep;
  float mass = 0;
  for(int ind = pl_1.aa_loc; ind < pl_1.length + pl_1.aa_loc; ind++) {
    mass += aa2mass[all_aa_seqs[ind]];
  }
  for(int ind = pl_2.aa_loc; ind < pl_2.length + pl_2.aa_loc; ind++) {
    mass += aa2mass[all_aa_seqs[ind]];
  }
  pl_1.length = 0;
  pl_2.length = 0;
  int len_1 = pl_1.length;
  int len_2 = pl_2.length;
  float mass_1 = mass;
  float mass_2;
  while(mass_1 < in_mass + thresh) {
    // Grow the second stretch until the lower mass bound is reached or the
    // stretch cannot be extended any further.
    mass_2 = mass_1;
    while(mass_2 < in_mass - thresh && pl_2.aa_loc + len_2 < all_aa_seqs.size() && all_aa_seqs[pl_2.aa_loc + len_2] <= Val && all_aa_seqs[pl_2.aa_loc + len_2] >= Ala) {
      mass_2 += aa2mass[all_aa_seqs[pl_2.aa_loc + len_2]];
      len_2++;
    }
    if(mass_2 < in_mass + thresh) {
      flag = true;
      vector<int> aas;
      for(int ind = 0; ind < len_1; ind++)
        aas.push_back(all_aa_seqs[pl_1.aa_loc + ind]);
      for(int ind = 0; ind < len_2; ind++)
        aas.push_back(all_aa_seqs[pl_2.aa_loc + ind]);
      if(temp_pep.check_if_valid(aas) == 1) {
        temp_pep.scoring_method_index = scr_ind;
        temp_pep.scoring_method = scr_ind;
        temp_pep.protein_idx_1 = pl_1.protein_idx;
        temp_pep.length_1 = len_1;
        temp_pep.position_1 = pl_1.loc_in_protein;
        temp_pep.protein_idx_2 = pl_2.protein_idx;
        temp_pep.length_2 = len_2;
        temp_pep.position_2 = pl_2.loc_in_protein;
        temp_pep.set_peptide_aas(config, aas, 1);
        float me = temp_pep.get_mass() - spectrum_vector[spectrum_ind].get_org_pm();
        temp_pep.set_mass_error(me);
        psh.push_back(temp_pep);
      }
    }
    len_2 = pl_2.length;
    // The first stretch cannot grow past the end of the database; stop here
    // rather than reading beyond all_aa_seqs.
    if(pl_1.aa_loc + len_1 >= db_size)
      break;
    mass_1 += aa2mass[all_aa_seqs[pl_1.aa_loc + len_1]];
    len_1++;
  }
  return flag;
}



// Stores in pep_rev a copy of pep whose residues are in reverse order.
// pep itself is not modified (previously the reversed residues were written
// back into pep and pep_rev was left as a forward copy).
//
// The reversed sequence is collected before pep_rev is assigned, so the call
// also behaves when both arguments refer to the same object.
void reverse_peptide(Peptide& pep, Peptide& pep_rev) {
  vector<int> pep_rev_seq;
  for(int ind = pep.get_length()-1; ind>=0; ind--)
    pep_rev_seq.push_back(pep.get_amino_acids()[ind]);
  pep_rev = pep;
  pep_rev.set_peptide_aas(pep_rev_seq);
}

// Tests whether pep occurs in the database at offset `pos` of protein `pi`.
//
// direction == true  : pep's residues are compared in their stored order.
// direction == false : a peptide produced by reverse_peptide() is compared
//                      instead.
// Returns true when every residue matches; false on the first mismatch or
// when the peptide would extend outside all_aa_seqs.
bool FastaDB::match_pep(int pi, int pos, Peptide& pep, bool direction) {
  if(!direction) {
    Peptide pep_rev;
    reverse_peptide(pep, pep_rev);
    return match_pep(pi, pos, pep_rev, true);
  }

  const int pos_ind = protein_start_location[pi] + pos;
  const int pep_len = pep.get_length();
  const int db_size = all_aa_seqs.size();
  // A peptide that does not fit inside the stored sequence cannot match;
  // bail out before indexing past the end.
  if(pep_len > 0 && (pos_ind < 0 || pos_ind + pep_len > db_size))
    return false;

  for(int ind = 0; ind < pep_len; ind++) {
    if(pep.get_amino_acids()[ind] != all_aa_seqs[ind+pos_ind])
      return false;
  }
  return true;
}


// Computes p-value / normalised-score information for the candidates of every
// spectrum in pep_list.
//
// A PvalModel is configured from this object's settings (config, optional
// cysteine and mass enforcement, syn_num_peptide). For each hit list:
//   1. when sort_score_flag is set, sort by score (best first);
//   2. keep at most selected_top_num candidates;
//   3. call PvalModel::set_pval_norm() on every remaining candidate, using
//      the candidate's own scoring_method;
//   4. re-sort by normalised score and/or p-value when the matching flag is set.
void SearchResults::calculate_pval_norm(vector<AnnotatedSpectrumCounter>& spectrum_vector, PeptideCounterList& pep_list, vector<float> acc_thresh, Offset offset, int score_type) {
  PvalModel pval_model;
  pval_model.set_config(config);
  if(enforce_cys_flag)
    pval_model.enforce_cys();
  if(enforce_mass_flag)
    pval_model.enforce_mass(syn_thresh);
  pval_model.set_num_peptide(syn_num_peptide);

  for(int spec_ind = 0; spec_ind < pep_list.top_hits.size(); spec_ind++) {
    if(sort_score_flag) {
      pep_list.top_hits[spec_ind].sort(ComparePeptideCounterByScore);
    }
    if(pep_list.top_hits[spec_ind].size() > selected_top_num)
      pep_list.top_hits[spec_ind].resize(selected_top_num);

    list<PeptideCounter>::iterator it;
    for(it = pep_list.top_hits[spec_ind].begin(); it != pep_list.top_hits[spec_ind].end(); it++) {
      AnnotatedSpectrumCounter& spectrum = spectrum_vector[pep_list.top_hits[spec_ind].spectrum_ind];
      pval_model.set_pval_norm(spectrum, *it, spectrum.getCharge(), acc_thresh, offset, it->scoring_method, score_type, pval_flag, norm_flag);
    }

    if(sort_norm_flag) {
      pep_list.top_hits[spec_ind].sort(ComparePeptideCounterByNormScore);
    }
    if(sort_pval_flag) {
      pep_list.top_hits[spec_ind].sort(ComparePeptideCounterByPval);
    }
  }
}


// Writes the stored search results to out_file_string as an HTML page.
//
// For every reported spectrum a small table with scan index, mass and charge
// is written, followed by a table with one row per reported hit. The hit
// table has two layouts (single location, or two locations when concat_flag
// is set) and gains extra columns for decoy status, PepNovo score, p-value
// and estimated p-value when the corresponding flags are set.
// A positive max_num_spectra / max_num_top_hits caps the number of spectra /
// hits per spectrum. Ranks are printed starting at 0.
void SearchResults::print_html(string out_file_string) {
  ofstream out_file(out_file_string.c_str());

  out_file << "<html>" << endl
           << "<head><TITLE>Database Search Results</TITLE></head>" << endl
           << "<body>" << endl
           << "<h2> Database Search Results </h2>" << endl
           << endl;

  int spectra_to_print = num_spectra;
  if(max_num_spectra > 0 && spectra_to_print > max_num_spectra)
    spectra_to_print = max_num_spectra;

  for(int sp = 0; sp < spectra_to_print; sp++) {
    // Per-spectrum summary table.
    out_file << "<table border=\"1\">" << endl
             << "<tr><td> scan index <td> spectrum mass <td> charge </td></tr>" << endl
             << "<tr><td>" << spectrum_index[sp]
             << " <td> " << spectrum_mass[sp]
             << " <td> " << spectrum_charge[sp]
             << "</td></tr>" << endl
             << "</table>" << endl;

    // Header row of the hit table.
    out_file << "<table border=\"1\">" << endl;
    if(concat_flag)
      out_file << "<tr><td> rank <td> peptide sequence <td> structure <td> mass error <td> total length <td> length <td> protein name <td> location <td> length <td> protein name <td> location <td> score <td> norm score";
    else
      out_file << "<tr><td> rank <td> peptide sequence <td> structure <td> mass error <td> length <td> protein name <td> location <td> score <td> norm score";
    if(decoy_flag)
      out_file << "<td> decoy/db " << endl;
    if(pepnovo_flag)
      out_file << "<td> pepnovo score " << endl;
    if(pval_flag)
      out_file << "<td> p-value " << endl;
    if(pval_est_flag)
      out_file << "<td> estimated p-value " << endl;
    out_file << "</td></tr>" << endl;

    int hits_to_print = num_hits[sp];
    if(max_num_top_hits > 0 && hits_to_print > max_num_top_hits)
      hits_to_print = max_num_top_hits;

    for(int rank = 0; rank < hits_to_print; rank++) {
      out_file << "<tr><td> " << rank
               << " <td> " << all_peptide_str[sp][rank]
               << (all_structs[sp][rank] == 0 ? " <td> linear" : " <td> cyclic")
               << " <td> " << mass_error[sp][rank];

      if(concat_flag) {
        // Combined length first, then both locations.
        out_file << " <td> " << all_protein_lengths[sp][rank] + all_protein_lengths_second[sp][rank]
                 << " <td> " << all_protein_lengths[sp][rank]
                 << " <td> " << protein_names[all_protein_indices[sp][rank]]
                 << " <td> " << all_protein_positions[sp][rank]
                 << " <td> " << all_protein_lengths_second[sp][rank]
                 << " <td> " << protein_names[all_protein_indices_second[sp][rank]]
                 << " <td> " << all_protein_positions_second[sp][rank];
      } else {
        out_file << " <td> " << all_protein_lengths[sp][rank]
                 << " <td> " << protein_names[all_protein_indices[sp][rank]]
                 << " <td> " << all_protein_positions[sp][rank];
      }

      out_file << " <td> " << score[sp][rank]
               << " <td> " << norm_score[sp][rank];
      if(decoy_flag)
        out_file << (is_decoy[sp][rank] ? " <td> decoy " : " <td> target ");
      if(pepnovo_flag)
        out_file << " <td> " << pepnovo_score[sp][rank];
      if(pval_flag)
        out_file << " <td> " << pval_vector[sp][rank];
      if(pval_est_flag)
        out_file << " <td> " << pval_est_vector[sp][rank];
      out_file << "</td></tr>" << endl;
    }
    out_file << "</table>" << endl;
  }

  out_file << "</body>" << endl
           << "</html>" << endl;
}
  

// Links spectra of this list to spectra of spectrum_vector_native.
//
// scan_set[i] is an index into this list and scan_set_native[i] the index of
// its partner in spectrum_vector_native. For every pair the entry in this
// list records its own index and appends a pointer to the partner spectrum,
// config_native, and the partner's index to its auxiliary lists.
void AnnotatedSpectrumCounterList::add_native_information(vector<int> scan_set, vector<int> scan_set_native, AnnotatedSpectrumCounterList& spectrum_vector_native, Config* config_native) {
	vector<int>::const_iterator native_it = scan_set_native.begin();
	for(vector<int>::const_iterator red_it = scan_set.begin(); red_it != scan_set.end(); ++red_it, ++native_it) {
		const int red_index = *red_it;
		const int native_index = *native_it;
		(*this)[red_index].index = red_index;
		(*this)[red_index].aux_spec_list.push_back(&spectrum_vector_native[native_index]);
		(*this)[red_index].config_list.push_back(config_native);
		(*this)[red_index].aux_index.push_back(native_index);
	}
}

// Unimplemented stub: the body is empty, so a call has no effect and none of
// the parameters are used.
// NOTE(review): judging by the name this was presumably meant to write a test
// FASTA file - confirm before relying on it.
void FastaDB::create_test_fasta_file(char *temp_fasta_file, Config *conf, int len, char *pep_str, bool enforce_cys) {
	
}

// Loads search results from a whitespace-separated text file (the layout
// written by print_data()).
//
// Layout: the number of spectra; then per spectrum a header line (scan index,
// mass, charge, peak count, hit count), an optional native-information line
// when native_information_flag is set, and one line per hit (peptide string,
// structure, score, normalised score, mass error, protein index, position,
// length).
//
// Failure handling: when the file cannot be opened or the spectrum count is
// unreadable or negative, a message is written to stderr and the object is
// left with zero spectra. When a later record is unreadable or carries a
// negative hit count, parsing stops, the affected and all following spectra
// get zero hits, and a warning is written to stderr.
void SearchResults::parse_from_file(string res_file_string){
    ifstream in_file(res_file_string.c_str());

    // Read the count into a local first so a failed read can never leave
    // num_spectra at a stale or garbage value before the resizes below.
    int spectra_in_file = 0;
    if(!in_file || !(in_file >> spectra_in_file) || spectra_in_file < 0) {
        cerr << "parse_from_file: cannot read spectrum count from " << res_file_string << endl;
        spectra_in_file = 0;
    }
    num_spectra = spectra_in_file;

    num_hits.resize(num_spectra);
    spectrum_index.resize(num_spectra);
    spectrum_mass.resize(num_spectra);
    spectrum_charge.resize(num_spectra);
    spectrum_peak_num.resize(num_spectra);
    if(native_information_flag) {
        spectrum_index_native.resize(num_spectra);
        spectrum_mass_native.resize(num_spectra);
        spectrum_charge_native.resize(num_spectra);
        spectrum_peak_num_native.resize(num_spectra);
        total_shared_peaks.resize(num_spectra);
        zero_shared_peaks.resize(num_spectra);
        delta_shared_peaks.resize(num_spectra);
    }
    score.resize(num_spectra);
    norm_score.resize(num_spectra);
    mass_error.resize(num_spectra);
    all_peptide_str.resize(num_spectra);
    all_structs.resize(num_spectra);
    all_protein_indices.resize(num_spectra);
    all_protein_lengths.resize(num_spectra);
    all_protein_positions.resize(num_spectra);
    if(num_spectra == 0) {
        in_file.close();
        return;
    }

    for(int sp_index = 0; sp_index<num_spectra; sp_index++) {
        in_file >> spectrum_index[sp_index] >> spectrum_mass[sp_index] >> spectrum_charge[sp_index] >> spectrum_peak_num[sp_index] >> num_hits[sp_index];
        if(native_information_flag) {
            in_file >> spectrum_index_native[sp_index] >> spectrum_mass_native[sp_index] >> spectrum_charge_native[sp_index] >> spectrum_peak_num_native[sp_index] >> total_shared_peaks[sp_index] >> zero_shared_peaks[sp_index] >> delta_shared_peaks[sp_index];
        }
        // A negative hit count means the file is corrupt; mark the stream as
        // failed so nothing after this point is trusted.
        if(in_file && num_hits[sp_index] < 0)
            in_file.setstate(ios::failbit);
        // Once the stream has failed no further record can be read, so this
        // spectrum (and every following one) carries no hits.
        if(!in_file)
            num_hits[sp_index] = 0;

        score[sp_index].resize(num_hits[sp_index]);
        norm_score[sp_index].resize(num_hits[sp_index]);
        mass_error[sp_index].resize(num_hits[sp_index]);
        all_peptide_str[sp_index].resize(num_hits[sp_index]);
        all_structs[sp_index].resize(num_hits[sp_index]);
        all_protein_indices[sp_index].resize(num_hits[sp_index]);
        all_protein_lengths[sp_index].resize(num_hits[sp_index]);
        all_protein_positions[sp_index].resize(num_hits[sp_index]);

        for(int ind = 0; ind<num_hits[sp_index]; ind++) {
            in_file >> all_peptide_str[sp_index][ind] >> all_structs[sp_index][ind] >> score[sp_index][ind] >> norm_score[sp_index][ind] >> mass_error[sp_index][ind] >> all_protein_indices[sp_index][ind] >> all_protein_positions[sp_index][ind] >>  all_protein_lengths[sp_index][ind];
        }
    }
    if(!in_file)
        cerr << "parse_from_file: " << res_file_string << " is truncated or malformed; results are incomplete" << endl;
    in_file.close();
}

// Writes one space-separated line per spectrum that has at least one
// reportable hit: scan index, mass, charge, then peptide string, structure
// ("linear" when the stored structure code is 0, otherwise "cyclic"),
// normalised score, mass error and length of the first hit.
// A positive max_num_spectra / max_num_top_hits caps the spectra / hits
// considered.
void SearchResults::print_xls(string out_file_string) {
    ofstream out_file(out_file_string.c_str());

    int spectra_to_print = num_spectra;
    if(max_num_spectra > 0 && spectra_to_print > max_num_spectra)
        spectra_to_print = max_num_spectra;

    for(int sp = 0; sp < spectra_to_print; sp++) {
        int hits_to_print = num_hits[sp];
        if(max_num_top_hits > 0 && hits_to_print > max_num_top_hits)
            hits_to_print = max_num_top_hits;
        if(hits_to_print <= 0)
            continue;

        const char* struct_t = (all_structs[sp][0] == 0) ? "linear" : "cyclic";
        out_file << spectrum_index[sp] << " " << spectrum_mass[sp] << " " << spectrum_charge[sp]
                 << " " << all_peptide_str[sp][0] << " " << struct_t
                 << " " << norm_score[sp][0] << " " << mass_error[sp][0]
                 << " " << all_protein_lengths[sp][0] << endl;
    }
    out_file.close();
}

// Dumps all stored results to res_file_string as whitespace-separated text:
// the spectrum count, then per spectrum a header line (scan index, mass,
// charge, peak count, hit count), a native-information line when
// native_information_flag is set, and one line per hit (peptide string,
// structure, score, normalised score, mass error, protein index, position,
// length).
void SearchResults::print_data(string res_file_string) {
    ofstream res_file(res_file_string.c_str());
    res_file << num_spectra << endl;

    for(int sp = 0; sp < num_spectra; sp++) {
        res_file << spectrum_index[sp] << " " << spectrum_mass[sp] << " " << spectrum_charge[sp]
                 << " " << spectrum_peak_num[sp] << " " << num_hits[sp] << endl;

        if(native_information_flag) {
            res_file << spectrum_index_native[sp] << " " << spectrum_mass_native[sp]
                     << " " << spectrum_charge_native[sp] << " " << spectrum_peak_num_native[sp]
                     << " " << total_shared_peaks[sp] << " " << zero_shared_peaks[sp]
                     << " " << delta_shared_peaks[sp] << endl;
        }

        int hit = 0;
        while(hit < num_hits[sp]) {
            res_file << all_peptide_str[sp][hit] << " " << all_structs[sp][hit]
                     << " " << score[sp][hit] << " " << norm_score[sp][hit]
                     << " " << mass_error[sp][hit] << " " << all_protein_indices[sp][hit]
                     << " " << all_protein_positions[sp][hit] << " " << all_protein_lengths[sp][hit] << endl;
            ++hit;
        }
    }
    res_file.close();
}
	

// Writes a labelled, human-readable report to out_file_string: the number of
// reported spectra, then for every spectrum with at least one reportable hit
// a summary line (plus a native-information line when
// native_information_flag is set) followed by one line per hit. Ranks start
// at 0. A positive max_num_spectra / max_num_top_hits caps the number of
// spectra / hits per spectrum.
void SearchResults::print(string out_file_string) {
    ofstream out_file(out_file_string.c_str());

    int spectra_to_print = num_spectra;
    if(max_num_spectra > 0 && spectra_to_print > max_num_spectra)
        spectra_to_print = max_num_spectra;
    out_file << "number of spectra : " << spectra_to_print << endl;

    for(int sp = 0; sp < spectra_to_print; sp++) {
        int hits_to_print = num_hits[sp];
        if(max_num_top_hits > 0 && hits_to_print > max_num_top_hits)
            hits_to_print = max_num_top_hits;
        // Spectra without reportable hits produce no output at all.
        if(hits_to_print <= 0)
            continue;

        out_file << "scan number : " << spectrum_index[sp] << ", mass : " << spectrum_mass[sp]
                 << ", charge : " << spectrum_charge[sp] << ", number of peaks : " << spectrum_peak_num[sp]
                 << ", number of hits : " << hits_to_print << endl;
        if(native_information_flag) {
            out_file << "scan number native : " << spectrum_index_native[sp] << ", mass : " << spectrum_mass_native[sp]
                     << ", charge : " << spectrum_charge_native[sp] << ", number of peaks : " << spectrum_peak_num_native[sp]
                     << ", shared peaks : " << total_shared_peaks[sp] << ", shared 0-peaks :" << zero_shared_peaks[sp]
                     << ", shared delta-peaks : " << delta_shared_peaks[sp] << endl;
        }

        for(int rank = 0; rank < hits_to_print; rank++) {
            const char* struct_t = (all_structs[sp][rank] == 0) ? "linear" : "cyclic";
            out_file << "rank : " << rank << ", Peptide : " << all_peptide_str[sp][rank]
                     << ", struct : " << struct_t << ", score : " << score[sp][rank]
                     << ", norm score : " << norm_score[sp][rank] << ", mass error : " << mass_error[sp][rank]
                     << ", prot : " << all_protein_indices[sp][rank] << ", loc in prot : " << all_protein_positions[sp][rank]
                     << ", len : " << all_protein_lengths[sp][rank] << endl;
        }
    }
    out_file.close();
}

// Copies the contents of pep_list into this object's flat result tables.
//
// The decoy / PepNovo / estimated-p-value flags and the scan list are taken
// from pep_list; num_spectra becomes the size of the scan list. Every
// per-spectrum table is sized to num_spectra and every per-hit table to the
// size of the matching hit list. Spectrum mass, charge and peak count are
// read from spectrum_vector at the hit list's spectrum index. Optional tables
// (decoy, second location, native information, p-values, PepNovo score) are
// only touched when the corresponding flag is set.
void SearchResults::parse_from_pep_list(PeptideCounterList& pep_list, AnnotatedSpectrumCounterList& spectrum_vector) {
    decoy_flag = pep_list.decoy_flag;
    pepnovo_flag = pep_list.pepnovo_flag;
    pval_est_flag = pep_list.pval_est_flag;
    set_scan_list(pep_list.scan_list);
    num_spectra = scan_list.size();

    // Per-spectrum tables that are always present.
    num_hits.resize(num_spectra);
    spectrum_index.resize(num_spectra);
    spectrum_mass.resize(num_spectra);
    spectrum_peak_num.resize(num_spectra);
    spectrum_charge.resize(num_spectra);
    score.resize(num_spectra);
    norm_score.resize(num_spectra);
    mass_error.resize(num_spectra);
    all_peptide_str.resize(num_spectra);
    all_structs.resize(num_spectra);
    all_protein_indices.resize(num_spectra);
    all_protein_lengths.resize(num_spectra);
    all_protein_positions.resize(num_spectra);

    // Per-spectrum tables that depend on a flag.
    if(decoy_flag) {
        is_decoy.resize(num_spectra);
    }
    if(concat_flag) {
        all_protein_indices_second.resize(num_spectra);
        all_protein_lengths_second.resize(num_spectra);
        all_protein_positions_second.resize(num_spectra);
    }
    if(native_information_flag) {
        spectrum_index_native.resize(num_spectra);
        spectrum_mass_native.resize(num_spectra);
        spectrum_charge_native.resize(num_spectra);
        spectrum_peak_num_native.resize(num_spectra);
        total_shared_peaks.resize(num_spectra);
        zero_shared_peaks.resize(num_spectra);
        delta_shared_peaks.resize(num_spectra);
    }
    if(pval_flag) {
        pval_vector.resize(num_spectra);
    }
    if(pval_est_flag) {
        pval_est_vector.resize(num_spectra);
    }
    if(pepnovo_flag) {
        pepnovo_score.resize(num_spectra);
    }

    PeptideCounter pep;
    for(int sp_index = 0; sp_index<num_spectra; sp_index++) {
        const size_t hit_count = pep_list.top_hits[sp_index].size();

        // Size the per-hit tables of this spectrum.
        num_hits[sp_index] = hit_count;
        score[sp_index].resize(hit_count);
        norm_score[sp_index].resize(hit_count);
        mass_error[sp_index].resize(hit_count);
        all_peptide_str[sp_index].resize(hit_count);
        all_structs[sp_index].resize(hit_count);
        all_protein_indices[sp_index].resize(hit_count);
        all_protein_lengths[sp_index].resize(hit_count);
        all_protein_positions[sp_index].resize(hit_count);

        // Spectrum-level properties.
        spectrum_index[sp_index] = pep_list.top_hits[sp_index].spectrum_ind;
        spectrum_mass[sp_index] = spectrum_vector[spectrum_index[sp_index]].get_org_pm();
        spectrum_charge[sp_index] = spectrum_vector[spectrum_index[sp_index]].getCharge();
        spectrum_peak_num[sp_index] = spectrum_vector[spectrum_index[sp_index]].getNumPeaks();

        if(pval_flag) {
            pval_vector[sp_index].resize(hit_count);
        }
        if(pval_est_flag) {
            pval_est_vector[sp_index].resize(hit_count);
        }
        if(pepnovo_flag) {
            pepnovo_score[sp_index].resize(hit_count);
        }
        if(concat_flag) {
            all_protein_indices_second[sp_index].resize(hit_count);
            all_protein_lengths_second[sp_index].resize(hit_count);
            all_protein_positions_second[sp_index].resize(hit_count);
        }
        if(decoy_flag) {
            is_decoy[sp_index].resize(hit_count);
        }

        if(native_information_flag) {
            // The first auxiliary entry of the spectrum supplies the native
            // partner; shared-peak counts come from the spectrum-pair table
            // selected through num_cys.
            spectrum_index_native[sp_index] = spectrum_vector[spectrum_index[sp_index]].aux_index[0];
            spectrum_mass_native[sp_index] = spectrum_vector[spectrum_index[sp_index]].aux_spec_list[0]->get_org_pm();
            spectrum_charge_native[sp_index] = spectrum_vector[spectrum_index[sp_index]].aux_spec_list[0]->getCharge();
            spectrum_peak_num_native[sp_index] = spectrum_vector[spectrum_index[sp_index]].aux_spec_list[0]->getNumPeaks();
            int spec_pair_ind = spec_pair_list_list.pair_ind[spectrum_index[sp_index]][num_cys];
            total_shared_peaks[sp_index] = spec_pair_list_list[num_cys][spec_pair_ind].total_share_peaks;
            zero_shared_peaks[sp_index] = spec_pair_list_list[num_cys][spec_pair_ind].share_peaks[0];
            delta_shared_peaks[sp_index] = spec_pair_list_list[num_cys][spec_pair_ind].share_peaks[1];
        }

        // Copy the hits, keeping list order.
        int ind = 0;
        for(list<PeptideCounter>::iterator it = pep_list.top_hits[sp_index].begin(); it != pep_list.top_hits[sp_index].end(); ++it, ++ind) {
            pep = *it;
            score[sp_index][ind] = pep.score;
            norm_score[sp_index][ind] = pep.norm_score;
            mass_error[sp_index][ind] = pep.mass_error;
            all_peptide_str[sp_index][ind] = pep.getPeptideStr();
            all_structs[sp_index][ind] = pep_list.scoring_methods[pep.scoring_method_index];
            if(concat_flag) {
                all_protein_indices[sp_index][ind] = pep.protein_idx_1;
                all_protein_positions[sp_index][ind] = pep.position_1;
                all_protein_lengths[sp_index][ind] = pep.length_1;
                all_protein_indices_second[sp_index][ind] = pep.protein_idx_2;
                all_protein_positions_second[sp_index][ind] = pep.position_2;
                all_protein_lengths_second[sp_index][ind] = pep.length_2;
            } else {
                all_protein_indices[sp_index][ind] = pep.protein_idx;
                all_protein_positions[sp_index][ind] = pep.position;
                all_protein_lengths[sp_index][ind] = pep.length;
            }
            if(decoy_flag) {
                is_decoy[sp_index][ind] = pep.is_decoy;
            }
            if(pepnovo_flag) {
                pepnovo_score[sp_index][ind] = pep.pepnovo_score;
            }
            if(pval_flag) {
                pval_vector[sp_index][ind] = pep.pval;
            }
            if(pval_est_flag) {
                pval_est_vector[sp_index][ind] = pep.pval_est;
            }
        }
    }
}

void PeptideCounterList::add_fixed_mod(int pos, float mass_offset, bool is_forward) {
	ModifType mod;
	mod.pos = pos;
	mod.mass_offset = mass_offset;
	mod.direction = is_forward;
	mod_list.push_back(mod);
}

// Applies mod's mass offset to this peptide through add_offset().
// With a forward direction the offset goes to position mod.pos; otherwise to
// position get_length() - mod.pos.
void PeptideCounter::apply_mod(ModifType mod) {
	if(!(mod.direction == true)) {
		this->add_offset(get_length()-mod.pos, mod.mass_offset);
		return;
	}
	this->add_offset(mod.pos, mod.mass_offset);
}

// Arithmetic mean of the values in v.
// Returns 0 for an empty vector (the plain division would yield NaN there).
float my_mean(std::vector<int> v) {
	if(v.empty())
		return 0;
	float sum = 0;
	for(std::vector<int>::size_type ind = 0; ind<v.size(); ind++)
		sum += v[ind];
	return sum/v.size();
}

// Population standard deviation of the values in v (divides by v.size()).
// Returns 0 for an empty vector (the plain division would yield NaN there).
float my_std(vector<int> v) {
	if(v.empty())
		return 0;
	float sum = 0;
	float mn = my_mean(v);
	for(int ind = 0; ind<v.size(); ind++) {
		// Square by multiplication instead of a general pow() call; the
		// product is formed in double before being accumulated.
		const double diff = v[ind]-mn;
		sum += diff*diff;
	}
	return sqrt(sum/v.size());
}

// Scores every candidate of every hit list with the rank model returned by
// model.get_rank_model_ptr(0) and stores the result in the candidate's
// pepnovo_score.
//
// spectrum_vector : spectra, indexed by each hit list's spectrum_ind.
// log_file_string : file that progress output is appended to.
//
// Each candidate is re-parsed from its peptide string, wrapped in a
// PeptideSolution carrying the spectrum's charge and precursor mass, and
// passed alone to scoreCompleteSequences(). The peptide attached to a
// spectrum is saved before its candidates are scored and written back
// afterwards. When print_flag is set, progress percentages go to stdout and
// to the log file.
void PeptideCounterList::score_pepnovo(vector<AnnotatedSpectrumCounter>& spectrum_vector, string log_file_string) {
	ofstream log_file;
	// Append-only log: out|app is a valid open mode in every C++ standard,
	// whereas the former in|out|app combination is not accepted before C++11.
	log_file.open(log_file_string.c_str(), fstream::out | fstream::app);
	PeptideRankScorer *db_score = (PeptideRankScorer *)model.get_rank_model_ptr(0);
	vector <PeptideSolution> sol_vec;
	PeptideSolution sol;
	int spectrum_index;
	int counter = 0;
	int num_pep = 0;
	int next_ind = 0;
	int next_val = 0;

	// Total number of candidates, used only for the progress percentage.
	for(int sp_ind = 0; sp_ind< num_spectra; sp_ind++)
		num_pep += top_hits[sp_ind].size();

	for(int sp_ind = 0; sp_ind< num_spectra; sp_ind++) {
		spectrum_index = top_hits[sp_ind].spectrum_ind;
		Peptide correct_peptide = spectrum_vector[spectrum_index].getPeptide();
		list<PeptideCounter>::iterator it;
		for(it = top_hits[sp_ind].begin(); it != top_hits[sp_ind].end(); it++) {
			if(print_flag && (counter > next_val)) {
				cout << next_ind << "% ... " << flush;
				log_file << next_ind << "% ... " << flush;
				next_ind++;
				next_val = next_ind*num_pep/100;
			}
			sol.pep = *it;
			sol.pep.parseFromString(config, sol.pep.getPeptideStr());
			sol.charge = spectrum_vector[spectrum_index].getCharge();
			sol.pm_with_19 = spectrum_vector[spectrum_index].get_org_pm();
			sol.reaches_n_terminal = true;
			sol.reaches_c_terminal = true;
			sol_vec.clear();
			sol_vec.push_back(sol);
			vector<score_pair> scores;
			db_score->scoreCompleteSequences(sol_vec, spectrum_vector[spectrum_index], scores);
			// Leave the previous score in place if the scorer returned nothing,
			// instead of indexing an empty vector.
			if(!scores.empty())
				it->pepnovo_score = scores[0].score;
			counter++;
		}
		spectrum_vector[spectrum_index].set_peptide(correct_peptide);
	}
	if(print_flag) {
		cout << endl;
		log_file << endl;
	}
	log_file.close();
}

// Serialises a PeptideCounter as eight space-separated fields: protein index,
// position, score, spectrum index, length, scoring-method index, scan index
// and decoy flag. No trailing separator or newline is written.
ostream& operator << (ostream& os, const PeptideCounter& pep)
{
	os << pep.protein_idx << " " << pep.position << " " << pep.score << " ";
	os << pep.spectrum_ind << " " << pep.length << " " << pep.scoring_method_index << " ";
	os << pep.scan_index << " " << pep.is_decoy;
	return os;
}

// Reads a PeptideCounter from eight whitespace-separated fields, in the order
// written by the matching output operator: protein index, position, score,
// spectrum index, length, scoring-method index, scan index, decoy flag.
istream& operator >> (istream& is, PeptideCounter& pep)
{
	is >> pep.protein_idx >> pep.position >> pep.score;
	is >> pep.spectrum_ind >> pep.length >> pep.scoring_method_index;
	is >> pep.scan_index >> pep.is_decoy;
	return is;
}

// Ordering predicate: true when pep_1 has a smaller mass than pep_2
// (ascending by mass).
bool ComparePeptideCounterByMass(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.get_mass() > pep_1.get_mass();
}
// Ordering predicate: true when pep_1 is shorter than pep_2
// (ascending by length).
bool ComparePeptideCounterByLength(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.get_length() > pep_1.get_length();
}

// Ordering predicate: true when pep_1 has the smaller estimated p-value
// (ascending by pval_est).
bool ComparePeptideCounterByEstPval(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.pval_est > pep_1.pval_est;
}

// Ordering predicate: true when pep_1 has the larger normalised score
// (descending by norm_score).
bool ComparePeptideCounterByNormScore(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.norm_score < pep_1.norm_score;
}

// Ordering predicate: true when pep_1 has the larger score
// (descending by score).
bool ComparePeptideCounterByScore(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.score < pep_1.score;
}

// Ordering predicate: true when pep_1 has the smaller p-value
// (ascending by pval).
bool ComparePeptideCounterByPval(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.pval > pep_1.pval;
}

// Ordering predicate: true when pep_1 has the larger PepNovo score
// (descending by pepnovo_score).
bool ComparePeptideCounterByPepNovoScore(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.pepnovo_score < pep_1.pepnovo_score;
}

// Lexicographic "less than" on the residue sequences of two peptides: the
// first differing residue decides; when one sequence is a prefix of the
// other, the shorter peptide sorts first. Equal sequences yield false.
bool ComparePeptideCounterByAmino(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	const int len_1 = pep_1.get_length();
	const int len_2 = pep_2.get_length();
	const int shared = len_1 < len_2 ? len_1 : len_2;
	for(int pos = 0; pos < shared; ++pos) {
		if(pep_1.get_amino_acids()[pos] != pep_2.get_amino_acids()[pos])
			return pep_1.get_amino_acids()[pos] < pep_2.get_amino_acids()[pos];
	}
	return len_1 < len_2;
}

// Equality test on residue sequences: true only when both peptides have the
// same length and the same residue at every position.
bool ComparePeptideCounterByAminoEq(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	const int len_1 = pep_1.get_length();
	const int len_2 = pep_2.get_length();
	if(len_1 != len_2)
		return false;
	for(int pos = 0; pos < len_1; ++pos) {
		if(pep_1.get_amino_acids()[pos] != pep_2.get_amino_acids()[pos])
			return false;
	}
	return true;
}

// Ordering predicate: true when pep_1's counter value is smaller than
// pep_2's (ascending by get_counter()).
bool ComparePeptideCounterByCounter(const PeptideCounter& pep_1, const PeptideCounter& pep_2) {
	return pep_2.get_counter() > pep_1.get_counter();
}


// Loads `length` residues, starting `position` residues into protein
// `protein_idx`, into pep.
// Returns whatever pep.set_peptide_aas() returns for those residues and the
// given complete_initialization value.
int FastaDB::write_peptide(PeptideAdd& pep, int protein_idx, int position, int length, int complete_initialization){
	const int first = protein_start_location[protein_idx]+position;
	vector<int> aas(length);
	for(int offset = 0; offset < length; ++offset)
		aas[offset] = all_aa_seqs[first + offset];
	return pep.set_peptide_aas(config, aas, complete_initialization);
}


// Adds one scored peptide to the running statistics of its
// (length, scoring-method index, scan index) cell: marks the cell as used,
// increments its peptide count, and accumulates the score and the squared
// score. The tables are indexed directly, so they must already be large
// enough for the cell (this function does not resize them).
void ScoreStatistics::insert_psm(PeptideCounter& pep) {
	const int len = pep.length;
	const int method = pep.scoring_method_index;
	const int scan = pep.scan_index;

	flag[len][method][scan] = true;
	num_pep[len][method][scan] += 1;
	sum_score[len][method][scan] += pep.score;
	sum_square_score[len][method][scan] += (pep.score*pep.score);
}


// Fills protein_seqs with every string of exactly `num` characters drawn from
// char_list, ordered with the first character varying slowest and the last
// character fastest (char_list order).
//   num == 0          -> a single empty string
//   num  < 0          -> treated like 0 (the recursive version never terminated)
//   empty char_list   -> no strings when num > 0
// Any previous content of protein_seqs is discarded.
void construct_db_from_list(vector<string>& protein_seqs, vector<char> char_list, int num) {
	// Start from the single zero-length string and extend it one position per round.
	vector<string> current(1, string());
	for(int round = 0; round < num; round++) {
		vector<string> extended;
		extended.reserve(current.size()*char_list.size());
		for(size_t seq_ind = 0; seq_ind < current.size(); seq_ind++) {
			for(size_t char_ind = 0; char_ind < char_list.size(); char_ind++) {
				extended.push_back(current[seq_ind] + char_list[char_ind]);
			}
		}
		current.swap(extended);
	}
	protein_seqs.swap(current);
}

// Replaces protein_names/protein_seqs with a synthetic database: every
// sequence of `num` residues drawn from aa_list, prefixed with the character
// for aa_first and suffixed with the character for aa_last. Each generated
// sequence is also used as its own protein name.
void FastaDB::make_database_fixed_sides(vector<int> aa_list, int num, int aa_first, int aa_last)
{
	protein_names.clear();
	protein_seqs.clear();

	// Translate the residue codes into their one-letter characters.
	vector<char> alphabet;
	for(int ind = 0; ind < aa_list.size(); ind++)
		alphabet.push_back(config->get_aa2char()[aa_list[ind]]);
	const char left_cap = config->get_aa2char()[aa_first];
	const char right_cap = config->get_aa2char()[aa_last];

	// All interior sequences of length num.
	vector<string> cores;
	construct_db_from_list(cores, alphabet, num);

	for(int ind = 0; ind < cores.size(); ind++) {
		string flanked(1, left_cap);
		flanked += cores[ind];
		flanked.push_back(right_cap);
		protein_seqs.push_back(flanked);
		protein_names.push_back(flanked);
	}
}
// Writes protein_names/protein_seqs to fasta_file_name, one "> name" header
// line followed by one sequence line per entry of protein_seqs.
void FastaDB::write_from_name_seq(string fasta_file_name) {
	ofstream out(fasta_file_name.c_str());
	const int num_entries = protein_seqs.size();
	for(int entry = 0; entry < num_entries; entry++) {
		out << "> " << protein_names[entry] << endl
		    << protein_seqs[entry] << endl;
	}
}

// Turns the accumulated per-cell sums into a mean and a standard deviation for
// every (length, scoring method, scan) cell. Cells with fewer than min_req
// hits (or with no hits at all) get the sentinel -1 for both values.
void ScoreStatistics::finalize() {
	for(int len = 0; len<max_length; len++) {
		for(int scr_met_ind = 0; scr_met_ind < num_scoring_methods; scr_met_ind++) {
			for(int scan_list_ind = 0; scan_list_ind < num_spectra; scan_list_ind++) {
				float mean_val = -1.0;
				float std_val = -1.0;
				const int nm = num_pep[len][scr_met_ind][scan_list_ind];
				// nm > 0 keeps a non-positive min_req from producing 0/0.
				if(nm >= min_req && nm > 0) {
					// Convert the sums straight to float: going through an int
					// temporary would drop any fractional part the accumulators hold.
					const float total = float(sum_score[len][scr_met_ind][scan_list_ind]);
					const float total_sq = float(sum_square_score[len][scr_met_ind][scan_list_ind]);
					mean_val = total/float(nm);
					// E[x^2] - E[x]^2 can come out slightly negative through
					// rounding; clamp so sqrt() never yields NaN.
					float variance = total_sq/float(nm) - mean_val*mean_val;
					if(variance < 0)
						variance = 0;
					std_val = sqrt(variance);
				}
				mean[len][scr_met_ind][scan_list_ind] = mean_val;
				std[len][scr_met_ind][scan_list_ind] = std_val;
			}
		}
	}
}


// Ordering predicate: ascending by parent mass.
bool CompareSpectrumMethodByParentMass(const SpectrumMethod& spec_met_1, const SpectrumMethod& spec_met_2) {
	return spec_met_2.parent_mass > spec_met_1.parent_mass;
}

// A vector of SpectrumMethod entries with mass-window lookup helpers.
// The lookups binary-search on parent_mass, so they only give meaningful
// results once the entries are ordered by ascending parent_mass.
class SpectrumMethodList : public vector<SpectrumMethod> {
	public:
	// Indices of entries whose parent mass lies within the per-charge tolerance
	// of `mass`; threshold[c-1] is the tolerance applied to charge c.
	vector<int> bring_match_list(float mass, vector<float> threshold);
	// Binary-search helper over the index range [start, end).
	int bring_first_larger_tmp(float mass, int start, int end);
	// Index of the first entry with parent_mass > mass, or size() if none.
	int bring_first_larger(float mass);
};

// Binary search over the index range [start, end): returns the index of the
// first entry whose parent_mass is strictly greater than `mass`, or `end` when
// no entry in the range qualifies (also when start >= end).
// The entries must be ordered by ascending parent_mass.
//
// The previous recursive form restarted the left half at index 0 instead of
// `start`, throwing away the lower bound it had already established and
// re-probing entries known to be <= mass. The search now narrows both ends.
int SpectrumMethodList::bring_first_larger_tmp(float mass, int start, int end) {
	int lo = start;
	int hi = end;
	while(lo < hi) {
		// lo + (hi-lo)/2 avoids the int overflow (start+end)/2 can hit.
		const int mid_elem = lo + (hi - lo)/2;
		if((*this)[mid_elem].parent_mass > mass)
			hi = mid_elem;       // mid qualifies; the answer is mid or left of it
		else
			lo = mid_elem + 1;   // mid and everything left of it is <= mass
	}
	return hi;
}

// Index of the first entry with parent_mass > mass across the whole list
// (size() when there is none).
int SpectrumMethodList::bring_first_larger(float mass) {
	const int num_entries = size();
	return bring_first_larger_tmp(mass, 0, num_entries);
}

// Collects the indices of all entries that match `mass`: for each charge c in
// 1..threshold.size(), entries of charge c located between the first entry
// above (mass - threshold[c-1]) and the first entry above (mass + threshold[c-1]).
// Indices are grouped by charge, ascending within each charge.
vector<int> SpectrumMethodList::bring_match_list(float mass, vector<float> threshold) {
	vector<int> matches;
	for(int charge = 1; charge <= threshold.size(); charge++) {
		const int window_begin = bring_first_larger(mass - threshold[charge-1]);
		const int window_end = bring_first_larger(mass + threshold[charge-1]);
		for(int ind = window_begin; ind < window_end; ind++) {
			if((*this)[ind].charge != charge)
				continue;
			matches.push_back(ind);
		}
	}
	return matches;
}


// Sets pep.norm_score to the z-score of pep.score within its
// (length, scoring method, scan) cell. When the stored standard deviation is
// not positive the normalised score is set to 0.
void ScoreStatistics::normalize_score(PeptideCounter& pep) {
	const float mu = mean[pep.length][pep.scoring_method_index][pep.scan_index];
	const float sigma = std[pep.length][pep.scoring_method_index][pep.scan_index];
	if(!(sigma > 0)) {
		pep.norm_score = 0;
		return;
	}
	pep.norm_score = (pep.score - mu)/sigma;
}


// Prints, for one scan, the hit count of every (length, scoring method) cell
// that received at least one hit.
void ScoreStatistics::print(int scan_list_ind) {
	for(int len = 0; len < max_length; len++) {
		for(int met = 0; met < num_scoring_methods; met++) {
			if(!flag[len][met][scan_list_ind])
				continue;
			cout << "length : " << len << " struct : " << met << " num : " << num_pep[len][met][scan_list_ind] << endl;
		}
	}
}

// Scores this peptide against `spectrum` with peptide_spectrum_score(), using
// the spectrum's own charge and this peptide's scoring_method, and stores the
// result in `score`.
void PeptideCounter::load_score(AnnotatedSpectrumCounter& spectrum, vector<float> acc_thresh, Offset offset, int score_type) {
	score = peptide_spectrum_score(spectrum,
	                               *this,
	                               spectrum.getCharge(),
	                               acc_thresh,
	                               offset,
	                               scoring_method,
	                               score_type);
}

// Ordering predicate: ascending by max_est_pval.
bool ComparePeptideSpectrumHitsByMaxEstPval(const PeptideSpectrumHits& psh_1, const PeptideSpectrumHits& psh_2) {
	return psh_2.max_est_pval > psh_1.max_est_pval;
}

// Ordering predicate: descending by max_score.
bool ComparePeptideSpectrumHitsByMaxScore(const PeptideSpectrumHits& psh_1, const PeptideSpectrumHits& psh_2) {
	return psh_2.max_score < psh_1.max_score;
}

// Ordering predicate: descending by max_norm_score.
bool ComparePeptideSpectrumHitsByMaxNormScore(const PeptideSpectrumHits& psh_1, const PeptideSpectrumHits& psh_2) {
	return psh_2.max_norm_score < psh_1.max_norm_score;
}

// Ordering predicate: descending by max_pepnovo_score.
bool ComparePeptideSpectrumHitsByMaxPepNovoScore(const PeptideSpectrumHits& psh_1, const PeptideSpectrumHits& psh_2) {
	return psh_2.max_pepnovo_score < psh_1.max_pepnovo_score;
}

// Offers one normalised hit to the top-hit list of its scan. The hit is merged
// in (ordering given by ComparePeptideCounterByNormScore) when it beats the
// list's recorded min_norm_score or the list holds at most num_top_hits
// entries; if the list then exceeds num_top_hits, its last entry is dropped
// and min_norm_score is refreshed from the new last entry.
void PeptideCounterList::insert_psm_norm_score(PeptideCounter& pep) {
	PeptideSpectrumHits& hits = top_hits[pep.scan_index];

	const bool beats_worst = hits.min_norm_score < pep.norm_score;
	if(!beats_worst && !(hits.size() <= num_top_hits))
		return;

	// merge() keeps the list ordered; a one-element list carries the new hit.
	list<PeptideCounter> single(1, pep);
	hits.merge(single, ComparePeptideCounterByNormScore);
	hits.max_norm_score = hits.front().norm_score;

	if(hits.size() > num_top_hits) {
		hits.pop_back();
		hits.min_norm_score = hits.back().norm_score;
	}
}


// Orders each scan's hit list with ComparePeptideCounterByScore, records the
// score of its first hit as max_score, then orders the scans themselves by
// descending max_score.
// A scan without hits keeps whatever max_score it already had: calling
// front() on an empty list is undefined behaviour.
void PeptideCounterList::sort_score() {
	for(int scan_ind = 0; scan_ind < num_spectra; scan_ind++) {
		top_hits[scan_ind].sort(ComparePeptideCounterByScore);
		if(!top_hits[scan_ind].empty())
			top_hits[scan_ind].max_score = top_hits[scan_ind].front().score;
	}
	sort(top_hits.begin(), top_hits.end(), ComparePeptideSpectrumHitsByMaxScore);
}

// Orders each scan's hit list with ComparePeptideCounterByPepNovoScore,
// records the pepnovo_score of its first hit as max_pepnovo_score, then orders
// the scans by descending max_pepnovo_score.
// A scan without hits keeps whatever max_pepnovo_score it already had: calling
// front() on an empty list is undefined behaviour.
void PeptideCounterList::sort_pepnovo() {
	for(int scan_ind = 0; scan_ind < top_hits.size(); scan_ind++) {
		top_hits[scan_ind].sort(ComparePeptideCounterByPepNovoScore);
		if(!top_hits[scan_ind].empty())
			top_hits[scan_ind].max_pepnovo_score = top_hits[scan_ind].front().pepnovo_score;
	}
	sort(top_hits.begin(), top_hits.end(), ComparePeptideSpectrumHitsByMaxPepNovoScore);
}


// Re-populates `peptide` through the three-argument write_peptide overload,
// passing the peptide itself as both the destination and the second argument.
void FastaDB::load_peptide(PeptideCounter& peptide, int comp_init) {
	write_peptide(peptide, peptide, comp_init);
}




// Database search driver. Works in two passes:
//   1. Every window of consecutive valid residues whose mass falls inside the
//      range spanned by the selected spectra (+/- the largest tolerance) is
//      turned into a peptide, scored against each spectrum/scoring-method pair
//      whose parent mass matches, written to the temporary file and added to
//      pep_list.statistics.
//   2. After the statistics are finalised, the hits are read back from the
//      temporary file, normalised and offered to pep_list's top-hit lists.
// Finally the hits are ordered according to sort_by.
//
// threshold[c-1] is the parent-mass tolerance for charge c; offsets[m] is added
// to a spectrum's parent mass for scoring method m.
//
// Fixes relative to the previous version:
//   * the log file is closed only after its last lines are written (the final
//     newline and second "time elapsed" line used to go to a closed stream);
//   * the offsets check compares against the number of scoring methods (it
//     used to compare against scans x methods and fired spuriously);
//   * no spectra, no scoring methods, no thresholds or too few offsets now
//     end the call after the error message instead of indexing out of range.
void FastaDB::FilterDB(PeptideCounterList& pep_list, AnnotatedSpectrumCounterList& spectrum_vector, vector<float>& threshold, vector<float> offsets, vector<int> scoring_methods, vector<float> acc_thresh, Offset offset, int score_type) {
	int total_target = 0;
	int total_decoy = 0;
	pep_list.set_scoring_methods(scoring_methods);
	pep_list.set_acc_thresh(acc_thresh);
	pep_list.set_offset(offset);
	pep_list.set_config(config);
	pep_list.decoy_flag = decoy_flag;
	// NOTE(review): scan_first/scan_last are copied before the defaults below
	// are applied, so pep_list sees the pre-default values - confirm intended.
	pep_list.scan_first = scan_first;
	pep_list.scan_last = scan_last;
	if(set_start_flag == false)
		scan_first = 0;
	if(set_stop_flag == false)
		scan_last = spectrum_vector.size();
	if(set_scan_flag == false) {
		scan_set.clear();
		for(int ind = scan_first; ind < scan_last; ind++)
			scan_set.push_back(ind);
	}
	ofstream log_file;
	log_file.open(log_file_string.c_str(), fstream::in | fstream::out | fstream::app);
	ofstream temp_out_file;
	temp_out_file.open(temp_file_string.c_str());
	pep_list.scoring_methods = scoring_methods;
	pep_list.offsets = offsets;

	// Validate the inputs before anything below indexes into them.
	bool inputs_ok = true;
	if(scan_set.size() == 0) {
		cout << "Error : No spectra selected!" << endl << flush;
		inputs_ok = false;
	}
	if(scoring_methods.empty()) {
		cout << "Error : No scoring methods given!" << endl << flush;
		inputs_ok = false;
	}
	if(threshold.empty()) {
		cout << "Error : No mass thresholds given!" << endl << flush;
		inputs_ok = false;
	}
	if(scoring_methods.size() != offsets.size()) {
		cout << "Number of scoring methods : " << scoring_methods.size() << " offset size : " << offsets.size() << endl;
		cout << "Error : offset size should be equal to number of scoring methods " << endl;
		// Surplus offsets are merely unused; missing ones would be read out of range.
		if(offsets.size() < scoring_methods.size())
			inputs_ok = false;
	}
	if(!inputs_ok) {
		temp_out_file.close();
		remove(temp_file_string.c_str());
		return;
	}

	// One entry per (selected scan, scoring method), ordered by shifted parent mass.
	SpectrumMethodList spectrum_method_list;
	SpectrumMethod sm;
	for(int set_ind = 0; set_ind < scan_set.size(); set_ind++){
		int ind = scan_set[set_ind];
		for(int method_ind = 0; method_ind < scoring_methods.size(); method_ind++) {
			sm.spectrum_index = ind;
			sm.scan_index = set_ind;
			sm.method_index = method_ind;
			sm.charge = spectrum_vector[ind].getCharge();
			sm.parent_mass = spectrum_vector[ind].get_org_pm()+offsets[method_ind];
			spectrum_method_list.push_back(sm);
		}
	}

	sort(spectrum_method_list.begin(), spectrum_method_list.end(), CompareSpectrumMethodByParentMass);

	// Overall peptide mass range worth enumerating.
	float max_threshold = *max_element(threshold.begin(), threshold.end());
	float max_mass = spectrum_method_list[spectrum_method_list.size()-1].parent_mass + max_threshold;
	float min_mass = spectrum_method_list[0].parent_mass - max_threshold;
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	vector <int> aas;
	int position_start;
	int position_end;
	int length;
	float mass;
	PeptideCounter pep;
	int count = 0;
	pep_list.set_fasta_db(this);
	bool flag_stop;
	vector<int> spec_list;
	pep_list.set_scan_list(scan_set);
	pep_list.statistics.set_num_spectra(scan_set.size());
	pep_list.statistics.set_num_scoring_methods(scoring_methods.size());
	pep_list.statistics.set_max_length(pep_list.max_length_filter_val+1);
	pep_list.statistics.initialize();
	int next_ind = 0;
	long int next_val = 0;
	int num_aa = 1;
	int aa_counter = 0;
	for (int protein_idx = 0; protein_idx<protein_length.size(); protein_idx++)
		num_aa += protein_length[protein_idx];
	cout << "Number of Amino acids in the database: " << num_aa << endl;
	cout << "filtering and scoring ..." << endl;
	log_file << "Number of Amino acids in the database: " << num_aa << endl;
	log_file << "filtering and scoring ..." << endl;
	clock_t begin;
	clock_t end;
	begin = clock();

	// ---- pass 1: enumerate, score, spill to the temporary file ----
	for (int protein_idx = 0; protein_idx<protein_length.size(); protein_idx++){
		for (int position = 0; position <= protein_length[protein_idx]; position++){
			aa_counter = position + protein_start_location[protein_idx];
			// Progress display. NOTE: num_aa/100 is 0 for fewer than 100
			// residues, in which case a marker is printed at every position.
			if((pep_list.print_flag == true) && (aa_counter >= next_val)) {
				cout << next_ind << "% ... " << flush;
				log_file << next_ind << "% ... " << flush;
				next_ind++;
				next_val = next_ind*(num_aa/100);
			}

			position_start = protein_start_location[protein_idx] + position;
			length = 0;
			mass = 0;
			aas.clear();
			flag_stop = false;
			// Grow the window until it is heavy enough; an invalid residue
			// ends the window and moves the scan position onto that residue.
			while((length <= protein_length[protein_idx] - position) && (mass < min_mass) && flag_stop == false) {
				length++;
				position_end = length + position_start - 1;
				if ((all_aa_seqs[position_end]<Ala) || (all_aa_seqs[position_end]>Val)) {
					position = position+length-1;
					flag_stop = true;
				} else {
					aas.push_back(all_aa_seqs[position_end]);
					mass += aa2mass[all_aa_seqs[position_end]];
				}
			}

			// Score every extension of the window while its mass stays in range.
			while((length <= protein_length[protein_idx] - position) && (mass > min_mass) && (mass < max_mass) && flag_stop == false) {
				pep.clear();
				pep.set_peptide_aas(config, aas, 1);
				pep.set_protein_index(protein_idx);
				pep.set_position(position);
				pep.set_fasta_db(this);
				pep.apply_mod_list(pep_list.mod_list);
				pep.length = pep.get_length();
				pep.num_cys = calc_num_aa(pep, Cys);
				if(decoy_flag) {
					pep.is_decoy = protein_decoy[protein_idx];
					if(pep.is_decoy) {
						total_decoy++;
					}
					else {
						total_target++;
					}
				}
				else
					pep.is_decoy = false;
				if(pep.check_if_valid(aas) && pep.length<=pep_list.max_length_filter_val && pep.length>=pep_list.min_length_filter_val && ((enforce_cys_flag == false)|| (num_cys == pep.num_cys))) {
					spec_list = spectrum_method_list.bring_match_list(pep.get_mass(), threshold);
					for(int ind = 0; ind<spec_list.size(); ind++) {
						pep.set_counter(count);
						pep.set_mass_error(pep.get_mass() - spectrum_method_list[spec_list[ind]].parent_mass);
						pep.set_spectrum_index(spectrum_method_list[spec_list[ind]].spectrum_index);
						pep.set_scan_list_index(spectrum_method_list[spec_list[ind]].scan_index);
						pep.set_scoring_index(spectrum_method_list[spec_list[ind]].method_index);
						pep.set_scoring(scoring_methods[spectrum_method_list[spec_list[ind]].method_index]);
						pep.load_score(spectrum_vector[spectrum_method_list[spec_list[ind]].spectrum_index], acc_thresh, offset, score_type);
						if(print_pep_flag){
							cout << pep << " " << pep.getPeptideStr() << " " << pep.get_mass() << endl;
						}
						temp_out_file << pep << endl;
						pep_list.statistics.insert_psm(pep);
						count++;
					}
				}
				length++;
				position_end = length + position_start - 1;
				if ((all_aa_seqs[position_end]<Ala) || (all_aa_seqs[position_end]>Val)) {
					position = position+length-1;
					flag_stop = true;
				} else {
					aas.push_back(all_aa_seqs[position_end]);
					mass += aa2mass[all_aa_seqs[position_end]];
				}
			}

		}

	}

	if(pep_list.print_flag == true) {
		cout << endl << flush;
		log_file << endl << flush;
	}

	end = clock();
	cout << "time elapsed: " << double(end - begin)/CLOCKS_PER_SEC << endl;
	log_file << "time elapsed: " << double(end - begin)/CLOCKS_PER_SEC << endl;

	cout << "finalizing scores for " << count << " hits to " << scan_set.size() << " spectra ... " << endl;
	log_file << "finalizing scores for " << count << " hits to " << scan_set.size() << " spectra ... " << endl;

	temp_out_file.close();

	pep_list.statistics.finalize();

	// ---- pass 2: read the hits back, normalise, keep the best per scan ----
	pep_list.set_num_spectra(scan_set.size());
	ifstream temp_in_file;
	temp_in_file.open(temp_file_string.c_str());
	next_ind = 0;
	next_val = 0;
	begin = clock();

	for(int ind = 0;ind < count; ind++) {
		temp_in_file >> pep;
		pep_list.statistics.normalize_score(pep);
		load_peptide(pep);
		pep.apply_mod_list(pep_list.mod_list);
		pep.set_scoring(scoring_methods[pep.scoring_method_index]);
		float pm = pep.get_mass() - (spectrum_vector[pep.spectrum_ind].get_org_pm()+offsets[pep.scoring_method_index]);
		pep.set_mass_error(pm);
		pep_list.insert_psm_norm_score(pep);
		// Progress display; same zero-step caveat as in pass 1 when count < 100.
		if((pep_list.print_flag == true) && (ind >=  next_val)) {
			cout << next_ind << "% ... " << flush;
			log_file << next_ind << "% ... " << flush;
			next_ind++;
			next_val = next_ind*(count/100);
		}
	}
	temp_in_file.close();
	remove(temp_file_string.c_str());

	if(pep_list.print_flag == true) {
		cout << endl << flush;
		log_file << endl << flush;
	}

	end = clock();
	cout << "time elapsed: " << double(end - begin)/CLOCKS_PER_SEC << endl;
	log_file << "time elapsed: " << double(end - begin)/CLOCKS_PER_SEC << endl;
	// Close only now that the last log line is written, and still before
	// score_pepnovo() below, which is handed the log file name itself.
	log_file.close();

	pep_list.unique();
	if(sort_by == 0) {
		sort(pep_list.top_hits.begin(), pep_list.top_hits.end(), ComparePeptideSpectrumHitsByMaxNormScore);
	} else if(sort_by == 1) {
		pep_list.sort_score();
	} else if(sort_by == 2) {
		pep_list.pepnovo_flag = true;
		cout << "calculating pepnovo scores ..." << endl;
		pep_list.score_pepnovo(spectrum_vector, log_file_string);
		cout << "sorting pepnovo scores ..." << endl;
		pep_list.sort_pepnovo();
	} else if(sort_by == 4) {
		pep_list.pval_est_flag = true;
		cout << "calculating estimated p-values ..." << endl;
		pep_list.est_pval(spectrum_vector);
		cout << "sorting estimated p-value scores ..." << endl;
		pep_list.sort_pval_est();
	}

	if(decoy_flag) {
		cout << "num target hits : " << total_target << endl;
		cout << "num decoy hits : " << total_decoy << endl;
	}
}

// Sets the search defaults and builds aa_codes, the residue-code table used
// for tag indexing: codes Xle..Val are numbered consecutively from 0, then Ile
// and Leu are both mapped to Xle. mult_val receives the count of numbered codes.
FastaDB::FastaDB()
{
	// Sorting / reporting behaviour.
	sort_by = 0;
	decoy_flag = false;
	print_pep_flag = false;
	enforce_cys_flag = false;

	// No explicit scan selection yet.
	set_start_flag = false;
	set_stop_flag = false;
	set_scan_flag = false;

	// Tag lengths and scratch/log file names.
	default_tag_len = 5;
	min_tag_len = 1;
	max_tag_len = 5;
	temp_file_string = "temp.txt";
	log_file_string = "log.txt";

	config = NULL;
	fasta_file=NULL;

	aa_codes.assign(Val+1, 0);
	int next_code = 0;
	for (int aa = Xle; aa <= Val; aa++)
		aa_codes[aa] = next_code++;

	// Isobaric residues share one entry.
	aa_codes[Ile]=Xle;
	aa_codes[Leu]=Xle;

	mult_val = next_code;
}

// Releases the stored fasta file name.
FastaDB::~FastaDB()
{
	// delete[] on a null pointer is a no-op, so no explicit check is needed.
	delete [] fasta_file;
}


// A tag occurrence: the tag's index plus its location in the sequence buffer.
// Ordered by tag index first, then by location.
struct TagLoc {
	bool operator< (const TagLoc& other) const
	{
		if (idx != other.idx)
			return idx < other.idx;
		return loc < other.loc;
	}
	int idx;
	int loc;
};

/*******************************************************************
  creates all relevant data structures from the fasta file
  includes the sequences (stored as aa - ints), protein names,
  and tag hashes (direct hash table)
********************************************************************/
void FastaDB::create_db_from_fasta(char *file_name, Config *con,
		bool create_tags, int min_length, int max_length)
{
	char buff[1024];
	int file_size;
	int seq_p=0;

	this->config = con;
	const vector<int>& char2aa = config->get_char2aa();

	ifstream file (file_name, ios::in|ios::ate);
	if (file.is_open())
	{
		file_size = file.tellg();
		file.seekg (0, ios::beg);
	}
	else
	{
		cout << "Error: reading!"<< file_name << endl;
		exit(1);
	}
	
	min_tag_length = min_length;
	max_tag_length = max_length;

	if (create_tags && (min_length <3 || max_length>6 || min_tag_length>max_tag_length))
	{
		printf("Tag length must be 3-6 !\n");
		exit(1);
	}

	fasta_file = new char[strlen(file_name)+1];
	strcpy(fasta_file,file_name);

	aa_seq_starts.clear();     
	protein_name_starts.clear(); 
	all_aa_seqs.clear();
	all_protein_names.clear();


	// add sequence terminatng symbol -1
	// before first sequence
	all_aa_seqs.push_back(-1);

	file.getline(buff,1024);
	while(1)
	{
		if (file.eof())
			break;

		if (file.gcount()>0 && buff[0] != '>')
		{
			file.getline(buff,1024);
			if (file.gcount() <= 0)
				break;

			continue;
		}

		// push the protein name
		int len=strlen(buff);
		int name_start_idx=all_protein_names.size();
		int i;
    string temp_protein_name;
    temp_protein_name.clear();
		for (i=1; i<len; i++)
		{
			if (buff[i] != '\n') {
				all_protein_names.push_back(buff[i]);
        temp_protein_name.push_back(buff[i]);
        }
		}
		all_protein_names.push_back('\0');
    protein_names.push_back(temp_protein_name);

		aa_seq_starts.insert(INT_MAP::value_type(all_aa_seqs.size(),protein_name_starts.size()));
		protein_name_starts.push_back(name_start_idx);
		protein_start_location.push_back(all_aa_seqs.size());
		protein_number.push_back(protein_name_starts.size());

		// read protein sequence
		while ( 1)
		{
			file.getline(buff,1024);
			if (file.gcount()<=0 || buff[0] == '>')
				break;

			const int len=strlen(buff);
			for (i=0; i<len; i++)
				if ((buff[i]>= 'A' && buff[i]<'Z') || buff[i] == '*')
					all_aa_seqs.push_back(char2aa[buff[i]]);
		}
		// add sequence terminatng symbol -1
		all_aa_seqs.push_back(-1);
	}

	// creates tags using a vector of vector, then transforms it into a tag_hash
	// and a sequence of tag locations
	if (create_tags)
	{
		tag_maps.resize(max_tag_length+1);
		tag_locations.resize(max_tag_length+1);

		int tag_length;

		for (tag_length=min_tag_length; tag_length<=max_tag_length; tag_length++)
		{
			vector<TagLoc> tag_locs;
			vector<int> tag;
			const int max_loc = all_aa_seqs.size()-tag_length;
			const int max_tag_idx = static_cast<int>(pow(static_cast<double>(mult_val),
														 static_cast<double>(tag_length)));
			int i;

			tag_locs.reserve(all_aa_seqs.size());
			tag.resize(tag_length);

			for (i=0; i<max_loc; i++)
			{
				int j;
				for (j=0; j<tag_length; j++)
				{
					if (all_aa_seqs[i+j]<Ala)
						break;

					tag[j]=all_aa_seqs[i+j];
				}
				if (j<tag_length)
					continue;

				int idx=calc_tag_index(tag);
				
				TagLoc tl;
				tl.idx = idx;
				tl.loc = i;
				tag_locs.push_back(tl);	
			}
			sort(tag_locs.begin(),tag_locs.end());
		
			// transfer tags to map
			int total_locs=tag_locs.size();

			tag_locations[tag_length].clear();
			tag_locations[tag_length].reserve(total_locs);
			

			tag_maps[tag_length].clear();

			int idx=-1;
			int start=-1;
			int num_locs=0;
			for (i=0; i<tag_locs.size(); i++)
			{
				if (tag_locs[i].idx != idx)
				{
					if (num_locs>0)
					{
						TagListPointer tlp;
						tlp.list_start_idx = start;
						tlp.num_locations = num_locs;

						tag_maps[tag_length].insert(INT2TLP_MAP::value_type(idx,tlp));
					}

					idx=tag_locs[i].idx;
					start=i;
					num_locs=1;
					
					tag_locations[tag_length].push_back(tag_locs[i].loc);
				}
				else
				{
					tag_locations[tag_length].push_back(tag_locs[i].loc);
					num_locs++;
				}
			}

		
		}
	}
	for(int ind = 1; ind<protein_start_location.size(); ind++)
		protein_length.push_back(protein_start_location[ind] - protein_start_location[ind-1] - 1);
	protein_length.push_back(all_aa_seqs.size() - protein_start_location[protein_start_location.size()-1]-1);

  update_db_string();
}


// Rebuilds db_string as a character image of all_aa_seqs: residue codes in
// Ala..Val become their one-letter character, everything else becomes '*'.
void FastaDB::update_db_string() {
  const vector<char>& aa2char = config->get_aa2char();
  db_string.resize(all_aa_seqs.size());
  for(int pos = 0; pos < db_string.size(); pos++) {
    const int code = all_aa_seqs[pos];
    const bool is_residue = (code >= Ala) && (code <= Val);
    db_string[pos] = is_residue ? aa2char[code] : '*';
  }
}


// Reads one int count from the dat file; exits with an error when the read
// fails or the value is negative (truncated or corrupt file).
static int read_fasta_db_count(fstream& ifs, const char *file_name)
{
	int value = 0;
	ifs.read(reinterpret_cast<char *>(&value),sizeof(int));
	if (ifs.fail() || value < 0)
	{
		cout << "Error: corrupt or truncated database file: "<< file_name << endl;
		exit(1);
	}
	return value;
}

/*************************************************************
	Reads all info from a dat file, in the order in which
	write_FastaDB stores it: fasta file name, protein name
	table, sequence-start map, sequence buffer and the tag
	tables.

	Exits the program when the file cannot be opened or is
	truncated/corrupt. Empty tables are handled without taking
	the address of element 0, the previous fasta_file buffer is
	released, and the per-protein start tables are rebuilt from
	scratch rather than appended to.
**************************************************************/
void FastaDB::read_FastaDB(const char *file_name, Config *con)
{
	fstream ifs(file_name, ios::in|ios::binary);
	if (! ifs.good())
	{
		cout << "Error: couldn't open for reading: "<< file_name << endl;
		exit(1);
	}

	config = con;

	// read file name
	const int name_len = read_fasta_db_count(ifs, file_name);
	char *name = new char[name_len+1]();
	ifs.read(name,name_len*sizeof(char));
	name[name_len]='\0';
	delete [] fasta_file;
	fasta_file = name;

	// read protein names
	const int num_proteins = read_fasta_db_count(ifs, file_name);
	protein_name_starts.resize(num_proteins);
	if (num_proteins > 0)
		ifs.read(reinterpret_cast<char *>(&protein_name_starts[0]),sizeof(int) * num_proteins);

	const int all_protein_names_length = read_fasta_db_count(ifs, file_name);
	all_protein_names.resize(all_protein_names_length);
	if (all_protein_names_length > 0)
		ifs.read(reinterpret_cast<char *>(&all_protein_names[0]),sizeof(char) * all_protein_names_length);

	// read aa_seq_starts map
	aa_seq_starts.clear();
	protein_start_location.clear();
	protein_number.clear();
	for (int i=0; i<num_proteins; i++)
	{
		int aa_pos = 0, prot_num = 0;
		ifs.read(reinterpret_cast<char *>(&aa_pos),sizeof(int));
		ifs.read(reinterpret_cast<char *>(&prot_num),sizeof(int));
		aa_seq_starts.insert(INT_MAP::value_type(aa_pos,prot_num));
		protein_start_location.push_back(aa_pos);
		protein_number.push_back(prot_num);
	}

	// read all seqs
	const int all_seqs_length = read_fasta_db_count(ifs, file_name);
	all_aa_seqs.resize(all_seqs_length);
	if (all_seqs_length > 0)
		ifs.read(reinterpret_cast<char *>(&all_aa_seqs[0]),sizeof(int)*all_seqs_length);

	// read tags
	ifs.read(reinterpret_cast<char *>(&min_tag_length),sizeof(int));
	ifs.read(reinterpret_cast<char *>(&max_tag_length),sizeof(int));
	if (ifs.fail() || min_tag_length < 0)
	{
		cout << "Error: corrupt or truncated database file: "<< file_name << endl;
		exit(1);
	}
	const int num_tables = (max_tag_length >= 0) ? max_tag_length+1 : 0;
	tag_locations.resize(num_tables);
	tag_maps.resize(num_tables);

	for (int t=min_tag_length; t<=max_tag_length; t++)
	{
		const int num_tag_locations = read_fasta_db_count(ifs, file_name);
		tag_locations[t].resize(num_tag_locations);
		if (num_tag_locations > 0)
			ifs.read(reinterpret_cast<char *>(&tag_locations[t][0]),sizeof(int)*num_tag_locations);

		const int map_size = read_fasta_db_count(ifs, file_name);
		tag_maps[t].clear();

		for (int i=0; i<map_size; i++)
		{
			int idx = 0;
			TagListPointer tlp;

			ifs.read(reinterpret_cast<char *>(&idx),sizeof(int));
			ifs.read(reinterpret_cast<char *>(&tlp),sizeof(TagListPointer));
			tag_maps[t].insert(INT2TLP_MAP::value_type(idx,tlp));
		}
	}

	// any short read above leaves failbit set
	if (ifs.fail())
	{
		cout << "Error: corrupt or truncated database file: "<< file_name << endl;
		exit(1);
	}

	ifs.close();
}

/*************************************************************
	writes all info to dat file.
**************************************************************/
void FastaDB::write_FastaDB(const char *file_name) const
{
	fstream ofs(file_name, ios::out|ios::binary);
	if (! ofs.good() || ! ofs.is_open())
	{
		cout << "Error: couldn't open for writing: "<< file_name << endl;
		exit(1);
	}

	// write file name
	if (! fasta_file)
	{
		cout << "Error: must initialize from a fasta file!" << endl;
		exit(0);
	}
	int name_len = strlen(this->fasta_file);
	ofs.write(reinterpret_cast<const char *>(&name_len),sizeof(int));
	ofs.write(reinterpret_cast<const char *>(fasta_file),name_len*sizeof(char));
	
	// write protein names
	int num_proteins = protein_name_starts.size();
	int all_protein_names_length = all_protein_names.size();
	int all_seqs_length = all_aa_seqs.size();

	ofs.write(reinterpret_cast<char *>(&num_proteins),sizeof(int));
	ofs.write(reinterpret_cast<const char *>(&protein_name_starts[0]),sizeof(int) * num_proteins);
	ofs.write(reinterpret_cast<char *>(&all_protein_names_length), sizeof(int));
	ofs.write(reinterpret_cast<const char *>(&all_protein_names[0]),sizeof(char) * all_protein_names_length);

	// write aa_seq_starts map
	INT_MAP::const_iterator it;
	for (it=aa_seq_starts.begin(); it!= aa_seq_starts.end(); it++)
	{
		ofs.write(reinterpret_cast<const char *>(&it->first),sizeof(int));
		ofs.write(reinterpret_cast<const char *>(&it->second),sizeof(int));
		
	}

	ofs.write(reinterpret_cast<char *>(&all_seqs_length), sizeof(int));
	const char *seqs=(char *)(&all_aa_seqs[0]);
	ofs.write(seqs,sizeof(int)*all_seqs_length);

	// write tags
	
	ofs.write(reinterpret_cast<const char *>(&min_tag_length),sizeof(int));
	ofs.write(reinterpret_cast<const char *>(&max_tag_length),sizeof(int));
	
	int t;
	for (t=min_tag_length; t<=max_tag_length; t++)
	{
		int num_tag_locations = tag_locations[t].size();

		ofs.write(reinterpret_cast<const char *>(&num_tag_locations),sizeof(int));
		ofs.write(reinterpret_cast<const char *>(&tag_locations[t][0]),sizeof(int)*num_tag_locations);
		
		int map_size = tag_maps[t].size();
		ofs.write(reinterpret_cast<const char *>(&map_size),sizeof(int));

		INT2TLP_MAP::const_iterator it;
		for (it=tag_maps[t].begin(); it!= tag_maps[t].end(); it++)
		{
			ofs.write(reinterpret_cast<const char *>(&it->first),sizeof(int));
			ofs.write(reinterpret_cast<const char *>(&it->second),sizeof(TagListPointer));
			
		}
	}


	ofs.close();
}


/*
// returns a merged list of the locations of all the tags
// the indices in the tag_loc records are according to the position
	vector<list_record> lists;
	lists.resize(tag_seqs.size());
	
	for (i=0; i<tag_seqs.size(); i++)
	{
		int j;

		const vector<int>& locations =get_tag_locations(tag_seqs[i]);
		lists[i].list.resize(locations.size());
		lists[i].size = locations.size();
		lists[i].free_memory = true;

		for (j=0; j<locations.size(); j++)
		{
			lists[i].list[j].loc= locations[j];
			lists[i].list[j].tag_idx = i;
		}
	}
	merge_lists(lists,merged);
	return merged;
}

*/

// Prints the source file name, the number of protein entries and the size of
// the sequence buffer (which includes the -1 separators).
void FastaDB::print_stats() const
{
	// %s with a null pointer is undefined, so substitute a placeholder.
	printf("FILE          : %s\n",fasta_file ? fasta_file : "(none)");
	// size() returns size_t; passing it to %d unconverted is undefined on
	// platforms where size_t is wider than int.
	printf("SEQUENCES     : %d\n",static_cast<int>(protein_name_starts.size()));
	printf("AMINO ACIDS   : %d\n",static_cast<int>(all_aa_seqs.size()));
}


// Prints one line per protein: its index followed by its NUL-terminated name
// as stored in all_protein_names.
void FastaDB::print_protein_names() const
{
	for (int prot = 0; prot < protein_name_starts.size(); prot++)
	{
		const char *name = &all_protein_names[protein_name_starts[prot]];
		cout << prot << " " << name << endl;
	}
}



// Counts the start positions in the sequence buffer from which accumulating
// residue masses (stopping at the first symbol <= 0 or once mass - tolerance
// is reached) yields a total within [mass - tolerance, mass + tolerance].
int FastaDB::get_num_cands_with_mass(float mass, float tolerance) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const float lower = mass - tolerance;
	const float upper = mass + tolerance;
	int num_cands = 0;

	for (int start = 0; start < all_aa_seqs.size(); start++)
	{
		float running = 0;
		for (int pos = start; all_aa_seqs[pos] > 0 && running < lower; pos++)
			running += aa2mass[all_aa_seqs[pos]];

		if (running >= lower && running <= upper)
			num_cands++;
	}

	return num_cands;
}



// Prints the labels of up to num_aas symbols starting at loc_idx in the
// sequence buffer, followed by a newline. A negative symbol prints "$$$" and
// ends the output early.
void FastaDB::print_aas_at_loc(int loc_idx, int num_aas) const
{
	for (int offset = 0; offset < num_aas; offset++)
	{
		const int sym = all_aa_seqs[loc_idx + offset];
		if (sym >= 0)
		{
			cout<<config->get_aa2label()[sym];
			continue;
		}
		cout << "$$$";
		break;
	}
	cout << endl;
}
