#include "util.h"
#include "peptide_spectrum_score.h"

#include <algorithm>
#include <cmath>


//bool ComparePeakByMass(const Peak& p_1, const Peak& p_2) {
//	return (p_1.mass<p_2.mass);
//}


// Builds a cyclic-only peptide from this peptide's linear part (seq_lin) and
// cyclic part (seq_cyc).
//
// The summed mass of seq_lin, plus H2O_mass and minus bound_offset, is added to
// one element of the cyclic sequence: the first one when seq_lin_first is set,
// the last one otherwise. The result is stored in cyclic_nlp.seq and
// cyclic_nlp.structure is set to 1.
//
// If seq_cyc is empty there is no element to carry the extra mass; in that case
// cyclic_nlp.seq is left empty instead of indexing into an empty vector.
void Non_Linear_Peptide::get_cyclic_nlp(Non_Linear_Peptide& cyclic_nlp) {
    const float H2O_mass = 18.01528;

    float mass_lin = 0;
    for(int ind = 0; ind < seq_lin.size(); ind++)
        mass_lin += seq_lin[ind];

    vector<float> seq_cyc_complete;
    for(int ind = 0; ind < seq_cyc.size(); ind++)
        seq_cyc_complete.push_back(seq_cyc[ind]);

    if(!seq_cyc_complete.empty()) {
        // Position of the cyclic element that absorbs the linear part.
        const int attach_ind = seq_lin_first ? 0 : int(seq_cyc_complete.size())-1;
        seq_cyc_complete[attach_ind] += (mass_lin+H2O_mass-bound_offset);
    }

    cyclic_nlp.seq = seq_cyc_complete;
    cyclic_nlp.structure = 1;
}
    
// Builds a linear-only peptide from this peptide's linear part (seq_lin) and
// cyclic part (seq_cyc).
//
// The whole cyclic part is collapsed into a single element whose value is the
// sum of seq_cyc minus bound_offset. That element is placed after the linear
// elements when seq_lin_first is set, and before them otherwise. The result is
// stored in linear_nlp.seq and linear_nlp.structure is set to 0.
void Non_Linear_Peptide::get_linear_nlp(Non_Linear_Peptide& linear_nlp) {
    float ring_mass = 0;
    for(int pos = 0; pos < seq_cyc.size(); pos++)
        ring_mass += seq_cyc[pos];

    vector<float> flattened;
    if(seq_lin_first) {
        for(int pos = 0; pos < seq_lin.size(); pos++)
            flattened.push_back(seq_lin[pos]);
        flattened.push_back(ring_mass-bound_offset);
    } else {
        flattened.push_back(ring_mass-bound_offset);
        for(int pos = 0; pos < seq_lin.size(); pos++)
            flattened.push_back(seq_lin[pos]);
    }

    linear_nlp.seq = flattened;
    linear_nlp.structure = 0;
}


// Writes one line per spectrum to the file named `out`:
// "<original_scan_index> <scan number - 1> <m/z> <charge>".
void AnnotatedSpectrumCounterList::print_pm_charge(string out) {
  ofstream out_stream(out.c_str());
  for(int spec = 0; spec < size(); spec++) {
    out_stream << (*this)[spec].original_scan_index << " ";
    out_stream << (*this)[spec].getScanNumber()-1 << " ";
    out_stream << (*this)[spec].get_m_over_z() << " ";
    out_stream << (*this)[spec].getCharge() << endl;
  }
  out_stream.close();
}

// Prints one line per spectrum to standard output:
// "<original_scan_index> <scan number - 1> <m/z> <charge>".
void AnnotatedSpectrumCounterList::print_pm_charge() {
  for(int spec = 0; spec < size(); spec++) {
    cout << (*this)[spec].original_scan_index << " ";
    cout << (*this)[spec].getScanNumber()-1 << " ";
    cout << (*this)[spec].get_m_over_z() << " ";
    cout << (*this)[spec].getCharge() << endl;
  }
}

// Replaces seq with a random sequence of `length` elements that sum to `mass`.
//
// Every element receives min_aa_mass; the remaining mass
// (mass - min_aa_mass*length) is split by length-1 random cut points drawn with
// myRandom() and sorted, so each element is min_aa_mass plus the gap between
// two consecutive cuts.
//
// length <= 0 leaves seq untouched (a negative length previously indexed an
// empty vector). length == 1 yields the single element `mass`.
void Non_Linear_Peptide::generate_random_nlp(int length, int mass) {
  if(length <= 0)
    return;
  if(length == 1) {
    seq.clear();
    seq.push_back(mass);
    return;
  }

  // Mass left to distribute once every element has its minimum.
  const int mass_zero = mass - min_aa_mass*length;

  vector<int> cumul_mass;
  for(int ind = 0; ind < length-1; ind++) {
    cumul_mass.push_back(int(myRandom()*mass_zero));
  }
  sort(cumul_mass.begin(), cumul_mass.end());

  seq.clear();
  int previous_cut = 0;
  for(int ind = 0; ind < length-1; ind++) {
    seq.push_back(cumul_mass[ind] - previous_cut + min_aa_mass);
    previous_cut = cumul_mass[ind];
  }
  // Last element takes whatever lies beyond the final cut.
  seq.push_back(mass_zero - previous_cut + min_aa_mass);
}

// Estimates a p-value for `nlp` against the given spectra.
//
// The peptide is scored once, then num_pep random peptides with the same
// length and mass are generated and scored; the return value is the fraction
// of random peptides whose score is at least the original score.
// Side effects: prints `nlp` and its score to standard output, and
// calculate_score() stores the score in nlp.score.
float PvalCalculator::calculate_pvalue(AnnotatedSpectrumCounterList& spectrum_vector, Non_Linear_Peptide& nlp) {
  MultistageTreeNode mtn;
  mtn.initialize_from_spectra(spectrum_vector);

  const int original_score = calculate_score(mtn, nlp);
  nlp.print();
  cout << original_score << endl;

  Non_Linear_Peptide random_nlp;
  int num_high = 0;
  for(int trial = 0; trial < num_pep; trial++) {
    random_nlp.generate_random_nlp(nlp.get_length(), nlp.get_mass());
    const int random_score = calculate_score(mtn, random_nlp);
    if(random_score >= original_score) {
      num_high++;
    }
  }
  return float(num_high)/num_pep;
}
// Scores every peptide in nlp_vec against one spectrum tree built from
// spectrum_vector; each result is stored in the peptide's `score` field.
void ScoreCalculator::calculate_score(AnnotatedSpectrumCounterList& spectrum_vector, NLP_list& nlp_vec) {
  MultistageTreeNode mtn;
  mtn.initialize_from_spectra(spectrum_vector);

  for(int pep_ind = 0; pep_ind < nlp_vec.size(); pep_ind++) {
    Non_Linear_Peptide& candidate = nlp_vec[pep_ind];
    candidate.score = calculate_score(mtn, candidate);
  }
}


// Builds a spectrum tree from spectrum_vector, scores `nlp` against it and
// returns the score (which is also stored in nlp.score).
int ScoreCalculator::calculate_score(AnnotatedSpectrumCounterList& spectrum_vector, Non_Linear_Peptide& nlp) {
  MultistageTreeNode mtn;
  mtn.initialize_from_spectra(spectrum_vector);
  return calculate_score(mtn, nlp);
}

// Scores `nlp` against an already built spectrum tree.
//
// Unless ms_level_flag is set, ms_level is first taken from the tree's
// max_ms_level. A theoretical tree is built from `nlp` down to ms_level, the
// scorer is switched with set_real_score(), and the result is written to
// nlp.score and returned.
int ScoreCalculator::calculate_score(MultistageTreeNode& mtn, Non_Linear_Peptide& nlp) {
  if(!ms_level_flag) {
    ms_level = mtn.max_ms_level;
  }

  TheoricMultistageTreeNode theoretical_root;
  theoretical_root.initialize_root_from_NLP(nlp, ms_level, structure);

  mult_tree_scorer.set_real_score();
  nlp.score = mult_tree_scorer.score(theoretical_root, mtn);
  return nlp.score;
}

// Splits `pep` at position k: pep_1 receives amino acids [0, k) and pep_2
// receives [k, pep.get_length()).
//
// The amino-acid list is fetched once up front (previously it was requested
// again for every element), and k is clamped to [0, pep.get_length()] so an
// out-of-range split point yields an empty half instead of reading outside the
// sequence.
void split_peptide(Config* config, PeptideAdd& pep, PeptideAdd& pep_1, PeptideAdd& pep_2, int k) {
  const vector<int> all_aas = pep.get_amino_acids();
  const int len = pep.get_length();

  int cut = k;
  if(cut < 0)
    cut = 0;
  if(cut > len)
    cut = len;

  vector<int> aas_1, aas_2;
  for(int ind = 0; ind < cut; ind++) {
    aas_1.push_back(all_aas[ind]);
  }
  for(int ind = cut; ind < len; ind++) {
    aas_2.push_back(all_aas[ind]);
  }

  pep_1.set_peptide_aas(config, aas_1, 1);
  pep_2.set_peptide_aas(config, aas_2, 1);
}

// Fills this list with every window of denovo_len consecutive amino acids taken
// from `pep` repeated twice (so windows may wrap around the end of the
// sequence). Each window becomes a GappedPeptide whose gap is measured against
// parent mass `pm`. Also stores `config` and `pm` on the list.
//
// The window count is computed in signed arithmetic: the previous
// `rep_seq.size() - denovo_len` was unsigned and wrapped to a huge value when
// denovo_len exceeded the doubled length, causing out-of-range reads.
void GappedPeptideList::parseFromPeptide(Config* config, PeptideAdd& pep, float pm, int denovo_len) {
  set_config(config);
  set_parent_mass(pm);

  const vector<int> pep_seq = pep.get_amino_acids();
  vector<int> rep_seq = pep_seq;
  rep_seq.insert(rep_seq.end(), pep_seq.begin(), pep_seq.end());

  // Number of window start positions; zero or negative means nothing to emit.
  const int num_windows = static_cast<int>(rep_seq.size()) - denovo_len;

  vector<int> aas;
  PeptideAdd temp_pep;
  GappedPeptide temp_gap_pep;
  for(int ind = 0; ind < num_windows; ind++) {
    aas.clear();
    for(int seq_ind = ind; seq_ind < ind+denovo_len; seq_ind++) {
      aas.push_back(rep_seq[seq_ind]);
    }
    temp_pep.set_peptide_aas(config, aas, 1);
    temp_gap_pep.parseFromPeptide(temp_pep, pm);
    push_back(temp_gap_pep);
  }
}

// Stores `peptide` and sets gap_mass to the part of parent mass `pm` that the
// peptide's own mass does not account for.
void GappedPeptide::parseFromPeptide(PeptideAdd& peptide, float pm) {
  this->pep = peptide;
  this->gap_mass = pm - peptide.get_mass();
}

void GappedPeptideList::generate(int first_step) {
  if(first_step == 0) {
    GappedPeptide gap_pep;
    PeptideAdd pep;
    vector<int> aas;
    aas.clear();
    pep.set_peptide_aas(config, aas, 1);
    gap_pep.pep = pep;
    gap_pep.gap_mass = parent_mass;
    push_back(gap_pep);
  }
}

// Makes this object the extension of gap_pep by one amino acid: the peptide
// becomes gap_pep's amino acids followed by aa_ind, and gap_mass is reduced so
// that peptide mass plus gap mass stays the same as in gap_pep.
// NOTE(review): gap_pep is read after `pep` has been rewritten, so passing
// *this as gap_pep would presumably leave gap_mass unchanged - confirm no
// caller relies on that.
void GappedPeptide::extend(Config* config, GappedPeptide& gap_pep, int aa_ind) {
  vector<int> extended_aas = gap_pep.pep.get_amino_acids();
  extended_aas.push_back(aa_ind);

  pep.set_peptide_aas(config, extended_aas, 1);

  gap_mass = gap_pep.gap_mass + gap_pep.pep.get_mass() - pep.get_mass();
}

void GappedPeptideList::extend() {
  GappedPeptideList final_gap_pep_list;
  GappedPeptide gap_pep;
  final_gap_pep_list.clear();
  for(int ind = 0; ind < size(); ind++) {
    for(int aa_ind = Ala; aa_ind <= Val; aa_ind++) {
      // if(aa_ind != Ile) {
        gap_pep.extend(config, (*this)[ind], aa_ind);
        final_gap_pep_list.push_back(gap_pep);
      //}
    }
  }
  clear();
  for(int ind = 0; ind < final_gap_pep_list.size(); ind++) {
    push_back(final_gap_pep_list[ind]);
  }
}


// Truncates the list to its first num_top entries when it is longer than that.
void GappedPeptideList::select_top(int num_top) {
  if(!(size()>num_top))
    return;
  resize(num_top);
}

// Rebuilds nlp.seq as the peptide's amino-acid masses followed by gap_mass as
// one extra trailing element.
void GappedPeptide::load_nlp() {
  nlp.seq = pep.get_amino_acid_masses();
  nlp.seq.push_back(gap_mass);
}

// Sort predicate: true when gp_1 has the strictly higher score, so sorting with
// it puts the best-scoring peptides first.
// NOTE(review): both arguments are taken by value, so each comparison copies
// two GappedPeptide objects; switching to const references would need the
// matching declaration (if any) to change as well.
bool CompareGappedPeptideByScore(GappedPeptide gp_1, GappedPeptide gp_2) {
  return gp_2.score < gp_1.score;
}

void GappedPeptideList::sort_score() {
  sort(begin(), end(), CompareGappedPeptideByScore);
}


// Writes one line per entry to the file named out_file:
// "seq : <peptide string> gap mass : <value> score : <score>".
// NOTE(review): the value printed after "gap mass : " is pep.get_mass(), not
// the gap_mass field - confirm which one is intended.
void GappedPeptideList::print(string out_file) {
  ofstream out(out_file.c_str());
  for(int entry = 0; entry < size(); entry++) {
    out << "seq : " << (*this)[entry].pep.getPeptideStr()
        << " gap mass : " << (*this)[entry].pep.get_mass()
        << " score : " << (*this)[entry].score << endl;
  }
  out.close();
}


// Prints one line per entry to standard output:
// "seq : <peptide string> gap mass : <value> score : <score>".
// NOTE(review): the value printed after "gap mass : " is pep.get_mass(), not
// the gap_mass field - confirm which one is intended.
void GappedPeptideList::print() {
  for(int entry = 0; entry < size(); entry++) {
    cout << "seq : " << (*this)[entry].pep.getPeptideStr()
         << " gap mass : " << (*this)[entry].pep.get_mass()
         << " score : " << (*this)[entry].score << endl;
  }
}

// Scores every gapped peptide in the list against `spectrum`: each entry's
// mass sequence is refreshed with load_nlp() and then passed to
// peptide_spectrum_score() with the spectrum's charge and this model's
// acc_thresh, offset, structure and score_type.
void DenovoGappedModel::score(GappedPeptideList& gap_pep_list, AnnotatedSpectrumCounter& spectrum) {
  for(int entry_ind = 0; entry_ind < gap_pep_list.size(); entry_ind++) {
    GappedPeptide& entry = gap_pep_list[entry_ind];
    entry.load_nlp();
    entry.score = peptide_spectrum_score(spectrum, entry.nlp, spectrum.getCharge(), acc_thresh, offset, structure, score_type);
  }
}

// Runs the single-spectrum reconstruction once per spectrum; result i in
// gap_pep_list_list corresponds to spectrum_vector[i].
void DenovoGappedModel::reconstruct_denovo(GappedPeptideListList& gap_pep_list_list, vector<AnnotatedSpectrumCounter>& spectrum_vector) {
  gap_pep_list_list.resize(spectrum_vector.size());
  for(int spec = 0; spec < spectrum_vector.size(); spec++) {
    reconstruct_denovo(gap_pep_list_list[spec], spectrum_vector[spec]);
  }
}


// Grows gapped peptides for one spectrum.
//
// The list is configured with the spectrum's parent mass (get_org_pm) and this
// model's config, seeded with generate(first_step), and then extended one
// residue at a time until denovo_len is reached. After every extension the
// candidates are scored, sorted by score and cut down to the best num_top.
void DenovoGappedModel::reconstruct_denovo(GappedPeptideList& gap_pep_list, AnnotatedSpectrumCounter& spectrum) {
  gap_pep_list.set_parent_mass(spectrum.get_org_pm());
  gap_pep_list.set_config(config);
  gap_pep_list.generate(first_step);

  for(int step = first_step; step < denovo_len; step++) {
    gap_pep_list.extend();
    score(gap_pep_list, spectrum);
    gap_pep_list.sort_score();
    gap_pep_list.select_top(num_top);
  }
}

// Default configuration for the multistage de novo sequencing model.
MultDenovoSeqModel::MultDenovoSeqModel() {
  // Numeric settings.
  scale_factor   = 0.9995;
  min_aa_mass    = 57;
  first_step     = 2;
  ms_level       = 2;
  structure      = 1;
  top_select_num = 10;

  // Flags: everything optional starts switched off.
  ms_level_flag    = false;
  parent_mass_flag = false;
  html_flag        = false;
  print_flag       = false;
}

// Writes the list as an HTML table to the file named out_html, surrounded by
// html_prefix and html_suffix. Each row holds the 1-based rank, the
// comma-separated sequence values and the score.
//
// A peptide with an empty sequence now produces an empty sequence cell; the
// previous code read seq[0] unconditionally. Output for non-empty sequences is
// unchanged.
void NLP_list::write_to_html(string out_html, string html_prefix, string html_suffix) {
  ofstream out_file;
  out_file.open(out_html.c_str());
  out_file << html_prefix;
  out_file << "<table border=\"1\">" << endl;
  out_file << "<tr><td> rank <td> Amino acid sequence <td> score </td></tr>" << endl;
  for(int ind = 0; ind < size(); ind ++) {
    out_file << "<tr><td>";
    out_file << ind+1 << "<td>" << endl;
    for(int seq_ind = 0; seq_ind < (*this)[ind].seq.size(); seq_ind++) {
      if(seq_ind > 0)
        out_file << ", ";
      out_file << (*this)[ind].seq[seq_ind];
    }
    out_file << "<td>" << (*this)[ind].score;
    out_file << "</td></tr>" << endl;
  }
  out_file << "</table>" << endl;
  out_file << html_suffix;
  out_file.close();
}

// Writes the list as a bare HTML table to out_html, with no prefix or suffix.
void NLP_list::write_to_html(string out_html) {
  const string no_text;
  write_to_html(out_html, no_text, no_text);
}

/*void Non_Linear_Peptide::operator= (const Non_Linear_Peptide& other) {
  structure = other.structure;
  score = other.score;
  seq = other.seq;
  seq_str = other.seq_str;
  //seq.clear();
  //for(int ind = 0; ind < other.seq.size(); ind++)
  //  seq.push_back(other.seq[ind]);
}*/

// Sort predicate ordering peptides by sequence, largest first: returns true
// when, at the first position where the sequences differ, nlp_1 holds the
// larger value.
//
// Only the common prefix is compared element-wise, so a shorter nlp_2 is no
// longer read past its end. When one sequence is a prefix of the other, the
// longer one sorts first; equal sequences (and a shorter nlp_1) give false,
// as before. This keeps the predicate a strict weak ordering for std::sort
// even when lengths differ.
bool CompareNLPBySeq(const Non_Linear_Peptide& nlp_1, const Non_Linear_Peptide& nlp_2) {
  const size_t len_1 = nlp_1.seq.size();
  const size_t len_2 = nlp_2.seq.size();
  const size_t common = (len_1 < len_2) ? len_1 : len_2;

  for(size_t ind = 0; ind < common; ind++) {
    if(nlp_1.seq[ind] > nlp_2.seq[ind])
      return true;
    if(nlp_1.seq[ind] < nlp_2.seq[ind])
      return false;
  }
  return len_1 > len_2;
}

// Returns true when both peptides hold the same sequence.
//
// Sequences of different length are never equal. Previously only
// nlp_1.seq.size() elements were compared, which read past the end of a
// shorter nlp_2 and reported a proper prefix as equal.
bool CompareNLPBySeqEq(const Non_Linear_Peptide& nlp_1, const Non_Linear_Peptide& nlp_2) {
  if(nlp_1.seq.size() != nlp_2.seq.size())
    return false;

  for(size_t ind = 0; ind < nlp_1.seq.size(); ind++) {
    if(nlp_1.seq[ind] > nlp_2.seq[ind] || nlp_1.seq[ind] < nlp_2.seq[ind])
      return false;
  }
  return true;
}

// Copies nlp into nlp_rev and then stores the sequence in reverse order; all
// other fields of nlp_rev are plain copies of nlp's.
void reverse_nlp(Non_Linear_Peptide& nlp, Non_Linear_Peptide& nlp_rev) {
  nlp_rev = nlp;
  nlp_rev.seq.clear();

  int remaining = nlp.seq.size();
  while(remaining > 0) {
    remaining--;
    nlp_rev.seq.push_back(nlp.seq[remaining]);
  }
}

// Copies nlp into nlp_rot with the sequence rotated left by `num` positions,
// i.e. element `num` of nlp.seq becomes the first element of nlp_rot.seq.
//
// `num` is reduced modulo the sequence length, so negative values and values
// beyond the length wrap around instead of indexing outside the sequence; for
// 0 <= num <= size the result is the same as before. An empty sequence is
// copied unchanged. The rotation is done on nlp_rot's own copy, so passing the
// same object for both arguments is safe.
void rotate_nlp(Non_Linear_Peptide& nlp, Non_Linear_Peptide& nlp_rot, int num) {
  nlp_rot = nlp;

  const int len = nlp_rot.seq.size();
  if(len == 0)
    return;

  int shift = num % len;
  if(shift < 0)
    shift += len;

  std::rotate(nlp_rot.seq.begin(), nlp_rot.seq.begin() + shift, nlp_rot.seq.end());
}

// Replaces a cyclic peptide by a canonical representative: among all rotations
// of the sequence, each also considered reversed via
// rotate_minimal_linear_nlp(), the one that CompareNLPBySeq ranks first is
// kept. Starts from the input itself, so a sequence is only replaced by a
// strictly better-ranked variant.
void rotate_minimal_cyclic_nlp(Non_Linear_Peptide& nlp) {
  Non_Linear_Peptide best = nlp;
  Non_Linear_Peptide candidate;

  for(int shift = 0; shift < nlp.seq.size(); shift++) {
    rotate_nlp(nlp, candidate, shift);
    rotate_minimal_linear_nlp(candidate);
    if(CompareNLPBySeq(candidate, best)) {
      best = candidate;
    }
  }
  nlp = best;
}

// Replaces a linear peptide by its reversal when CompareNLPBySeq ranks the
// reversed sequence before the original; otherwise leaves it unchanged.
void rotate_minimal_linear_nlp(Non_Linear_Peptide& nlp) {
  Non_Linear_Peptide reversed;
  reverse_nlp(nlp, reversed);

  const bool reversed_ranks_first = CompareNLPBySeq(reversed, nlp);
  if(reversed_ranks_first) {
    nlp = reversed;
  }
}
    
// Canonicalizes `nlp` according to its structure: structure 1 uses the cyclic
// rule, every other value uses the linear rule.
void rotate_minimal_nlp(Non_Linear_Peptide& nlp, int structure) {
  if(structure != 1) {
    rotate_minimal_linear_nlp(nlp);
    return;
  }
  rotate_minimal_cyclic_nlp(nlp);
}

// Canonicalizes every peptide in the list with rotate_minimal_nlp().
void NLP_list::rotate_minimal(int structure) {
  const int count = size();
  for(int pep_ind = 0; pep_ind < count; pep_ind++)
    rotate_minimal_nlp((*this)[pep_ind], structure);
}

void NLP_list::unique() {
   NLP_list final_nlp_list;
   if(size()>0)
     final_nlp_list.push_back((*this)[0]);
   for(int ind = 1; ind < size(); ind++) {
       if(!CompareNLPBySeqEq((*this)[ind-1], (*this)[ind]))
          final_nlp_list.push_back((*this)[ind]);   
   }
   clear();
   for(int ind = 0; ind < final_nlp_list.size(); ind++)
     push_back(final_nlp_list[ind]);
}

void NLP_list::sort_unique(int structure) {
   rotate_minimal(structure);
   sort(begin(), end(), CompareNLPBySeq);
   unique();
}


// Appends to this list a copy of every peptide in tmp_nlp_list, each with
// last_aa added as one more trailing sequence element.
void NLP_list::copy_aa(NLP_list& tmp_nlp_list, int last_aa) {
  for(int src = 0; src < tmp_nlp_list.size(); src++) {
    Non_Linear_Peptide extended = tmp_nlp_list[src];
    extended.seq.push_back(last_aa);
    push_back(extended);
  }
}
// Fills the list with every way of writing parent_mass as an ordered sequence
// of first_step integer masses, each at least min_aa_mass.
//
// first_step == 1 yields the single sequence {parent_mass} tagged with
// `structure`. Larger values recurse: for each admissible last mass, the
// remaining mass is split into first_step-1 parts and the last mass is
// appended. A negative parent_mass or first_step < 1 yields an empty list.
//
// The temporary list used for the recursion now inherits this list's
// min_aa_mass; before, it used whatever value a default-constructed NLP_list
// carries, so deeper recursion levels could ignore the configured minimum.
void NLP_list::generate(int first_step, int parent_mass, int structure) {
  clear();
  if(parent_mass<0 || first_step < 1)
    return;

  if(first_step == 1) {
    Non_Linear_Peptide nlp;
    nlp.seq.clear();
    nlp.seq.push_back(parent_mass);
    nlp.structure = structure;
    push_back(nlp);
    return;
  }

  NLP_list tmp_nlp_list;
  tmp_nlp_list.set_min_aa_mass(min_aa_mass);
  for(int last_aa = min_aa_mass; last_aa <= parent_mass - min_aa_mass; last_aa++) {
    tmp_nlp_list.generate(first_step-1, parent_mass - last_aa, structure);
    copy_aa(tmp_nlp_list, last_aa);
  }
}

void Non_Linear_Peptide::print() {
    if (structure == 2) {
      cout << "lin first : " << seq_lin_first;
      cout << " lin seq : ";
      for(int seq_ind = 0; seq_ind < seq_lin.size(); seq_ind++)
        cout << seq_lin[seq_ind] << " ";
      cout << " cyc seq : ";
      for(int seq_ind = 0; seq_ind < seq_cyc.size(); seq_ind++)
        cout << seq_cyc[seq_ind] << " ";
    } else {
      cout << "seq : ";
      for( int seq_ind = 0; seq_ind < seq.size(); seq_ind++)
        cout << seq[seq_ind] << " ";
      }
    cout << "score : " << score << endl;
}

// Prints the peptide's string form (seq_str) and score on one line.
void Non_Linear_Peptide::print_standard() {
    cout << "seq : " << seq_str << " " << "score : " << score << endl;
}

// Prints the number of peptides, then every peptide in string form.
void NLP_list::print_standard() {
  cout << "num peptide : " << size() << endl;
  int pep_ind = 0;
  while(pep_ind < size()) {
    (*this)[pep_ind].print_standard();
    pep_ind++;
  }
}

// Prints the number of peptides, then at most the first `num` peptides in
// string form.
void NLP_list::print_standard(int num) {
  cout << "num peptide : " << size() << endl;
  for(int pep_ind = 0; pep_ind < num; pep_ind++) {
    if(!(pep_ind < size()))
      break;
    (*this)[pep_ind].print_standard();
  }
}

// Prints the number of peptides, then every peptide via its own print().
void NLP_list::print() {
  cout << "num peptide : " << size() << endl;
  int pep_ind = 0;
  while(pep_ind < size()) {
    (*this)[pep_ind].print();
    pep_ind++;
  }
}

// Prints the number of peptides, then at most the first `num` peptides via
// their own print().
void NLP_list::print(int num) {
  cout << "num peptide : " << size() << endl;
  for(int pep_ind = 0; pep_ind < num; pep_ind++) {
    if(!(pep_ind < size()))
      break;
    (*this)[pep_ind].print();
  }
}

void NLP_list::sort_score() {
   sort(begin(), end(), CompareNLPByScore);
}

// Truncates the list to its first top_select_num entries when it is longer.
void NLP_list::select_top(int top_select_num) {
   if(!(top_select_num < size()))
     return;
   resize(top_select_num);
}

// Writes into nlp_mod.seq a copy of nlp.seq in which element pos_ind is split
// in two: break_pos followed by the remainder (original value - break_pos).
// Only nlp_mod.seq is written; pos_ind is expected to be a valid index.
void add_breakage(Non_Linear_Peptide& nlp, int pos_ind, int break_pos, Non_Linear_Peptide& nlp_mod) {
  nlp_mod.seq.clear();
  for(int ind = 0; ind < nlp.seq.size(); ind++) {
    if(ind == pos_ind) {
      nlp_mod.seq.push_back(break_pos);
      nlp_mod.seq.push_back(nlp.seq[ind] - break_pos);
    } else {
      nlp_mod.seq.push_back(nlp.seq[ind]);
    }
  }
}

// Appends every one-step refinement of `nlp`: for each sequence element and
// each integer split point that leaves at least min_aa_mass on both sides, a
// peptide with that element broken in two is added to this list.
void NLP_list::add_nlp_extension(Non_Linear_Peptide& nlp) {
  for(int pos = 0; pos < nlp.seq.size(); pos++) {
    int break_pos = min_aa_mass;
    while(break_pos <= nlp.seq[pos] - min_aa_mass) {
      Non_Linear_Peptide split_nlp;
      add_breakage(nlp, pos, break_pos, split_nlp);
      push_back(split_nlp);
      break_pos++;
    }
  }
}

void NLP_list::extend() {
  NLP_list final_nlp_list;
  final_nlp_list.clear();
  for(int ind = 0; ind < size(); ind++)
    final_nlp_list.add_nlp_extension((*this)[ind]);
  clear();
  for(int ind = 0; ind < final_nlp_list.size(); ind++)
    push_back(final_nlp_list[ind]);
}


// Runs de novo sequencing on a set of multistage spectra.
//
// Steps:
//  1. Build the integer-mass spectrum tree (mult_tree_root) and report how many
//     spectra were read per MS level.
//  2. Resolve ms_level and the parent mass: values are taken from the tree
//     unless ms_level_flag / parent_mass_flag say they were supplied.
//  3. Generate all candidates with first_step elements, then repeatedly refine
//     them by one element until `length` elements are reached. After every
//     round the candidates are deduplicated, scored, sorted and cut to
//     top_select_num (and printed when print_flag is set).
//  4. Copy the survivors to top_list and, if html_flag is set, write them to
//     out_html.
void MultDenovoSeqModel::denovo_sequence(AnnotatedSpectrumCounterList& spectrum_vector, NLP_list& top_list) {
  mult_tree_root.set_integer();
  mult_tree_root.initialize_from_spectra(spectrum_vector);

  // Summary line: root level first, then every deeper level that has spectra.
  cout << "Reading " << mult_tree_root.num_ms_level[mult_tree_root.root_ms_level] << " MS" << mult_tree_root.root_ms_level << " spectra";
  for(int level = mult_tree_root.root_ms_level+1; level < mult_tree_root.level_num; level++) {
    if(mult_tree_root.num_ms_level[level] <= 0)
      continue;
    cout << ", " << mult_tree_root.num_ms_level[level] << " MS" << level << " spectra";
  }
  cout << endl;

  if(!ms_level_flag)
    ms_level = mult_tree_root.max_ms_level;

  if(parent_mass_flag) {
    // Parent mass was supplied: push it into the tree root.
    mult_tree_root.set_mass(parent_mass_real_no_charge+MASS_PROTON);
  } else {
    // Parent mass comes from the tree root.
    parent_mass = mult_tree_root.get_mass();
    parent_mass_real_no_charge = mult_tree_root.get_mass_real() - MASS_PROTON;
  }

  NLP_list nlp_list;
  nlp_list.set_min_aa_mass(min_aa_mass);

  int tag_count = first_step;
  bool seeding = true;   // first round generates, later rounds refine
  while(true) {
    cout << "reconstructing " << tag_count << " tags ... " << endl;
    if(seeding) {
      nlp_list.generate(first_step, parent_mass, structure);
      seeding = false;
    } else {
      nlp_list.extend();
    }
    nlp_list.sort_unique(structure);
    int_unique_score(nlp_list);
    nlp_list.sort_score();
    nlp_list.select_top(top_select_num);
    if(print_flag) {
      nlp_list.print(10);
    }

    if(tag_count >= length)
      break;
    tag_count++;
  }

  top_list = nlp_list;
  if(html_flag)
    top_list.write_to_html(out_html);
}


// Scores every peptide in the list with the integer scorer.
void MultDenovoSeqModel::int_unique_score(NLP_list& nlp_list) {
  const int count = nlp_list.size();
  for(int pep_ind = 0; pep_ind < count; pep_ind++) {
    int_unique_score(nlp_list[pep_ind]);
  }
}

// Scores one peptide against mult_tree_root with integer masses: the scorer and
// the peptide are switched to integer mode, a theoretical tree is built down to
// ms_level, and the result is stored in nlp.score.
void MultDenovoSeqModel::int_unique_score(Non_Linear_Peptide& nlp) {
  mult_tree_scorer.set_integer_score();

  TheoricMultistageTreeNode theoretical_root;
  nlp.set_integer();
  theoretical_root.initialize_root_from_NLP(nlp, ms_level, structure);

  nlp.score = mult_tree_scorer.int_unique_score(theoretical_root, mult_tree_root);
}


// Returns the node mass in the representation selected by is_integer: the
// integer mass when set, the real mass otherwise.
float TheoricMultistageTreeNode::get_mass() const {
    if(!is_integer)
      return get_mass_real();
    return get_mass_int();
}

// Returns the node mass in the representation selected by is_integer: the
// integer mass when set, the real mass otherwise.
float MultistageTreeNode::get_mass() const {
    if(!is_integer)
      return get_mass_real();
    return get_mass_int();
}

// Stores `m` in the representation selected by is_integer: as the integer mass
// when set, as the real mass otherwise. Only one of the two is written.
void TheoricMultistageTreeNode::set_mass(float m) {
    if(!is_integer) {
      set_mass_real(m);
      return;
    }
    set_mass_int(m);
}

// Stores `m` in both representations: the integer mass via
// convert_mass_to_int() and the real mass as given.
void MultistageTreeNode::set_mass(float m) {
    set_mass_int(convert_mass_to_int(m));
    set_mass_real(m);
}

// Default scorer settings. score_cof is indexed by MS level: levels 0 and 1
// contribute nothing, levels 2 through 5 contribute 1 per matched node.
MultistageTreeScorer::MultistageTreeScorer() {
    const int level_coefficients[] = {0, 0, 1, 1, 1, 1};
    const int num_coefficients = sizeof(level_coefficients)/sizeof(level_coefficients[0]);

    score_cof.clear();
    for(int level = 0; level < num_coefficients; level++)
      score_cof.push_back(level_coefficients[level]);

    print_flag = false;
    thresh = 0.5;
    score_type = 1;
}


// Dispatches on score_type: 0 selects the real-mass scorer, any other value
// selects the integer-mass scorer.
float MultistageTreeScorer::score(TheoricMultistageTreeNode& tmtn, MultistageTreeNode& mtn) {
  if(score_type == 0) {
    return real_unique_score(tmtn, mtn);
  }
  return int_unique_score(tmtn, mtn);
}

// Recursively scores a theoretical tree node against an observed node using
// real masses and tolerance `thresh`.
//
// Returns 0 when the two node masses (theoretical + MASS_PROTON vs observed)
// differ by more than thresh. Otherwise the score is score_cof[ms level] plus,
// for every observed child, the best recursive score among the theoretical
// children that fall inside that child's tolerance window. Both child lists
// are walked with one forward-only cursor on the theoretical side, so each
// theoretical child is tried against at most one observed child.
//
// The mass difference is taken with fabs(): an unqualified abs() on a
// floating-point value can resolve to the integer abs and truncate the
// difference before it is compared with thresh.
float MultistageTreeScorer::real_unique_score(TheoricMultistageTreeNode& tmtn, MultistageTreeNode& mtn) {
  if(fabs(tmtn.get_mass() + MASS_PROTON - mtn.get_mass()) > thresh)
    return 0;

  float scr = score_cof[tmtn.get_ms_level()];

  int tmtn_ind = 0;
  for(int mtn_ind = 0; mtn_ind <mtn.children.size(); mtn_ind++) {
    float max_scr = 0;
    // Skip theoretical children that lie below the tolerance window.
    while(tmtn_ind<tmtn.children.size() && tmtn.children[tmtn_ind].get_mass_real() + MASS_PROTON < mtn.children[mtn_ind].get_mass_real() - thresh)
      tmtn_ind++;
    // Try every theoretical child inside the window; keep the best.
    while(tmtn_ind<tmtn.children.size() && tmtn.children[tmtn_ind].get_mass_real() + MASS_PROTON < mtn.children[mtn_ind].get_mass_real() + thresh) {
      float temp_scr = real_unique_score(tmtn.children[tmtn_ind], mtn.children[mtn_ind] );
      if(temp_scr>max_scr)
        max_scr = temp_scr;
      tmtn_ind++;
    }
    scr += max_scr;
  }
  return scr;
}

// Recursively scores a theoretical tree node against an observed node using
// integer masses.
//
// Returns 0 when the integer masses differ. Otherwise the score is
// score_cof[ms level] plus, for every observed child, the best recursive score
// among the theoretical children with exactly the same integer mass. The
// theoretical children are walked with one forward-only cursor, so each is
// tried against at most one observed child. With print_flag set, every matched
// node is logged to standard output.
float MultistageTreeScorer::int_unique_score(TheoricMultistageTreeNode& tmtn, MultistageTreeNode& mtn) {
  if(tmtn.get_mass_int()!=mtn.get_mass_int())
    return 0;

  float total = score_cof[tmtn.get_ms_level()];
  if(print_flag)
    cout << tmtn.get_ms_level() << " " << tmtn.get_mass_int() << " " <<  mtn.get_mass_int() << endl;

  int theo_ind = 0;
  for(int obs_ind = 0; obs_ind < mtn.children.size(); obs_ind++) {
    // Advance past theoretical children lighter than this observed child.
    while(theo_ind < tmtn.children.size() && tmtn.children[theo_ind].get_mass_int() < mtn.children[obs_ind].get_mass_int())
      theo_ind++;

    // Consume all theoretical children of equal mass and keep the best score.
    float best = 0;
    while(theo_ind < tmtn.children.size() && tmtn.children[theo_ind].get_mass_int() == mtn.children[obs_ind].get_mass_int()) {
      const float sub_score = int_unique_score(tmtn.children[theo_ind], mtn.children[obs_ind]);
      if(sub_score > best)
        best = sub_score;
      theo_ind++;
    }
    total += best;
  }
  return total;
}



// Prints this node ("<ms level> <child count> <mass>") and its subtree, but
// only while the node's MS level does not exceed `depth`.
void MultistageTreeNode::print(int depth) {
  if(get_ms_level() > depth)
    return;

  cout  << get_ms_level() << " "  << children.size() << " " << get_mass() << endl;
  for(int child = 0; child < children.size(); child++)
    children[child].print(depth);
}

// Prints this node ("<ms level> <child count> <mass>") followed by its whole
// subtree, children in stored order.
void MultistageTreeNode::print() {
  cout  << get_ms_level() << " "  << children.size() << " " << get_mass() << endl;
  int child = 0;
  while(child < children.size()) {
    children[child].print();
    child++;
  }
}

// Prints this node ("<ms level> <child count> <mass>") and its subtree, but
// only while the node's MS level does not exceed `depth`.
void TheoricMultistageTreeNode::print(int depth) {
  if(get_ms_level() > depth)
    return;

  cout << get_ms_level() << " "  << children.size() << " " << get_mass() << endl;
  for(int child = 0; child < children.size(); child++)
    children[child].print(depth);
}

// Prints this node ("<ms level> <child count> <mass>") followed by its whole
// subtree, children in stored order.
void TheoricMultistageTreeNode::print() {
  cout << get_ms_level() << " "  << children.size() << " " << get_mass() << endl;
  int child = 0;
  while(child < children.size()) {
    children[child].print();
    child++;
  }
}



// Multiplies every element of seq by r, in place.
void Non_Linear_Peptide::scale_by(float r) {
    const int count = seq.size();
    for(int pos = 0; pos < count; pos++)
        seq[pos] = seq[pos]*r;
}

void Non_Linear_Peptide::calculate_theoric_cyclic_spectra() {
    
    int n = seq.size();
    float mat_val[n][n-1];
    float temp_val;
    
    vector<float> seq_rep;
    seq_rep.resize(2*n);
    for(int ind = 0; ind<n; ind++) {
	    seq_rep[ind] = seq[ind];
    	seq_rep[ind+n] = seq[ind];
    }
  
    float parent_mass = 0.f;
    for(int i = 0; i < seq.size(); i++){
        parent_mass += seq[i];
    }

    TheoricSpecNode tsn;
    cyclic_spectra.clear();

    for(int charge_number = 1; charge_number <= max_charge; charge_number++){
        for(int pos_1 = 0; pos_1<n; pos_1++) {
      	    temp_val = seq_rep[pos_1];
       	    mat_val[pos_1][0] = temp_val;
	          for(int offset_ind = 0; offset_ind < offset.val.size(); offset_ind++) {
        		    tsn.mass = (temp_val+MASS_PROTON*charge_number+offset.val[offset_ind])/charge_number;
                tsn.start_ind = pos_1;
                tsn.length = 0;
                tsn.charge = charge_number;
                tsn.offset_ind = offset_ind;
                cyclic_spectra.push_back(tsn);
            }
	      }

        for(int len_seq = 1; len_seq<n-1; len_seq++){
	          for(int pos_1 = 0; pos_1<n; pos_1++){
	              temp_val = mat_val[pos_1][len_seq-1] + seq_rep[pos_1+len_seq];
        		    mat_val[pos_1][len_seq] = temp_val;	
    	          for(int offset_ind = 0; offset_ind < offset.val.size(); offset_ind++) {
                		tsn.mass = (temp_val+MASS_PROTON*charge_number+offset.val[offset_ind])/charge_number;
                		tsn.start_ind = pos_1;
                    tsn.length = len_seq;
                    tsn.charge = charge_number;
                    tsn.offset_ind = offset_ind;
                    tsn.structure = 1;
                    cyclic_spectra.push_back(tsn);
                }
		        }
      	}
    }
}


// Rebuilds linear_spectra: the theoretical fragments of this sequence treated
// as a linear chain.
//
// For every charge 1..max_charge and every cut after position pos
// (0 <= pos < n-1), two groups of nodes are emitted, each with one node per
// entry of offset.val: first the part up to and including pos (direction 0),
// then the remaining part (direction 1). Node mass is
// (fragment mass + charge*MASS_PROTON + offset)/charge; tsn.length stores pos
// and tsn.structure is 0.
void Non_Linear_Peptide::calculate_theoric_linear_spectra()  {
    const int n = seq.size();

    float total_mass = 0.f;
    for(int pos = 0; pos < seq.size(); pos++){
        total_mass += seq[pos];
    }

    TheoricSpecNode tsn;
    linear_spectra.clear();

    for(int charge_number = 1; charge_number <= max_charge; charge_number++){
        float head_mass = 0.f;
        for(int pos = 0 ; pos < n-1; pos++){
            head_mass += seq[pos];

            // direction 0: elements 0..pos; direction 1: the rest.
            for(int direction = 0; direction <= 1; direction++) {
                const float frag_mass = (direction == 0) ? head_mass : total_mass - head_mass;
                for(int offset_ind = 0; offset_ind < offset.val.size(); offset_ind++) {
                    tsn.mass = (frag_mass+MASS_PROTON*charge_number+offset.val[offset_ind])/charge_number;
                    tsn.length = pos;
                    tsn.direction = direction;
                    tsn.structure = 0;
                    tsn.charge = charge_number;
                    tsn.offset_ind = offset_ind;
                    linear_spectra.push_back(tsn);
                }
            }
        }
    }
}

// Extracts into nlp_frag the sub-sequence described by `tsn`, carrying over
// is_integer.
//
//  - tsn.structure == 0 (linear): direction 0 takes elements 0..tsn.length,
//    any other direction takes elements tsn.length+1..end.
//  - otherwise (cyclic): takes tsn.length+1 elements starting at
//    tsn.start_ind, wrapping around the end of the sequence.
void Non_Linear_Peptide::get_nlp_fragment(TheoricSpecNode tsn, Non_Linear_Peptide& nlp_frag) {
    nlp_frag.seq.clear();
    nlp_frag.is_integer = is_integer;

    if(tsn.structure != 0) {
        // Cyclic fragment: walk forward from start_ind with wrap-around.
        const int ring_len = seq.size();
        const int last = tsn.start_ind + tsn.length;
        for(int ind = tsn.start_ind; ind <= last; ind++)
            nlp_frag.seq.push_back(seq[ind % ring_len]);
        return;
    }

    if(tsn.direction == 0) {
        for(int ind = 0; ind <= tsn.length; ind++)
            nlp_frag.seq.push_back(seq[ind]);
    } else {
        for(int ind = tsn.length+1; ind < seq.size(); ind++)
            nlp_frag.seq.push_back(seq[ind]);
    }
}

// Sort predicate: orders theoretical tree nodes by ascending get_mass().
// NOTE(review): both nodes are passed by value, so each comparison copies two
// nodes together with their `children`; const references would avoid that but
// require any matching declaration to change too.
bool CompareTMTNByMass(const TheoricMultistageTreeNode tmtn_1, const TheoricMultistageTreeNode tmtn_2) {
  return tmtn_2.get_mass() > tmtn_1.get_mass();
}

// Sort predicate: orders observed tree nodes by ascending get_mass().
// NOTE(review): both nodes are passed by value, so each comparison copies two
// nodes together with their `children`; const references would avoid that but
// require any matching declaration to change too.
bool CompareMTNByMass(const MultistageTreeNode mtn_1,const  MultistageTreeNode mtn_2) {
  return mtn_2.get_mass() > mtn_1.get_mass();
}

void MultistageTreeNode::sort_children() {
  sort(children.begin(), children.end(), CompareMTNByMass);
}

void TheoricMultistageTreeNode::sort_children() {
  sort(children.begin(), children.end(), CompareTMTNByMass);
}

// Builds the theoretical tree for `nlp` down to MS level `depth`: structure 1
// selects the cyclic builder, any other value the linear builder.
void TheoricMultistageTreeNode::initialize_root_from_NLP(Non_Linear_Peptide& nlp, int depth, int structure) {
  if(structure != 1) {
    initialize_root_from_linear_NLP(nlp, depth);
    return;
  }
  initialize_root_from_cyclic_NLP(nlp, depth);
}
// Makes this node the root of the theoretical tree of a cyclic peptide.
//
// The node takes nlp's is_integer setting and mass and is placed at
// root_ms_level. If `depth` reaches that level, one child is created for every
// theoretical cyclic fragment of `nlp`; each child is a linear sub-tree built
// from the fragment's sub-sequence. Children end up sorted by mass.
//
// Each child is appended empty and then initialized in place, instead of being
// built in a local node and copied into `children`, which duplicated the whole
// freshly built subtree once per child.
void TheoricMultistageTreeNode::initialize_root_from_cyclic_NLP(Non_Linear_Peptide& nlp, int depth) {
    is_integer = nlp.is_integer;
    set_ms_level(root_ms_level);
    set_root();
    set_mass(nlp.get_mass());
    if(depth>=get_ms_level()) {
      nlp.calculate_theoric_cyclic_spectra();
      for(int ind = 0; ind < nlp.cyclic_spectra.size(); ind++) {
        Non_Linear_Peptide nlp_lin;
        nlp.get_nlp_fragment(nlp.cyclic_spectra[ind], nlp_lin);
        children.push_back(TheoricMultistageTreeNode());
        children[children.size()-1].initialize_from_linear_NLP(nlp_lin, nlp.cyclic_spectra[ind], get_ms_level(), depth);
      }
    }
    sort_children();
}

// Makes this node the root of the theoretical tree of a linear peptide.
//
// The node takes nlp's is_integer setting and mass and is placed at
// root_ms_level. If `depth` reaches that level, one child is created for every
// theoretical linear fragment of `nlp`; each child is a linear sub-tree built
// from the fragment's sub-sequence. Children end up sorted by mass.
//
// Each child is appended empty and then initialized in place, instead of being
// built in a local node and copied into `children`, which duplicated the whole
// freshly built subtree once per child.
void TheoricMultistageTreeNode::initialize_root_from_linear_NLP(Non_Linear_Peptide& nlp, int depth) {
    is_integer = nlp.is_integer;
    set_ms_level(root_ms_level);
    set_root();
    set_mass(nlp.get_mass());
    if(depth>=get_ms_level()) {
      nlp.calculate_theoric_linear_spectra();
      for(int ind = 0; ind < nlp.linear_spectra.size(); ind++) {
        Non_Linear_Peptide nlp_lin;
        nlp.get_nlp_fragment(nlp.linear_spectra[ind], nlp_lin);
        children.push_back(TheoricMultistageTreeNode());
        children[children.size()-1].initialize_from_linear_NLP(nlp_lin, nlp.linear_spectra[ind], get_ms_level(), depth);
      }
    }
    sort_children();
}

// Makes this node a non-root node of a theoretical tree, one MS level below
// current_depth, representing the linear fragment `nlp`.
//
// The node takes nlp's is_integer setting and mass. If `depth` reaches the
// node's level, the fragment is fragmented again and one child is created
// recursively per theoretical linear fragment. Children end up sorted by mass.
// The `tsn` argument is not read by this function.
//
// Each child is appended empty and then initialized in place, instead of being
// built in a local node and copied into `children`; with the recursion that
// copy duplicated every subtree once per level on the way up.
void TheoricMultistageTreeNode::initialize_from_linear_NLP(Non_Linear_Peptide& nlp, TheoricSpecNode tsn, int current_depth, int depth) {
    is_integer = nlp.is_integer;
    set_ms_level(current_depth+1);
    set_mass(nlp.get_mass());
    if(depth>=get_ms_level()) {
      nlp.calculate_theoric_linear_spectra();
      for(int ind = 0; ind < nlp.linear_spectra.size(); ind++) {
        Non_Linear_Peptide nlp_lin;
        nlp.get_nlp_fragment(nlp.linear_spectra[ind], nlp_lin);
        children.push_back(TheoricMultistageTreeNode());
        children[children.size()-1].initialize_from_linear_NLP(nlp_lin, nlp.linear_spectra[ind], get_ms_level(), depth);
      }
    }
    sort_children();
}

// Creates an empty non-root node with the default root level (2) and mass
// scale factor.
TheoricMultistageTreeNode::TheoricMultistageTreeNode() {
    children.clear();
    is_root = false;
    root_ms_level = 2;
    scale_factor = 0.9995;
}

// Converts a real mass to an integer mass: subtracts MASS_PROTON, applies
// scale_factor and rounds. Every call also prints a warning to standard
// output, as this conversion is not meant to be used on theoretical nodes.
int TheoricMultistageTreeNode::convert_mass_to_int(float mass) {
    cout << "You are not supposed to use this !" << endl;

    return round((mass-MASS_PROTON)*scale_factor);
}


// Creates an empty observed-tree node: not a root, not a leaf, real-mass mode,
// default root level (2) and scale factor, and a zeroed per-level spectrum
// counter with level_num (100) slots.
MultistageTreeNode::MultistageTreeNode() {
    root_ms_level = 2;
    max_ms_level = 0;
    scale_factor = 0.9995;

    is_root = false;
    is_leaf = false;
    is_integer = false;

    level_num = 100;
    num_ms_level.assign(level_num, 0);
}

// Builds the observed spectrum tree with this node as root.
//
// Spectra whose MS level is below root_ms_level are skipped; the first
// remaining spectrum becomes the root and the recursive overload consumes the
// spectra that follow it. If no spectrum qualifies, the node is left untouched.
void MultistageTreeNode::initialize_from_spectra(AnnotatedSpectrumCounterList& spectrum_vector) {
    int first_root = 0;
    for(; first_root < spectrum_vector.size(); first_root++) {
      if(!(spectrum_vector[first_root].getMsLevel() < root_ms_level))
        break;
    }
    if(first_root == spectrum_vector.size())
      return;

    set_root();
    root = this;
    initialize_from_spectra(spectrum_vector, first_root, root_ms_level);
}

// Recursively initialises this node from spectrum_vector[spec_ind] at the given
// MS level and returns the index of the first spectrum not consumed by this
// subtree.
//
// Spectra that follow with an MS level greater than `ms_level` become child
// nodes. When there is no such spectrum, one leaf child is created per peak of
// this node's own spectrum. Children are sorted before returning.
int MultistageTreeNode::initialize_from_spectra(AnnotatedSpectrumCounterList& spectrum_vector, int spec_ind, int ms_level) {
    const int own_index = spec_ind;
    set_spec_index(own_index);
    set_ms_level(ms_level);
    // Count this node in the root's per-level histogram when the level fits.
    if(ms_level < (root->level_num))
        ++(root->num_ms_level)[ms_level];
    set_mass(spectrum_vector[own_index].get_m_over_z());

    ++spec_ind;
    if(spec_ind == spectrum_vector.size()) {
        // This was the last spectrum: no children of any kind are attached.
        set_num_children(0);
        return spec_ind;
    }

    int child_level = spectrum_vector[spec_ind].getMsLevel();
    int child_count = 0;
    for(;;) {
        if(!(spec_ind < spectrum_vector.size()))
            break;
        if(!(spectrum_vector[spec_ind].getMsLevel() > ms_level))
            break;
        child_level = spectrum_vector[spec_ind].getMsLevel();
        ++child_count;
        MultistageTreeNode sub_node;
        sub_node.root = root;
        spec_ind = sub_node.initialize_from_spectra(spectrum_vector, spec_ind, child_level);
        sub_node.is_integer = is_integer;
        children.push_back(sub_node);
        // Propagate the deepest MS level seen below this node.
        if(max_ms_level < sub_node.max_ms_level)
            max_ms_level = sub_node.max_ms_level;
    }

    if(child_count == 0) {
        // No deeper spectra: every peak of this spectrum becomes a leaf child.
        max_ms_level = spectrum_vector[own_index].getMsLevel();
        child_count = spectrum_vector[own_index].getNumPeaks();
        for(int peak_ind = 0; peak_ind < spectrum_vector[own_index].getNumPeaks(); peak_ind++) {
            MultistageTreeNode leaf_node;
            leaf_node.root = root;
            leaf_node.initialize_from_mass(spectrum_vector, own_index, peak_ind);
            children.push_back(leaf_node);
        }
    }

    set_num_children(child_count);
    sort_children();
    return spec_ind;
}

// Initialises this node as a leaf for peak `index` of
// spectrum_vector[spec_index]: the MS level is one below the parent spectrum's
// and the mass is the peak mass. The node gets no children.
//
// Always returns 0. The function is declared to return int, and the original
// body fell off the end without a return statement, which is undefined
// behaviour.
int MultistageTreeNode::initialize_from_mass(AnnotatedSpectrumCounterList& spectrum_vector, int spec_index, int index) {
    set_leaf();
    set_parent_spec_index(spec_index);
    set_ms_level(spectrum_vector[spec_index].getMsLevel()+1);
    set_mass(spectrum_vector[spec_index].getPeakMass(index));
    set_num_children(0);
    return 0;
}

// Returns the proton-corrected mass multiplied by scale_factor and rounded.
int MultistageTreeNode::convert_mass_to_int(float mass) {
    return round(scale_factor * (mass - MASS_PROTON));
}

// Opens the file at `path_clust` and closes it again without reading anything.
// `spec_pair_vec_msclust` is taken by value and is not used.
void add_spec_pair_msclust(string path_clust, SpecPairEqVec spec_pair_vec_msclust) {
  ifstream in_file(path_clust.c_str());
  in_file.close();
}

// Strict "less than" on two strings, using strcmp on their C-string forms.
bool CompareStr(string s_1, string s_2) {
    const int order = strcmp(s_1.c_str(), s_2.c_str());
    return order < 0;
}

// Returns true when strcmp reports the two strings' C-string forms as equal.
bool CompareStrEq(string s_1, string s_2) {
    const int order = strcmp(s_1.c_str(), s_2.c_str());
    return order == 0;
}


// Sort predicate for Inspect results: orders by spectrum file name first and,
// for results from the same file, by ascending scan index.
bool CompareSingleResInspectByFile(SingleResInspect sri_1, SingleResInspect sri_2) {
  const bool same_file = CompareStrEq(sri_1.spectrum_file, sri_2.spectrum_file);
  if(same_file)
    return sri_1.scan_index < sri_2.scan_index;
  return CompareStr(sri_1.spectrum_file, sri_2.spectrum_file);
}


// Returns true when both strings have the same length and the same characters.
bool is_equal(string s_1, string s_2) {
    const string::size_type len = s_1.size();
    if(len != s_2.size())
        return false;
    string::size_type pos = 0;
    while(pos < len && s_1[pos] == s_2[pos])
        ++pos;
    return pos == len;
}

// Returns true when the two strings agree on every position of the shorter
// one, i.e. when either string is a prefix of the other.
bool is_prefix(string s_1, string s_2) {
    const string::size_type common_len = (s_2.size() < s_1.size()) ? s_2.size() : s_1.size();
    string::size_type pos = 0;
    while(pos < common_len) {
        if(s_1[pos] != s_2[pos])
            return false;
        ++pos;
    }
    return true;
}

// Suffix test implemented as a prefix test on the rev_str() of both strings.
bool is_suffix(string s_1, string s_2) {
    const string reversed_first = rev_str(s_1);
    const string reversed_second = rev_str(s_2);
    return is_prefix(reversed_first, reversed_second);
}

// Writes the per-file scan indices to `scan_ind_file`: a line with the number
// of files, then for each file its name, its number of spectra and one scan
// index per line. spectrum_files[i] names the file for scan_ind[i].
void print_scan_ind(vector< vector <int> > scan_ind, vector<string> spectrum_files, string scan_ind_file) {
  ofstream out_file(scan_ind_file.c_str());
  out_file << "Number of files : "  << scan_ind.size() << endl;
  int file_ind = 0;
  while(file_ind < scan_ind.size()) {
    out_file << "file name : " << spectrum_files[file_ind] << endl;
    out_file << "num spectra : " << scan_ind[file_ind].size() << endl;
    for(int row = 0; row < scan_ind[file_ind].size(); row++) {
      out_file << scan_ind[file_ind][row] << endl;
    }
    ++file_ind;
  }
  out_file.close();
}
// Groups the top Inspect hits by spectrum file.
//
// The (by-value) result set is sorted by file name and scan index. Each
// distinct file name then gets one entry in `spectrum_files` and one matching
// row in `scan_ind`, `seqs` and `charge`, holding the scan index, peptide and
// charge of every top hit from that file.
void read_scan_ind(ResInspect res_inspect, vector< vector <int> >& scan_ind, vector< vector <string> >& seqs, vector< vector <int> >& charge, vector<string>& spectrum_files) {
  sort(res_inspect.top_res.begin(), res_inspect.top_res.end(), CompareSingleResInspectByFile);

  // First pass: record where each file's run of results begins.
  spectrum_files.clear();
  vector<int> group_begin;
  string current_file = "";
  for(int ind = 0; ind<res_inspect.top_res.size(); ind++) {
    if(CompareStrEq(res_inspect.top_res[ind].spectrum_file, current_file))
      continue;
    current_file = res_inspect.top_res[ind].spectrum_file;
    spectrum_files.push_back(current_file);
    group_begin.push_back(ind);
  }
  // Sentinel so that group_begin[i+1] is always the end of group i.
  group_begin.push_back(res_inspect.top_res.size());

  scan_ind.resize(spectrum_files.size());
  seqs.resize(spectrum_files.size());
  charge.resize(spectrum_files.size());

  // Second pass: copy each run into its per-file rows.
  for(int file_ind = 0; file_ind < spectrum_files.size(); file_ind++) {
    scan_ind[file_ind].clear();
    seqs[file_ind].clear();
    charge[file_ind].clear();
    int row = group_begin[file_ind];
    while(row < group_begin[file_ind+1]) {
      scan_ind[file_ind].push_back(res_inspect.top_res[row].scan_index);
      seqs[file_ind].push_back(res_inspect.top_res[row].peptide);
      charge[file_ind].push_back(res_inspect.top_res[row].charge);
      ++row;
    }
  }
}


// Writes the pair list to `spec_pair_file`: a "size : N" line followed by one
// "first second" scan-index pair per line.
void SpecPairEqVec::print(string spec_pair_file) {
  ofstream out_file(spec_pair_file.c_str());
  out_file << "size : " << size() << endl;
  int pair_ind = 0;
  while(pair_ind < size()) {
    out_file << (*this)[pair_ind].scan_ind_first << " " << (*this)[pair_ind].scan_ind_second << endl;
    ++pair_ind;
  }
  out_file.close();
}

// Finds pairs of entries with identical sequences.
//
// The (by-value) `seq_vec` is sorted with CompareIndexStrByPrefix; every two
// neighbouring entries whose sequences are equal are then appended to
// `spec_pair_vec` (cleared first) as a (first index, second index) pair.
//
// The loop bound is written as `ind + 1 < size` rather than `ind < size - 1`:
// size() is unsigned, so the original expression wrapped around for an empty
// input and the loop read past the end of the vector.
void calculate_equal_spec_pairs(vector<IndexedSequence> seq_vec, SpecPairEqVec& spec_pair_vec) {
    sort(seq_vec.begin(), seq_vec.end(), CompareIndexStrByPrefix);
    spec_pair_vec.clear();
    SpecPairEq temp_spec_pair;
    const int num_seq = static_cast<int>(seq_vec.size());
    for(int ind = 0; ind + 1 < num_seq; ind++) {
        if(is_equal(seq_vec[ind].seq, seq_vec[ind+1].seq)) {
            temp_spec_pair.scan_ind_first = seq_vec[ind].index;
            temp_spec_pair.scan_ind_second = seq_vec[ind+1].index;
            spec_pair_vec.push_back(temp_spec_pair);
        }
    }
}

// Resizes `seq_vec` to the number of top Inspect hits and fills each entry with
// the hit's peptide string and scan index.
void read_sequence(ResInspect res_inspect, vector<IndexedSequence>& seq_vec) {
    seq_vec.resize(res_inspect.top_res.size());
    int hit = 0;
    while(hit < res_inspect.top_res.size()) {
        IndexedSequence& entry = seq_vec[hit];
        entry.seq = res_inspect.top_res[hit].peptide;
        entry.index = res_inspect.top_res[hit].scan_index;
        ++hit;
    }
}

void read_sequence(vector<AnnotatedSpectrumCounter>& spectrum_vector, vector<IndexedSequence>& seq_vec) {
	seq_vec.resize(spectrum_vector.size());
	for(int ind = 0; ind < spectrum_vector.size(); ind++) {
		seq_vec[ind].seq = spectrum_vector[ind].getPeptide().getPeptideStr();
		seq_vec[ind].index = ind;
	}
}

// Returns `s` with its characters in reverse order.
//
// The original loop stopped at `ind > 0` and therefore never copied s[0], so
// the result was one character short (e.g. "abc" gave "cb"). Building the
// result from the reverse iterators covers the whole string, including the
// empty one.
string rev_str(string s) {
	return string(s.rbegin(), s.rend());
}

// Strict "less than" on the two strings read front to back (strcmp order).
bool CompareStrByPrefix(string s_1, string s_2) {
	const int order = strcmp(s_1.c_str(), s_2.c_str());
	return order < 0;
}

// Sort predicate for indexed sequences: orders by sequence (strcmp order) and
// breaks ties between equal sequences by ascending index.
bool CompareIndexStrByPrefix(IndexedSequence s_1, IndexedSequence s_2) {
	const int order = strcmp(s_1.seq.c_str(), s_2.seq.c_str());
	if(order == 0)
		return s_1.index<s_2.index;
	return order < 0;
}

// Strict "less than" on the rev_str() of both strings.
bool CompareStrBySuffix(string s_1, string s_2) {
	return CompareStr(rev_str(s_1), rev_str(s_2));
}

// Sort predicate for indexed sequences: orders by the rev_str() of the
// sequences (strcmp order) and breaks ties by ascending index.
bool CompareIndexStrBySuffix(IndexedSequence s_1, IndexedSequence s_2) {
	const string first_reversed = rev_str(s_1.seq);
	const string second_reversed = rev_str(s_2.seq);
	const int order = strcmp(first_reversed.c_str(), second_reversed.c_str());
	if(order == 0)
		return s_1.index<s_2.index;
	return order < 0;
}


// Prints the summary statistics to stdout; only phase 0 produces output.
//
// Reports the total db/decoy hit counts, the fdr, and three rank buckets (each
// with its decoy count): rank 0 ("top 1"), ranks 1-9 ("top 10") and ranks
// 10-99 ("top 100"). The buckets are disjoint, not cumulative.
//
// The rank vectors are sized elsewhere and may hold fewer than 100 entries, so
// every access is bounded by the vector's actual size; missing ranks count as
// zero. The original indexed 0..99 unconditionally.
void ResInspect::print_stat(int phase){
	if(phase != 0)
		return;

	cout << "total db hits : " << stat.num_db_hits << endl;
	cout << "total decoy hits : " << stat.num_decoy_hits << endl;
	cout << "fdr : " << stat.fdr << endl;

	const int num_all = static_cast<int>(stat.num_rank.size());
	const int num_decoy = static_cast<int>(stat.num_rank_decoy.size());

	int top_1 = 0;
	int top_1_decoy = 0;
	int top_10 = 0;
	int top_10_decoy = 0;
	int top_100 = 0;
	int top_100_decoy = 0;

	if(num_all > 0)
		top_1 = stat.num_rank[0];
	if(num_decoy > 0)
		top_1_decoy = stat.num_rank_decoy[0];

	for(int ind = 1; ind<10; ind++) {
		if(ind < num_all)
			top_10 = top_10 + stat.num_rank[ind];
		if(ind < num_decoy)
			top_10_decoy = top_10_decoy + stat.num_rank_decoy[ind];
	}
	for(int ind = 10; ind<100; ind++) {
		if(ind < num_all)
			top_100 = top_100 + stat.num_rank[ind];
		if(ind < num_decoy)
			top_100_decoy = top_100_decoy + stat.num_rank_decoy[ind];
	}

	cout << "top 1 : " << top_1 << " decoy : " << top_1_decoy << endl;
	cout << "top 10 : " << top_10 << " decoy : " << top_10_decoy << endl;
	cout << "top 100 : " << top_100 << " decoy : " << top_100_decoy << endl;
}

// Compares two result sets spectrum by spectrum and prints, for the spectra
// where exactly one set has rank_correct == 0, how often that set also has the
// strictly higher F-score of its top hit ("recovered") versus not.
// Iterates over res_inspect_1.rank_correct and uses the same index into
// res_inspect_2.
void print_share_stat_recovery(ResInspect& res_inspect_1, ResInspect& res_inspect_2) {
	int first_recovered = 0;
	int first_missed = 0;
	int second_recovered = 0;
	int second_missed = 0;

	for(int ind = 0; ind<res_inspect_1.rank_correct.size(); ind++) {
		const bool first_is_top = (res_inspect_1.rank_correct[ind] == 0);
		const bool second_is_top = (res_inspect_2.rank_correct[ind] == 0);

		if(first_is_top && !second_is_top) {
			if(res_inspect_1.top_res[ind].FScore>res_inspect_2.top_res[ind].FScore)
				first_recovered++;
			else
				first_missed++;
		}

		if(!first_is_top && second_is_top) {
			if(res_inspect_1.top_res[ind].FScore<res_inspect_2.top_res[ind].FScore)
				second_recovered++;
			else
				second_missed++;
		}
	}

	cout << "Top 1 recovered : " << first_recovered << " not recovered : " << first_missed << " total : " << first_recovered + first_missed << endl;
	cout << "Top 2 recovered : " << second_recovered << " not recovered : " << second_missed << " total : " << second_recovered + second_missed << endl;
}


// Prints a 5x5 contingency table (rows: result set 1, columns: result set 2)
// counting spectra by category:
//   0 - rank_correct == 0
//   1 - rank_correct is neither 0 nor -1, top hit not a decoy
//   2 - rank_correct is neither 0 nor -1, top hit is a decoy
//   3 - rank_correct == -1, top hit not a decoy
//   4 - rank_correct == -1, top hit is a decoy
// Iterates over res_inspect_1.rank_correct and uses the same index into
// res_inspect_2.
void print_share_stat_decoy(ResInspect& res_inspect_1, ResInspect& res_inspect_2) {
	const int num = 5;
	vector < vector<int> > top_vec(num, vector<int>(num, 0));

	for(int ind = 0; ind<res_inspect_1.rank_correct.size(); ind++) {
		int row;
		if(res_inspect_1.rank_correct[ind] == 0)
			row = 0;
		else if(res_inspect_1.rank_correct[ind] == -1 && res_inspect_1.is_top_decoy[ind] == false)
			row = 3;
		else if(res_inspect_1.rank_correct[ind] == -1 && res_inspect_1.is_top_decoy[ind] == true)
			row = 4;
		else if(res_inspect_1.is_top_decoy[ind] == false)
			row = 1;
		else
			row = 2;

		int col;
		if(res_inspect_2.rank_correct[ind] == 0)
			col = 0;
		else if(res_inspect_2.rank_correct[ind] == -1 && res_inspect_2.is_top_decoy[ind] == false)
			col = 3;
		else if(res_inspect_2.rank_correct[ind] == -1 && res_inspect_2.is_top_decoy[ind] == true)
			col = 4;
		else if(res_inspect_2.is_top_decoy[ind] == false)
			col = 1;
		else
			col = 2;

		top_vec[row][col]++;
	}

	// One table row per output line, cells separated by a single space.
	for(int row = 0; row < num; row++) {
		for(int col = 0; col < num; col++)
			cout << top_vec[row][col] << " ";
		cout << endl;
	}
}

// Prints a 3x3 contingency table (rows: result set 1, columns: result set 2)
// counting spectra by category:
//   0 - rank_correct == 0
//   1 - rank_correct is neither 0 nor -1
//   2 - rank_correct == -1
// Iterates over res_inspect_1.rank_correct and uses the same index into
// res_inspect_2.
void print_share_stat_no_decoy(ResInspect& res_inspect_1, ResInspect& res_inspect_2) {
	const int num = 3;
	vector < vector<int> > top_vec(num, vector<int>(num, 0));

	for(int ind = 0; ind<res_inspect_1.rank_correct.size(); ind++) {
		int row = 1;
		if(res_inspect_1.rank_correct[ind] == 0)
			row = 0;
		else if(res_inspect_1.rank_correct[ind] == -1)
			row = 2;

		int col = 1;
		if(res_inspect_2.rank_correct[ind] == 0)
			col = 0;
		else if(res_inspect_2.rank_correct[ind] == -1)
			col = 2;

		top_vec[row][col]++;
	}

	// One table row per output line, cells separated by a single space.
	for(int row = 0; row < num; row++) {
		for(int col = 0; col < num; col++)
			cout << top_vec[row][col] << " ";
		cout << endl;
	}
}

// Dispatches to one of the comparison printers: decoy_flag 0 prints the
// no-decoy table, 1 the decoy table, and any other value the recovery
// statistics.
void print_share_stat(ResInspect& res_inspect_1, ResInspect& res_inspect_2, int decoy_flag) {
	switch(decoy_flag) {
		case 0:
			print_share_stat_no_decoy(res_inspect_1, res_inspect_2);
			break;
		case 1:
			print_share_stat_decoy(res_inspect_1, res_inspect_2);
			break;
		default:
			print_share_stat_recovery(res_inspect_1, res_inspect_2);
			break;
	}
}

void SingleResInspect::print() {
	cout << "spectrum file : " << spectrum_file << endl;
	cout << "scan index : " << scan_index << endl;
	cout << "rank : " << rank << endl;
	cout << "peptide : " << peptide << endl;
	cout << "prefix : " << prefix << endl;
	cout << "suffix : " << suffix << endl;
	cout << "protein : " << protein_name << endl;
	cout << "is decoy : " << is_decoy << endl;
	cout << "charge : " << charge << endl;
	cout << "MQScore : " << MQScore << endl;
	cout << "length : " << length << endl;
	cout << "total PRM Score : " << totalPRMScore << endl;
	cout << "median PRM Score : " <<  MedianPRMScore << endl;
	cout << "Fraction Y : " <<  FractionY << endl;
	cout << "Fraction B : " <<  FractionB << endl;
	cout << "Intensity : " <<  Intensity << endl;
	cout << "NTT : "  << NTT << endl;
	cout << "P-Value : " <<  Pval << endl;
	cout << "F-Score : " <<  FScore << endl;
        cout << "Delta Score : " <<  DeltaScore << endl;
	cout << "Delta Score Other : " << DeltaScoreOther << endl;
	cout << "Record Number : " <<  RecordNumber << endl;
	cout << "DB File pos : " <<  DBFilePos << endl;
	cout << "Spec File pos : " <<  SpecFilePos << endl;
	cout << "Precursor Mz : " <<  PrecursorMZ << endl;
	cout << "Precursor Mz Error : " <<  PrecursorMZError << endl;
}

// Parses the Inspect result file `res_inspect_file` into `res_inspect`.
//
// The first line of the file is read and discarded (presumably a column
// header -- confirm against the file format). Every following record is read
// as tab-delimited fields, with the last field terminated by a newline.
//
// Consecutive records sharing a scan index form one group. Within a group,
// `rank` counts from 0. Each group gets its own row in res_inspect.all_res;
// the rank-0 record of each group is also appended to res_inspect.top_res,
// with a matching 0/1 entry in res_inspect.is_top_decoy (decoy hits also bump
// stat.num_decoy_hits). A record is a decoy when its protein name starts with
// "XXX.".
//
// Changes from the original:
//  * `ind` is initialised; the original incremented it while it was still
//    indeterminate.
//  * `max_len` is const so `line_str` is a standard fixed-size array instead
//    of a compiler-extension variable-length array.
void ResInspectModel::read_inspect_res(string res_inspect_file, ResInspect& res_inspect) {
    ifstream in_file;
    in_file.open(res_inspect_file.c_str());
    const int max_len = 1000;
    char line_str[max_len];
    in_file.getline(line_str, max_len, '\n');

    int ind = -1;            // rank inside the current scan group
    int scan_ind;
    int old_scan_ind = -1;   // scan index of the previous record
    int num_proc = -1;       // index of the current scan group in all_res
    while(in_file.getline(line_str, max_len, '\t')) {
        ind++;
        SingleResInspect single_res_inspect;
        single_res_inspect.spectrum_file = line_str;

        in_file.getline(line_str, max_len, '\t');
        scan_ind = atoi(line_str);
        if(scan_ind != old_scan_ind) {
            // A new scan index starts a new group and restarts the ranking.
            ind = 0;
            num_proc++;
        }
        old_scan_ind = scan_ind;
        single_res_inspect.scan_index = scan_ind;

        // Annotation field: first and last characters are stored as prefix
        // and suffix; the peptide is what lies between the two flanking
        // 2-character ends.
        in_file.getline(line_str, max_len, '\t');
        string complete_peptide = line_str;
        int len_com_pep = complete_peptide.size();
        single_res_inspect.prefix = complete_peptide[0];
        single_res_inspect.suffix = complete_peptide[len_com_pep-1];
        for(int char_ind = 2; char_ind<len_com_pep-2; char_ind++)
            single_res_inspect.peptide.push_back(complete_peptide[char_ind]);

        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.protein_name = line_str;
        single_res_inspect.is_decoy = 0;
        if(single_res_inspect.protein_name[0] == 'X' && single_res_inspect.protein_name[1] == 'X' && single_res_inspect.protein_name[2] == 'X' && single_res_inspect.protein_name[3] == '.')
        {
            single_res_inspect.is_decoy = 1;
        }

        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.charge = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.MQScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.length = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.totalPRMScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.MedianPRMScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.FractionY = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.FractionB = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.Intensity = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.NTT = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.Pval = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.FScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.DeltaScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.DeltaScoreOther = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.RecordNumber = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.DBFilePos = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.SpecFilePos = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.PrecursorMZ = atof(line_str);
        // Last field of the record: terminated by the end of the line.
        in_file.getline(line_str, max_len, '\n');
        single_res_inspect.PrecursorMZError = atof(line_str);

        single_res_inspect.rank = ind;
        if(ind == 0) {
            // First record of a group: make room for the group and register
            // it as the top hit.
            res_inspect.all_res.resize(num_proc+1);
            res_inspect.top_res.push_back(single_res_inspect);
            if(single_res_inspect.is_decoy == 1) {
                res_inspect.stat.num_decoy_hits++;
                res_inspect.is_top_decoy.push_back(1);
            } else {
                res_inspect.is_top_decoy.push_back(0);
            }
        }
        res_inspect.all_res[num_proc].push_back(single_res_inspect);
    }
}

// Parses the Inspect result file `res_inspect_file` into `res_inspect`,
// storing every record under its scan index, and compares each record's
// peptide with the peptide annotated on spectrum_vector[scan index].
//
// The first line of the file is read and discarded. Each following record is
// read as tab-delimited fields. Within a run of records sharing a scan index,
// `rank` counts from 0. A record whose peptide equals the annotated peptide
// sets rank_correct for that scan and bumps the per-rank counters.
//
// res_inspect.all_res and res_inspect.is_top_decoy are resized to
// res_inspect.num_spectra; the rank statistics are resized and zeroed only
// when calc_stat_flag is set.
//
// Changes from the original:
//  * The loop is driven by the first read of each record instead of
//    `while(!in_file.eof())`. A stream in a fail state (e.g. the file could
//    not be opened) never reports eof, so the original looped forever.
//  * `ind` is initialised; the original incremented it while it was still
//    indeterminate.
//  * `max_len` is const so `line_str` is a standard fixed-size array.
//
// NOTE(review): rank_correct / stat.num_rank / stat.num_rank_decoy are written
// in the match branch even when calc_stat_flag is false, and scan_ind / ind
// are not range checked -- confirm callers always size these containers.
// NOTE(review): the last field is read up to '\t' here, while the two-argument
// overload reads it up to '\n' -- confirm the expected file layout.
// NOTE(review): a record with a scan index lower than the previous one is
// skipped after only two fields have been consumed -- confirm this is intended.
void ResInspectModel::read_inspect_res(string res_inspect_file, ResInspect& res_inspect, AnnotatedSpectrumCounterList& spectrum_vector) {
    ifstream in_file;
    in_file.open(res_inspect_file.c_str());
    const int max_len = 1000;
    char line_str[max_len];
    in_file.getline(line_str, max_len, '\n');

    res_inspect.all_res.resize(res_inspect.num_spectra);
    res_inspect.is_top_decoy.resize(res_inspect.num_spectra);

    int ind = -1;            // rank inside the current scan group
    int scan_ind;
    int old_scan_ind = -1;   // scan index of the previous accepted record
    if(calc_stat_flag) {
        res_inspect.rank_correct.resize(res_inspect.num_spectra);
        res_inspect.stat.num_rank.resize(res_inspect.num_top_hits);
        res_inspect.stat.num_rank_decoy.resize(res_inspect.num_top_hits);
        for(int top_ind = 0; top_ind < res_inspect.num_top_hits; top_ind++) {
            res_inspect.stat.num_rank[top_ind] = 0;
            res_inspect.stat.num_rank_decoy[top_ind] = 0;
        }
    }

    while(in_file.getline(line_str, max_len, '\t')) {
        ind++;
        SingleResInspect single_res_inspect;
        single_res_inspect.spectrum_file = line_str;

        in_file.getline(line_str, max_len, '\t');
        scan_ind = atoi(line_str);
        if(scan_ind<old_scan_ind)
            continue;
        if(scan_ind>old_scan_ind) {
            // A higher scan index starts a new group and restarts the ranking.
            ind = 0;
            res_inspect.is_top_decoy[scan_ind] = 0;
            if(calc_stat_flag)
                res_inspect.rank_correct[scan_ind] = -1;
        }
        old_scan_ind = scan_ind;
        single_res_inspect.scan_index = scan_ind;

        // Annotation field: first and last characters are stored as prefix
        // and suffix; the peptide is what lies between the two flanking
        // 2-character ends.
        in_file.getline(line_str, max_len, '\t');
        string complete_peptide = line_str;
        int len_com_pep = complete_peptide.size();
        single_res_inspect.prefix = complete_peptide[0];
        single_res_inspect.suffix = complete_peptide[len_com_pep-1];
        for(int char_ind = 2; char_ind<len_com_pep-2; char_ind++)
            single_res_inspect.peptide.push_back(complete_peptide[char_ind]);

        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.protein_name = line_str;
        single_res_inspect.is_decoy = 0;
        if(single_res_inspect.protein_name[0] == 'X' && single_res_inspect.protein_name[1] == 'X' && single_res_inspect.protein_name[2] == 'X' && single_res_inspect.protein_name[3] == '.')
        {
            single_res_inspect.is_decoy = 1;
        }

        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.charge = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.MQScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.length = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.totalPRMScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.MedianPRMScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.FractionY = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.FractionB = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.Intensity = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.NTT = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.Pval = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.FScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.DeltaScore = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.DeltaScoreOther = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.RecordNumber = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.DBFilePos = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.SpecFilePos = atoi(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.PrecursorMZ = atof(line_str);
        in_file.getline(line_str, max_len, '\t');
        single_res_inspect.PrecursorMZError = atof(line_str);

        single_res_inspect.rank = ind;
        res_inspect.all_res[scan_ind].push_back(single_res_inspect);

        // Record the rank at which the annotated peptide was found.
        if(strcmp(single_res_inspect.peptide.c_str(), spectrum_vector[scan_ind].getPeptide().getPeptideStr().c_str()) == 0) {
            res_inspect.rank_correct[scan_ind] = ind;
            res_inspect.stat.num_rank[ind]++;
            if(res_inspect.is_top_decoy[scan_ind] == 1)
                res_inspect.stat.num_rank_decoy[ind]++;
        }
        if(ind == 0) {
            res_inspect.top_res.push_back(single_res_inspect);
            if(single_res_inspect.is_decoy == 1) {
                res_inspect.stat.num_decoy_hits++;
                res_inspect.is_top_decoy[scan_ind] = 1;
            }
        }
    }
}


// Reads all spectra from `input_file_mgf` with read filtering switched off and
// hands them to write_mgf() together with the selection arguments; returns
// whatever write_mgf() returns.
int filter_mgf(Config* config, string input_file_mgf, string output_file_mgf, vector<int> scan_set, int scan_index_start, bool use_spectrum_index) {
	AnnotatedSpectrumCounterList loaded_spectra;
	ReadSpecModel reader;
	reader.set_no_filter();
	reader.read_spectra(config, input_file_mgf, loaded_spectra);

	const int next_scan_index = write_mgf(output_file_mgf, loaded_spectra, scan_set, scan_index_start, use_spectrum_index);
	return next_scan_index;
}


// Reads all spectra from `input_file_mgf` with read filtering switched off and
// hands them to write_mgf_seq_charge() together with the selection, sequence
// and charge arguments (the input file name is passed on as the spectrum file
// name); returns whatever write_mgf_seq_charge() returns.
int filter_mgf_seq_charge(Config* config, string input_file_mgf, string output_file_mgf, vector<int> scan_set, vector<string> seqs, vector<int> charge, int scan_index_start, bool use_spectrum_index) {
	AnnotatedSpectrumCounterList loaded_spectra;
	ReadSpecModel reader;
	reader.set_no_filter();
	reader.read_spectra(config, input_file_mgf, loaded_spectra);

	const int next_scan_index = write_mgf_seq_charge(output_file_mgf, loaded_spectra, scan_set, seqs, charge, input_file_mgf, scan_index_start, use_spectrum_index);
	return next_scan_index;
}


// Appends to `output_file_mgf` the spectra whose scan number matches the next
// unmatched entry of `scan_set`, walking `spectrum_vector` once from the
// front. Each record carries TITLE (scan number and spectrum file name),
// CHARGE and SEQ taken from charge[] / seqs[] at the matched scan_set
// position, PEPMASS, a running SCAN number starting at `scan_index_start`,
// and the peak list.
// Returns scan_index_start advanced by the number of spectra written.
int write_mgf_seq_charge_spectrum_index(string output_file_mgf, AnnotatedSpectrumCounterList& spectrum_vector, vector<int> scan_set, vector<string> seqs, vector<int> charge, string spectrum_file, int scan_index_start) {
  ofstream out_file(output_file_mgf.c_str(), ios_base::app);
  int wanted = 0;   // position in scan_set still to be matched
  for(int spec = 0; spec < spectrum_vector.size() && wanted < scan_set.size(); spec++) {
    if(spectrum_vector[spec].getScanNumber() != scan_set[wanted])
      continue;

    out_file << "BEGIN IONS" << endl;
    out_file << "TITLE=" << scan_set[wanted] << "_" << spectrum_file << endl;
    out_file << "CHARGE=" << charge[wanted] << endl;
    out_file << "PEPMASS="<< spectrum_vector[spec].get_m_over_z() << endl;
    out_file << "SEQ=" << seqs[wanted] << endl;
    out_file << "SCAN=" << scan_index_start << endl;
    for(int peak = 0; peak < spectrum_vector[spec].getNumPeaks(); peak++) {
      out_file << spectrum_vector[spec].getPeakMass(peak) << " " << spectrum_vector[spec].getPeakIntensity(peak) << endl;
    }
    out_file << "END IONS" << endl;
    out_file <<  endl;

    scan_index_start++;
    wanted++;
  }
  out_file.close();
  return scan_index_start;
}

// Appends to `output_file_mgf` one MGF record per entry of `scan_set`, where
// each entry is used directly as an index into `spectrum_vector`. TITLE, SEQ
// and CHARGE come from the spectrum itself; `seqs`, `charge` and
// `spectrum_file` are accepted for signature parity with the spectrum-index
// variant and are not read. SCAN is a running number starting at
// `scan_index_start`.
// Returns scan_index_start advanced by the number of spectra written.
//
// Fix: the SCAN line is now terminated with a newline. The original omitted
// it, so the first peak was glued onto the "SCAN=" line; the spectrum-index
// variant already ends that line.
// NOTE(review): entries of scan_set are not range checked against
// spectrum_vector -- confirm callers pass valid indices.
int write_mgf_seq_charge_default_index(string output_file_mgf, AnnotatedSpectrumCounterList& spectrum_vector, vector<int> scan_set, vector<string> seqs, vector<int> charge, string spectrum_file, int scan_index_start) {
  ofstream out_file;
  out_file.open(output_file_mgf.c_str(), ios_base::app);
  for(int ind = 0; ind < scan_set.size(); ind++) {
    int spec_ind = scan_set[ind];
    out_file << "BEGIN IONS" << endl;
    out_file << "TITLE=" << spectrum_vector[spec_ind].getPeptide().getPeptideStr() << endl;
    out_file << "CHARGE=" << spectrum_vector[spec_ind].getCharge() << endl;
    out_file << "PEPMASS="<< spectrum_vector[spec_ind].get_m_over_z() << endl;
    out_file << "SEQ=" << spectrum_vector[spec_ind].getPeptide().getPeptideStr() << endl;
    out_file << "SCAN=" << scan_index_start << endl;
    for(int peak_ind = 0; peak_ind < spectrum_vector[spec_ind].getNumPeaks(); peak_ind++)
      out_file << spectrum_vector[spec_ind].getPeakMass(peak_ind) << " " << spectrum_vector[spec_ind].getPeakIntensity(peak_ind) << endl;
    scan_index_start++;
    out_file << "END IONS" << endl;
    out_file <<  endl;
  }
  out_file.close();
  return scan_index_start;
}

// Appends to `output_file_mgf` the spectra whose scan number matches the next
// unmatched entry of `scan_set`, walking `spectrum_vector` once from the
// front. Each record carries the spectrum's own TITLE, its PEPMASS, a running
// SCAN number starting at `scan_index_start`, and the peak list.
// Returns scan_index_start advanced by the number of spectra written.
int write_mgf_spectrum_index(string output_file_mgf, AnnotatedSpectrumCounterList& spectrum_vector, vector<int> scan_set, int scan_index_start) {
  ofstream out_file(output_file_mgf.c_str(), ios_base::app);
  int wanted = 0;   // position in scan_set still to be matched
  for(int spec = 0; spec < spectrum_vector.size() && wanted < scan_set.size(); spec++) {
    if(spectrum_vector[spec].getScanNumber() != scan_set[wanted])
      continue;

    out_file << "BEGIN IONS" << endl;
    out_file << "TITLE=" << spectrum_vector[spec].getTitle() << endl;
    out_file << "PEPMASS="<< spectrum_vector[spec].get_m_over_z() << endl;
    out_file << "SCAN=" << scan_index_start << endl;
    for(int peak = 0; peak < spectrum_vector[spec].getNumPeaks(); peak++) {
      out_file << spectrum_vector[spec].getPeakMass(peak) << " " << spectrum_vector[spec].getPeakIntensity(peak) << endl;
    }
    out_file << "END IONS" << endl;
    out_file <<  endl;

    scan_index_start++;
    wanted++;
  }
  out_file.close();
  return scan_index_start;
}

// Appends to `output_file_mgf` one MGF record per entry of `scan_set`, where
// each entry is used directly as an index into `spectrum_vector`. TITLE is the
// spectrum's peptide string; SCAN is a running number starting at
// `scan_index_start`.
// Returns scan_index_start advanced by the number of spectra written.
//
// Fix: the SCAN line is now terminated with a newline. The original omitted
// it, so the first peak was glued onto the "SCAN=" line; the spectrum-index
// variant already ends that line.
// NOTE(review): entries of scan_set are not range checked against
// spectrum_vector -- confirm callers pass valid indices.
int write_mgf_default_index(string output_file_mgf, AnnotatedSpectrumCounterList& spectrum_vector, vector<int> scan_set, int scan_index_start) {
  ofstream out_file;
  out_file.open(output_file_mgf.c_str(), ios_base::app);
  for(int ind = 0; ind < scan_set.size(); ind++) {
    int spec_ind = scan_set[ind];
    out_file << "BEGIN IONS" << endl;
    out_file << "TITLE=" << spectrum_vector[spec_ind].getPeptide().getPeptideStr() << endl;
    out_file << "PEPMASS="<< spectrum_vector[spec_ind].get_m_over_z() << endl;
    out_file << "SCAN=" << scan_index_start << endl;
    for(int peak_ind = 0; peak_ind < spectrum_vector[spec_ind].getNumPeaks(); peak_ind++)
      out_file << spectrum_vector[spec_ind].getPeakMass(peak_ind) << " " << spectrum_vector[spec_ind].getPeakIntensity(peak_ind) << endl;
    scan_index_start++;
    out_file << "END IONS" << endl;
    out_file <<  endl;
  }
  out_file.close();
  return scan_index_start;
}

// Dispatches to the spectrum-index writer when `use_spectrum_index` is set and
// to the default-index writer otherwise; returns the chosen writer's result.
int write_mgf_seq_charge(string output_file_mgf, AnnotatedSpectrumCounterList& spectrum_vector, vector<int> scan_set, vector<string> seqs, vector<int> charge, string spectrum_file, int scan_index_start, bool use_spectrum_index) {
  if(use_spectrum_index)
    return write_mgf_seq_charge_spectrum_index(output_file_mgf, spectrum_vector, scan_set, seqs, charge, spectrum_file, scan_index_start);
  return write_mgf_seq_charge_default_index(output_file_mgf, spectrum_vector, scan_set, seqs, charge, spectrum_file, scan_index_start);
}

// Dispatches to the spectrum-index writer when `use_spectrum_index` is set and
// to the default-index writer otherwise; returns the chosen writer's result.
int write_mgf(string output_file_mgf, AnnotatedSpectrumCounterList& spectrum_vector, vector<int> scan_set, int scan_index_start, bool use_spectrum_index) {
  if(use_spectrum_index)
    return write_mgf_spectrum_index(output_file_mgf, spectrum_vector, scan_set, scan_index_start);
  return write_mgf_default_index(output_file_mgf, spectrum_vector, scan_set, scan_index_start);
}

// Sort predicate: orders spectra by their peptide string using CompareStr.
bool CompareSpectrumByPeptideStr(const Spectrum& s_1, const Spectrum& s_2){
	const string first_peptide = s_1.getPeptide().getPeptideStr().c_str();
	const string second_peptide = s_2.getPeptide().getPeptideStr().c_str();
	return CompareStr(first_peptide, second_peptide);
}

// Sort predicate: orders peptides by their peptide string using CompareStr.
bool ComparePeptideBySeq(const Peptide& pep_1, const Peptide& pep_2){
	const string first_seq = pep_1.getPeptideStr().c_str();
	const string second_seq = pep_2.getPeptideStr().c_str();
	return CompareStr(first_seq, second_seq);
}

// Equality predicate: true when both peptides have the same peptide string
// according to CompareStrEq.
bool ComparePeptideBySeqEq(const Peptide& pep_1, const Peptide& pep_2){
	const string first_seq = pep_1.getPeptideStr().c_str();
	const string second_seq = pep_2.getPeptideStr().c_str();
	return CompareStrEq(first_seq, second_seq);
}

void print_spectrum_vector(vector<AnnotatedSpectrumCounter> sv, bool flag) {
  for(int ind = 0; ind<sv.size(); ind++) { 
	  cout << "Index : " << ind << endl;
	  cout << "Num Peaks : " << sv[ind].getNumPeaks() << endl;
  	  cout << "Spectrum mass : " << sv[ind].get_org_pm() << endl;
	  cout << "Peptide str : " << sv[ind].getPeptide().getPeptideStr() << endl;
	  cout << "Peptide mass : " << sv[ind].getPeptide().get_mass() << endl;
	  cout << "Charge : " << sv[ind].getCharge() << endl;
	  cout << "Num mod : " << sv[ind].get_num_mod() << endl;
	  if(flag) {
		  for(int peak_ind = 0; peak_ind<sv[ind].getNumPeaks(); peak_ind++) {
		  	cout << "Peak " << peak_ind << " : " << sv[ind].getPeakMass(peak_ind) << " " << sv[ind].getPeakIntensity(peak_ind) << endl;
		  }
	  }
  }
}
// Parses `str` into `pep`, treating each '(' as the start of a three-character
// modification marker. The '(' and the two characters after it are dropped
// from the sequence, and the number of sequence characters copied so far is
// remembered as the modification position. After the bare sequence has been
// parsed into `pep`, an offset of 15.9949 (named oxidation_offset) is added at
// every remembered position.
void string_to_peptide(Config* config, string str, PeptideAdd& pep) {
	const float oxidation_offset = 15.9949;
	vector<int> mod_pos;
	string bare_sequence;
	int residue_count = 0;

	int pos = 0;
	while(pos < str.size()) {
		if(str[pos] == '(') {
			mod_pos.push_back(residue_count);
			pos += 3;   // skip '(' and the two characters after it
		} else {
			bare_sequence.push_back(str[pos]);
			residue_count++;
			pos += 1;
		}
	}

	pep.parseFromString(config, bare_sequence);
	for(int mod = 0; mod<mod_pos.size(); mod++) {
		pep.add_offset(mod_pos[mod], oxidation_offset);
	}
}

// Sort predicate that places peaks with higher intensity first.
bool ComparePeaksByIntensity(Peak pk_1, Peak pk_2) {
	return pk_2.intensity < pk_1.intensity;
}

// Convenience overload: forwards to the eight-argument read_msp() with both
// feasibility flags set to false and empty modification-count / charge lists.
void read_msp(Config* config, string input_file_msp, AnnotatedSpectrumCounterList& spectrum_vector, int num_top_peaks) {
	bool use_mod_filter = false;
	bool use_charge_filter = false;
	vector<int> allowed_mod_counts;
	vector<int> allowed_charges;
	read_msp(config, input_file_msp, spectrum_vector, num_top_peaks, use_mod_filter, use_charge_filter, allowed_mod_counts, allowed_charges);
}

// Converts an MSP spectral library into MGF, writing only the records that
// pass the charge and modification-count filters.
//
//   config               forwarded to string_to_peptide() for each record
//   input_file_msp       path of the MSP file to read
//   output_file_mgf      path of the MGF file to (over)write
//   feasible_mod_flag    when true, keep a record only if its "Mods=" count
//                        appears in feasible_mod_num
//   feasible_charge_flag when true, keep a record only if its charge appears
//                        in feasible_charge
//
// The parser expects, per record and in this order, the tokens
//   "Name:" <peptide>/<charge>  "MW:" <mass>  "Comment:" ... "Num" "peaks:" <k>
// followed by <k> lines of "<mass> <intensity> <rest of line ignored>".
// Unexpected tokens are reported on stdout and parsing continues; a record
// that is cut short stops the conversion instead of looping forever.
void filter_msp_mgf(Config* config, string input_file_msp, string output_file_mgf, bool feasible_mod_flag, bool feasible_charge_flag, vector<int> feasible_mod_num, vector<int> feasible_charge) {
    ifstream in_file;
    in_file.open(input_file_msp.c_str());
    ofstream out_file;
    out_file.open(output_file_mgf.c_str());

    string name_string;
    string name_str;
    PeptideAdd pep;
    while(in_file>>name_string) {
	    if(name_string != "Name:") {
		cout << "Error reading Name!" << endl;
		cout << name_string << endl;
	    }

	    // The name token is "<peptide>/<charge>": the last character is the
	    // charge digit and the one before it is the separator. Anything
	    // shorter cannot be split and would underflow the size arithmetic.
	    if(!(in_file >> name_str) || name_str.size() < 2) {
		cout << "Error reading Name!" << endl;
		break;
	    }
	    string peptide_string = name_str.substr(0, name_str.size()-2);
	    int charge = atoi(name_str.c_str() + name_str.size() - 1);

	    string_to_peptide(config, peptide_string, pep);

	    string mw_string;
	    string mw_str;
	    in_file >> mw_string;
	    if(mw_string != "MW:")
		cout << "Error reading MW!" << endl;
	    in_file >> mw_str;
	    float mw = atof(mw_str.c_str());

	    string comment_string;
	    in_file >> comment_string;
	    if(comment_string != "Comment:")
		cout << "Error reading Comments!" << endl;

	    // Walk the comment tokens up to "Num" (of "Num peaks:"), remembering
	    // the last "Mods=<n>..." token. atoi() stops at the first non-digit,
	    // so multi-digit counts are read in full.
	    string str;
	    string mod_num_str;
	    while((in_file >> str) && str != "Num") {
		if(str.compare(0, 5, "Mods=") == 0)
			mod_num_str = str.substr(5);
	    }
	    if(!in_file) {
		// Input ended before "Num peaks:" was found.
		cout << "Error reading Peaks!" << endl;
		break;
	    }

	    int mod_num = atoi(mod_num_str.c_str());

	    string peak_string;
	    in_file >> peak_string;
	    if(peak_string != "peaks:")
		cout << "Error reading Peaks!" << endl;
	    string peak_str;
	    in_file >> peak_str;
	    int peak_num = atoi(peak_str.c_str());
	    if(peak_num < 0)
		peak_num = 0;

	    // Peak storage lives only for this record and is released
	    // automatically at the end of the iteration.
	    vector<Peak> peaks(peak_num);

	    string mass_str;
	    string intensity_str;
	    string annot_line;

	    for(int ind = 0; ind < peak_num; ind++) {
		in_file >> mass_str;
		peaks[ind].mass = atof(mass_str.c_str());
		in_file >> intensity_str;
		peaks[ind].intensity = atof(intensity_str.c_str());
		// Discard the annotation that fills the rest of the line.
		getline(in_file, annot_line);
	    }

	    // A filter that is switched off accepts everything.
	    bool flag_charge = !feasible_charge_flag;
	    for(size_t ind = 0; ind < feasible_charge.size(); ind++) {
		if(charge == feasible_charge[ind])
			flag_charge = true;
	    }

	    bool flag_mod = !feasible_mod_flag;
	    for(size_t ind = 0; ind < feasible_mod_num.size(); ind++) {
		if(mod_num == feasible_mod_num[ind])
			flag_mod = true;
	    }

	    float m_over_z = mw/charge;
	    if(flag_mod && flag_charge) {
		out_file << "BEGIN IONS" << endl;
		out_file << "TITLE=" << pep.getPeptideStr() << endl;
		out_file << "CHARGE=" << charge << endl;
		out_file << "PEPMASS="<< m_over_z << endl;
		out_file << "SEQ=" << pep.getPeptideStr() << endl;
		for(int ind = 0; ind < peak_num; ind++)
			out_file << peaks[ind].mass << " " << peaks[ind].intensity << endl;
		out_file << "END IONS" << endl;
		out_file <<  endl;
	    }
	}
}



// Copies an MSP spectral library to a new MSP file, keeping only the records
// that pass the charge and modification-count filters. The number of records
// kept is printed to stdout at the end.
//
//   config               forwarded to string_to_peptide() for each record
//   input_file_msp       path of the MSP file to read
//   output_file_msp      path of the MSP file to (over)write
//   feasible_mod_flag    when true, keep a record only if its "Mods=" count
//                        appears in feasible_mod_num
//   feasible_charge_flag when true, keep a record only if its charge appears
//                        in feasible_charge
//
// Two streams are opened on the input: in_file is parsed token by token to
// decide whether a record is kept, copy_file is advanced line by line in
// lock-step so that kept records are copied verbatim. Every record is taken
// to span exactly 5 + <number of peaks> lines.
void filter_msp(Config* config, string input_file_msp, string output_file_msp, bool feasible_mod_flag, bool feasible_charge_flag, vector<int> feasible_mod_num, vector<int> feasible_charge) {
    ifstream in_file;
    in_file.open(input_file_msp.c_str());
    ifstream copy_file;
    copy_file.open(input_file_msp.c_str());
    ofstream out_file;
    out_file.open(output_file_msp.c_str());

    string name_string;
    string name_str;
    PeptideAdd pep;
    int spec_ind = 0;
    while(in_file>>name_string) {
	    if(name_string != "Name:") {
		cout << "Error reading Name!" << endl;
		cout << name_string << endl;
	    }

	    // The name token is "<peptide>/<charge>": the last character is the
	    // charge digit and the one before it is the separator. Anything
	    // shorter cannot be split and would underflow the size arithmetic.
	    if(!(in_file >> name_str) || name_str.size() < 2) {
		cout << "Error reading Name!" << endl;
		break;
	    }
	    string peptide_string = name_str.substr(0, name_str.size()-2);
	    int charge = atoi(name_str.c_str() + name_str.size() - 1);

	    string_to_peptide(config, peptide_string, pep);

	    string mw_string;
	    string mw_str;
	    in_file >> mw_string;
	    if(mw_string != "MW:")
		cout << "Error reading MW!" << endl;
	    in_file >> mw_str;

	    string comment_string;
	    in_file >> comment_string;
	    if(comment_string != "Comment:")
		cout << "Error reading Comments!" << endl;

	    // Walk the comment tokens up to "Num" (of "Num peaks:"), remembering
	    // the last "Mods=<n>..." token. atoi() stops at the first non-digit,
	    // so multi-digit counts are read in full.
	    string str;
	    string mod_num_str;
	    while((in_file >> str) && str != "Num") {
		if(str.compare(0, 5, "Mods=") == 0)
			mod_num_str = str.substr(5);
	    }
	    if(!in_file) {
		// Input ended before "Num peaks:" was found.
		cout << "Error reading Peaks!" << endl;
		break;
	    }

	    int mod_num = atoi(mod_num_str.c_str());

	    string peak_string;
	    in_file >> peak_string;
	    if(peak_string != "peaks:")
		cout << "Error reading Peaks!" << endl;
	    string peak_str;
	    in_file >> peak_str;
	    int peak_num = atoi(peak_str.c_str());
	    if(peak_num < 0)
		peak_num = 0;

	    // Skip the rest of the "Num peaks:" line and then the peak lines;
	    // their content is not needed to decide whether the record is kept.
	    string line;
	    getline(in_file, line);
	    for(int ind = 0; ind < peak_num; ind++)
		getline(in_file, line);

	    // A filter that is switched off accepts everything.
	    bool flag_charge = !feasible_charge_flag;
	    for(size_t ind = 0; ind < feasible_charge.size(); ind++) {
		if(charge == feasible_charge[ind])
			flag_charge = true;
	    }

	    bool flag_mod = !feasible_mod_flag;
	    for(size_t ind = 0; ind < feasible_mod_num.size(); ind++) {
		if(mod_num == feasible_mod_num[ind])
			flag_mod = true;
	    }

	    // Advance the raw-line stream over this record, echoing the lines
	    // only when the record is kept.
	    const bool keep = flag_mod && flag_charge;
	    const int num_line = 5+peak_num;
	    for(int ind = 0; ind < num_line; ind++) {
		// Cleared first so a failed read yields an empty line rather
		// than a repeat of the previous one.
		line.clear();
		getline(copy_file, line);
		if(keep)
			out_file << line << endl;
	    }
	    if(keep)
		spec_ind++;
	}
	cout << spec_ind << endl;
}

// Reads an MSP spectral library and appends one AnnotatedSpectrumCounter to
// spectrum_vector for every record that passes the charge and
// modification-count filters.
//
//   config               forwarded to string_to_peptide() for each record
//   input_file_msp       path of the MSP file to read
//   spectrum_vector      receives the accepted spectra (appended, not cleared)
//   num_top_peaks        upper bound on the peak count reported by each
//                        spectrum; peaks are sorted by decreasing intensity
//                        first, so the reported prefix holds the strongest
//   feasible_mod_flag    when true, keep a record only if its "Mods=" count
//                        appears in feasible_mod_num
//   feasible_charge_flag when true, keep a record only if its charge appears
//                        in feasible_charge
//
// The parser expects, per record and in this order, the tokens
//   "Name:" <peptide>/<charge>  "MW:" <mass>  "Comment:" ... "Num" "peaks:" <k>
// followed by <k> lines of "<mass> <intensity> <rest of line ignored>".
// Unexpected tokens are reported on stdout and parsing continues; a record
// that is cut short stops the read instead of looping forever.
void read_msp(Config* config, string input_file_msp, AnnotatedSpectrumCounterList& spectrum_vector, int num_top_peaks, bool feasible_mod_flag, bool feasible_charge_flag, vector<int> feasible_mod_num, vector<int> feasible_charge) {
    ifstream in_file;
    in_file.open(input_file_msp.c_str());
    string name_string;
    string name_str;
    PeptideAdd pep;
    while(in_file>>name_string) {
	    if(name_string != "Name:") {
		cout << "Error reading Name!" << endl;
		cout << name_string << endl;
	    }

	    // The name token is "<peptide>/<charge>": the last character is the
	    // charge digit and the one before it is the separator. Anything
	    // shorter cannot be split and would underflow the size arithmetic.
	    if(!(in_file >> name_str) || name_str.size() < 2) {
		cout << "Error reading Name!" << endl;
		break;
	    }
	    string peptide_string = name_str.substr(0, name_str.size()-2);
	    int charge = atoi(name_str.c_str() + name_str.size() - 1);

	    string_to_peptide(config, peptide_string, pep);

	    string mw_string;
	    string mw_str;
	    in_file >> mw_string;
	    if(mw_string != "MW:")
		cout << "Error reading MW!" << endl;
	    in_file >> mw_str;
	    float mw = atof(mw_str.c_str());

	    string comment_string;
	    in_file >> comment_string;
	    if(comment_string != "Comment:")
		cout << "Error reading Comments!" << endl;

	    // Walk the comment tokens up to "Num" (of "Num peaks:"), remembering
	    // the last "Mods=<n>..." token. atoi() stops at the first non-digit,
	    // so multi-digit counts are read in full.
	    string str;
	    string mod_num_str;
	    while((in_file >> str) && str != "Num") {
		if(str.compare(0, 5, "Mods=") == 0)
			mod_num_str = str.substr(5);
	    }
	    if(!in_file) {
		// Input ended before "Num peaks:" was found.
		cout << "Error reading Peaks!" << endl;
		break;
	    }

	    int mod_num = atoi(mod_num_str.c_str());

	    string peak_string;
	    in_file >> peak_string;
	    if(peak_string != "peaks:")
		cout << "Error reading Peaks!" << endl;
	    string peak_str;
	    in_file >> peak_str;
	    int peak_num = atoi(peak_str.c_str());
	    if(peak_num < 0)
		peak_num = 0;

	    AnnotatedSpectrumCounter s;
	    // The buffer is handed to the spectrum through setPeaksPtr() below.
	    // NOTE(review): whether the spectrum takes ownership and frees it is
	    // not visible here - confirm against AnnotatedSpectrumCounter.
	    Peak* peaks = new Peak[peak_num];

	    string mass_str;
	    string intensity_str;
	    string annot_line;

	    for(int ind = 0; ind < peak_num; ind++) {
		in_file >> mass_str;
		peaks[ind].mass = atof(mass_str.c_str());
		in_file >> intensity_str;
		peaks[ind].intensity = atof(intensity_str.c_str());
		// Discard the annotation that fills the rest of the line. A
		// std::string is used so that a long annotation cannot put the
		// stream into a failed state and silently end the read.
		getline(in_file, annot_line);
	    }

	    // Strongest peaks first, so that limiting the peak count below
	    // keeps the most intense ones.
	    sort(peaks, peaks+peak_num, ComparePeaksByIntensity);

	    if(peak_num < num_top_peaks)
		    s.setNumPeaks(peak_num);
	    else
		    s.setNumPeaks(num_top_peaks);

	    s.setPeaksPtr(peaks);

	    // A filter that is switched off accepts everything.
	    bool flag_charge = !feasible_charge_flag;
	    for(size_t ind = 0; ind < feasible_charge.size(); ind++) {
		if(charge == feasible_charge[ind])
			flag_charge = true;
	    }

	    bool flag_mod = !feasible_mod_flag;
	    for(size_t ind = 0; ind < feasible_mod_num.size(); ind++) {
		if(mod_num == feasible_mod_num[ind])
			flag_mod = true;
	    }

	    if(flag_mod && flag_charge) {
		    s.set_peptide(pep);
		    s.setCharge(charge);
		    s.set_num_mod(mod_num);
		    s.set_org_pm(mw);
		    spectrum_vector.push_back(s);
	    }
	}
}
  


//void read_spectra_x() {}

// Reads spectra from input_file into spectrum_vector after working out which
// header indices to load, then delegates to the overload that takes an
// explicit scan set.
//
// When bring_only_ms2_flag is off, the scan set is the index range
// [start_scan, stop_scan); a bound whose flag is unset is first reset to 0
// or to the number of headers respectively. When the flag is on, every
// header whose MS level is >= 1 is selected.
// NOTE(review): the flag name suggests MS2 only, but the test is ">= 1" -
// confirm the intended level.
void ReadSpecModel::read_spectra(Config* config, string input_file, AnnotatedSpectrumCounterList& spectrum_vector) {

  // Open the file once here just to enumerate its headers.
  SpectraAggregator sa;
  if(setMinPeakCountFlag)
    sa.setMinPeakCount(minPeakCount);
  if(setMinMsLevelFlag)
    sa.setMinMsLevel(minMsLevel);
  sa.initializeFromSpectraFilePath(input_file.c_str(), config);
  SpectraList sl(sa);
  sl.selectAllAggregatorHeaders();
  const int header_count = sl.getNumHeaders();

  vector<int> scan_set;

  if(bring_only_ms2_flag) {
	  for(int idx = 0; idx < header_count; idx++) {
		  const SingleSpectrumHeader* const hdr = sl.getSpectrumHeader(idx);
		  if(hdr->getMsLevel() >= 1)
			  scan_set.push_back(idx);
	  }
  } else {
	  if(!start_scan_flag)
		  start_scan = 0;
	  if(!stop_scan_flag)
		  stop_scan = header_count;
	  for(int idx = start_scan; idx < stop_scan; idx++)
		  scan_set.push_back(idx);
  }

  read_spectra(config, input_file, spectrum_vector, scan_set);
}


// Reads the spectra whose header indices are listed in scan_set from
// input_file into spectrum_vector, producing one entry per (scan, charge)
// combination that passes the optional parent-mass window.
//
// spectrum_vector is cleared first, sized for the worst case
// (scan_set.size() * fixed_charge_vector.size()), filled front to back, and
// finally shrunk to the number of entries actually written.
//
// NOTE(review): when fix_charge_flag is off, fixed_charge_vector is replaced
// inside the loop by the charge of spectrum_vector[scan_ind], i.e. of a slot
// that has not been read yet at that point, and the initial resize used the
// previous size of fixed_charge_vector - confirm this is intended.
void ReadSpecModel::read_spectra(Config* config, string input_file, AnnotatedSpectrumCounterList& spectrum_vector, vector<int> scan_set) {

  // Open the spectra file, applying the optional aggregator limits.
  SpectraAggregator sa;
  if(setMinPeakCountFlag)
    sa.setMinPeakCount(minPeakCount);
  if(setMinMsLevelFlag)
    sa.setMinMsLevel(minMsLevel);
  sa.initializeFromSpectraFilePath(input_file.c_str(), config);
  SpectraList sl(sa);
  sl.selectAllAggregatorHeaders();
  // Not used further in this function.
  int total_number_headers = sl.getNumHeaders();
  spectrum_vector.clear();
  spectrum_vector.resize(scan_set.size()*fixed_charge_vector.size());
  int sc;
  int charge;
  float mz;
  float org_pm;
  
  if(print_flag)
    cout << "Reading " << scan_set.size() << " spectra..." <<  endl;
  // scan_ind is the next free slot in spectrum_vector; it only advances when
  // an entry is accepted.
  int scan_ind = 0;
  for (int scan_num=0; scan_num<scan_set.size(); scan_num++) {
    if(!fix_charge_flag) {
      fixed_charge_vector.clear();
      fixed_charge_vector.push_back(spectrum_vector[scan_ind].getCharge());
    }
    for(int charge_ind = 0; charge_ind < fixed_charge_vector.size(); charge_ind++) {
      //cout << scan_ind << endl;
    	sc = scan_set[scan_num];
      // cout << "sc : " << sc << endl;
      const SingleSpectrumHeader* const header = sl.getSpectrumHeader(sc);
      // Parent mass implied by the header m/z and the candidate charge.
      float final_mz = header->getMOverZ();
      float final_pm = (final_mz-MASS_PROTON)*fixed_charge_vector[charge_ind];
      // cout << fixed_charge_vector[charge_ind] << endl;
      // cout << final_mz << endl;
      // cout << final_pm << " " << min_anal_pm << " " << max_anal_pm << endl;

      // Accept the entry unless the parent-mass window is enabled and the
      // mass falls outside the open interval (min_anal_pm, max_anal_pm).
      if(use_anal_pm_flag == false || (final_pm > min_anal_pm && final_pm < max_anal_pm)) {
      // cout << "Hey!" << endl;
      // Keep a private copy of the header inside the spectrum; the read
      // below uses that copy rather than the list's header.
      spectrum_vector[scan_ind].head = *header;
	  if(filter_step_flag)
	       	  spectrum_vector[scan_ind].set_filter_step(filter_step);
  	  if(filter_peak_num_flag)
	  	  spectrum_vector[scan_ind].set_filter_peak_num(filter_peak_num);
  	  if(num_top_peaks_flag)
  		  spectrum_vector[scan_ind].set_spectrum_length(num_top_peaks); 
      spectrum_vector[scan_ind].set_filter_type(filter_type); // 0 means pepnovo, 1 means mine
      // Three indices are recorded: position in spectrum_vector, position in
      // scan_set, and the header index in the file.
      spectrum_vector[scan_ind].scan_index = scan_ind;
      spectrum_vector[scan_ind].original_scan_index = scan_num;
      spectrum_vector[scan_ind].global_scan_index = scan_set[scan_num];
	
	
      // cout << scan_ind << endl;
      // SingleSpectrumHeader* h;
      //header->setMOverZ(spectrum_vector[scan_ind].get_m_over_z());
      // cout << "get m over z : " << header->getMOverZ() << endl;
      // spectrum_vector[scan_ind].readSpectrum(sa, header, do_filter);
      spectrum_vector[scan_ind].readSpectrum(sa, &(spectrum_vector[scan_ind].head), do_filter);
      // spectrum_vector[scan_ind].setHeader(header);
      // cout << "get m over z header: " << spectrum_vector[scan_ind].getHeader()->getMOverZ() << endl;
      if(de_isotope_flag)
        spectrum_vector[scan_ind].de_isotope(de_isotope_val, de_isotope_thresh);
      // cout << spectrum_vector[scan_ind].getNumPeaks() << endl;
      // cout << scan_ind << " " << header->getScanNumber() << " " << spectrum_vector[scan_ind].getScanNumber() << endl;
      // Optional m/z override, then force the candidate charge and derive the
      // parent mass from the (possibly overridden) m/z.
      if(fix_mass_flag)
        spectrum_vector[scan_ind].set_m_over_z(fixed_mass);
      spectrum_vector[scan_ind].setCharge(fixed_charge_vector[charge_ind]);
      charge = spectrum_vector[scan_ind].getCharge();
      mz = spectrum_vector[scan_ind].get_m_over_z();
      // cout << "mz " << mz << endl;
      org_pm = charge*(mz-MASS_PROTON);
      spectrum_vector[scan_ind].set_org_pm(org_pm);
      if(integer_spectra_flag)
        spectrum_vector[scan_ind].initialize_integer_spectrum();
      if(binning_flag)
  		  spectrum_vector[scan_ind].initialize_binned_spectrum(bin_width, acc_thresh);
      // cout << spectrum_vector[scan_ind].getScanNumber() << endl;
      scan_ind++;
      }
    }
  }
  // Drop the slots that were reserved but never filled.
  spectrum_vector.resize(scan_ind);
}
  
void SpecPairListList::print_out(int num_cys) {

  for(int spec_pair_ind = 0; spec_pair_ind < (*this)[num_cys].size(); spec_pair_ind++) {
	cout << spec_pair_ind << " ";
	cout << (*this)[num_cys][spec_pair_ind].num_cys << " ";
	cout << (*this)[num_cys][spec_pair_ind].isotopic_off << " ";
	cout << (*this)[num_cys][spec_pair_ind].ind_ra << " ";
	cout << (*this)[num_cys][spec_pair_ind].ind_native << " ";
	cout << (*this)[num_cys][spec_pair_ind].mass_ra << " ";
	cout << (*this)[num_cys][spec_pair_ind].mass_native << " ";
	for(int ind = 0; ind < (*this)[num_cys][spec_pair_ind].share_peaks.size(); ind++)
		cout << (*this)[num_cys][spec_pair_ind].share_peaks[ind] << " ";
	cout << (*this)[num_cys][spec_pair_ind].total_share_peaks << endl;
  }

}

// Builds, for every cysteine count in [0, tot_num_cys), the list of
// (spectrum_vector, spectrum_vector_native) spectrum pairs whose parent
// masses differ by num_cys*2*cys_off plus an isotopic offset of -1, 0 or +1,
// within thrsh.
//
// For each spectrum of spectrum_vector and each cysteine count at most one
// pair is kept: the first match is stored and later matches replace it only
// when they have a strictly larger total_share_peaks. Afterwards every list
// is sorted by decreasing total_share_peaks, truncated to len entries when
// len >= 0, and scan_list / scan_list_native / pair_ind are rebuilt from the
// surviving pairs.
//
//   thrsh     tolerance on the parent-mass difference
//   thr_pair  tolerance passed to intersect_spectrum_unique()
void SpecPairListList::construct(vector<AnnotatedSpectrumCounter>& spectrum_vector, vector<AnnotatedSpectrumCounter>& spectrum_vector_native, float thrsh, float thr_pair, int tot_num_cys) {

  // Incremented for every stored pair but not read afterwards.
  int count = 0;
  resize(tot_num_cys);
  flag_used.resize(spectrum_vector.size());
  pair_ind.resize(spectrum_vector.size());
  for(int ind_ra = 0; ind_ra < spectrum_vector.size(); ind_ra++) {
      flag_used[ind_ra].resize(tot_num_cys);
      pair_ind[ind_ra].resize(tot_num_cys);
      for(int num_cys = 0; num_cys < tot_num_cys; num_cys++) {
	  flag_used[ind_ra][num_cys] = false;
      }
      for(int ind_native = 0; ind_native < spectrum_vector_native.size(); ind_native++) {
	  float mass_red = spectrum_vector[ind_ra].get_org_pm();
	  float mass_nat = spectrum_vector_native[ind_native].get_org_pm();
          for(int num_cys = 0; num_cys < tot_num_cys; num_cys++) {
		for(int isotopic_off = -1; isotopic_off<=1; isotopic_off++) {
			// NOTE(review): relies on a floating-point overload of abs being
			// visible; with only the integer abs the difference would be
			// truncated - confirm the headers in use.
			if(abs(mass_red - mass_nat - num_cys*2*cys_off - isotopic_off)<thrsh) {
				SpecPair sp;
				// cout << ind << " " << mass_red << " " << ind_native << " " << mass_nat << " " << num_cys << isotopic_off << endl;
				// log_file << ind << " " << mass_red << " " << ind_native << " " << mass_nat << " " << num_cys << isotopic_off << endl;
				sp.num_cys = num_cys;
				sp.isotopic_off = isotopic_off;
				sp.ind_ra = ind_ra;
				sp.ind_native = ind_native;
				sp.mass_ra = mass_red;
				sp.mass_native = mass_nat; 
				sp.total_share_peaks = 0;
				// One shared-peak count per shift 0..num_cys; the total is their sum.
				// NOTE(review): the shift passed is shared_ind*num_cys, a
				// product of two counts rather than a mass - confirm whether
				// a mass offset such as cys_off was intended.
				for(int shared_ind = 0; shared_ind <= num_cys; shared_ind++) {
					sp.share_peaks.push_back(intersect_spectrum_unique(spectrum_vector[sp.ind_ra], spectrum_vector_native[sp.ind_native], shared_ind*num_cys, thr_pair));
					sp.total_share_peaks += sp.share_peaks[shared_ind];
				}
				if(flag_used[ind_ra][num_cys] == false) {
					// First match for this (spectrum, cysteine count): store it
					// and remember where it lives.
					// sp.index = count;
					(*this)[num_cys].push_back(sp);
					pair_ind[ind_ra][num_cys] = (*this)[num_cys].size() - 1;
					flag_used[ind_ra][num_cys] = true;
					count++;
					
				} else {
					// A pair already exists: keep whichever shares more peaks.
					if ((*this)[num_cys][pair_ind[ind_ra][num_cys]].total_share_peaks < sp.total_share_peaks) {
						(*this)[num_cys][pair_ind[ind_ra][num_cys]] = sp;
					}
				}
			}
		}
	}
     }
  }
  scan_list.resize(tot_num_cys);
  scan_list_native.resize(tot_num_cys);
  // Sorting and truncation invalidate the positions stored in pair_ind, so
  // they are recomputed into a fresh table. Entries for spectra without a
  // surviving pair are left at their resize() default; use flag_used to tell
  // them apart from a genuine index.
  vector< vector <int> > final_pair_ind;
  final_pair_ind.resize(spectrum_vector.size());
  for(int ind = 0; ind < spectrum_vector.size(); ind++)
	final_pair_ind[ind].resize(tot_num_cys);
  for(int num_cys = 0; num_cys < tot_num_cys; num_cys++) {
	scan_list[num_cys].clear();
	scan_list_native[num_cys].clear();
	sort((*this)[num_cys].begin(), (*this)[num_cys].end(), CompareSpecPairBySharePeaks);
	// cout << "len : " << len << endl;
	// A negative len disables the truncation.
	if(len>=0 && (*this)[num_cys].size()>len)
		(*this)[num_cys].resize(len);
	for(int ind = 0; ind<(*this)[num_cys].size(); ind++) {
		scan_list[num_cys].push_back((*this)[num_cys][ind].ind_ra);
		scan_list_native[num_cys].push_back((*this)[num_cys][ind].ind_native);
		final_pair_ind[scan_list[num_cys][ind]][num_cys] = ind;
	}
  }
  pair_ind = final_pair_ind;

}

// Sort predicate that places pairs with more shared peaks first.
bool CompareSpecPairBySharePeaks(const SpecPair& sp_1, const SpecPair& sp_2) {
	return sp_2.total_share_peaks < sp_1.total_share_peaks;
}


// Returns how many entries of the peptide's amino-acid list equal aa.
int calc_num_aa(PeptideAdd& pep, int aa) {
	vector<int> residues = pep.get_amino_acids();
	int occurrences = 0;
	for(vector<int>::const_iterator it = residues.begin(); it != residues.end(); ++it) {
		if(*it == aa)
			++occurrences;
	}
	return occurrences;
}

// Streams the peptide's seq entries separated (and followed) by a single
// space, then ends the line. Returns the stream so calls can be chained.
ostream& operator << (ostream& os, const Non_Linear_Peptide& pep)
{
	for(size_t ind = 0; ind < pep.seq.size(); ind ++) {
		os << pep.seq[ind] << " ";
	}
	os << endl;
	// The stream must be returned: flowing off the end of a non-void
	// function is undefined behaviour and breaks chained insertions.
	return os;
}

// Returns true when every element of seq_1 differs from the element of seq_2
// at the same position by less than thresh.
//
// Only the first seq_1.size() elements of seq_2 are inspected; extra
// elements in seq_2 are ignored. If seq_2 is shorter than seq_1 the
// sequences cannot match and false is returned (instead of reading past the
// end of seq_2). An empty seq_1 matches anything.
bool within_threshold(vector<float> seq_1, vector<float> seq_2, float thresh) {
	if(seq_2.size() < seq_1.size())
		return false;
	for(size_t pos = 0; pos < seq_1.size(); pos++) {
		// Absolute difference computed by hand so the result does not
		// depend on which abs() overload happens to be visible.
		float diff = seq_1[pos] - seq_2[pos];
		if(diff < 0)
			diff = -diff;
		if(!(diff < thresh))
			return false;
	}
	return true;
}

// Returns seq rotated left by pos: the elements from index pos to the end,
// followed by the elements before index pos.
vector<float> rotate(vector<float> seq, int pos){
	vector<float> rotated;

	// Tail first: everything from pos onwards.
	size_t tail_idx = pos;
	while(tail_idx < seq.size()) {
		rotated.push_back(seq[tail_idx]);
		++tail_idx;
	}

	// Then the head that was skipped.
	int head_idx = 0;
	while(head_idx < pos) {
		rotated.push_back(seq[head_idx]);
		++head_idx;
	}
	return rotated;
}

// Sort predicate that places peptides with a higher score first.
bool CompareNLPByScore(const Non_Linear_Peptide nlp_1, const Non_Linear_Peptide nlp_2) {
	return nlp_2.score < nlp_1.score;
}

// Converts a sequence of increments into its running (cumulative) sums:
// cum_seq[i] = diff_seq[0] + ... + diff_seq[i].
// cum_seq is resized to match diff_seq; an empty input yields an empty output.
void diff_to_com(vector<float>& diff_seq, vector<float>& cum_seq) {
	cum_seq.resize(diff_seq.size());
	// Nothing to accumulate; also avoids touching element 0 of an empty vector.
	if(diff_seq.empty())
		return;
	cum_seq[0] = diff_seq[0];
	for(size_t ind = 1; ind<cum_seq.size(); ind++) {
		cum_seq[ind] = cum_seq[ind-1] + diff_seq[ind];
	}
}
	
// Converts running (cumulative) sums back into increments:
// diff_seq[0] = cum_seq[0], diff_seq[i] = cum_seq[i] - cum_seq[i-1].
// diff_seq is resized to match cum_seq; an empty input yields an empty output.
void cum_to_diff(vector<float>& cum_seq, vector<float>& diff_seq) {
	diff_seq.resize(cum_seq.size());
	// Nothing to difference; also avoids touching element 0 of an empty vector.
	if(cum_seq.empty())
		return;
	diff_seq[0] = cum_seq[0];
	for(size_t ind = 1; ind<cum_seq.size(); ind++) {
		diff_seq[ind] = cum_seq[ind] - cum_seq[ind - 1];
	}
}
		
// Sort predicate that places spectra with a larger parent mass
// (get_org_pm()) first.
bool CompareSpectrumByMass(const AnnotatedSpectrumAdd& s_1, const AnnotatedSpectrumAdd& s_2) {
	return s_2.get_org_pm() < s_1.get_org_pm();
}



// Counts how many theoretical masses fall into a bin that is set in row
// thresh_index of the binned spectrum. Each mass is mapped to the nearest
// bin index, (mass - offset) / bin_width rounded by adding 0.5; indices
// outside [0, length] are ignored.
// NOTE(review): index == length is accepted - confirm that each bin row
// holds length + 1 entries.
score_t intersect_threshold_binned(float* theo_spec, int theo_spec_size, BinnedSpectrum& binned_spectrum, int thresh_index) {
	const float width = binned_spectrum.bin_width;
	const float shift = binned_spectrum.offset;
	const int last_bin = binned_spectrum.length;

	score_t hits = 0;
	for(int k = 0; k < theo_spec_size; ++k) {
		const int bin_idx = int((theo_spec[k]-shift)/width + 0.5);
		if(bin_idx < 0 || bin_idx > last_bin)
			continue;
		if(binned_spectrum.bin[thresh_index][bin_idx] == true)
			hits++;
	}
	return hits;
}

// Intensity-weighted variant of the divide-and-conquer spectrum intersection.
//
// The experimental masses are split around their middle element. Theoretical
// masses above (middle - threshold) are forwarded to the upper half, those
// below (middle + threshold) to the lower half, and masses satisfying both
// conditions count as a match of the middle element. The middle element
// contributes the largest intensity_vec value seen among its matches; the
// two halves are then scored recursively and added.
//
// Presumably exper_spec is sorted in increasing order (the split only makes
// sense then) - TODO confirm with callers.
//
// NOTE(review): inside the loop intensity_vec is indexed with `ind`, the
// index into theo_spec, whereas the recursive calls shift intensity_vec in
// step with exper_spec. If intensity_vec holds one value per experimental
// peak, intensity_vec[mid_len] looks like the intended element, and the
// current indexing can read past exper_spec_size entries when theo_spec is
// the longer array - confirm the intended semantics before relying on it.
score_t intersect_threshold_intensity(float* theo_spec, int theo_spec_size, float* exper_spec, float* intensity_vec, int exper_spec_size, mass_t threshold) {

	// Either side empty: nothing can match.
	if(exper_spec_size == 0 || theo_spec_size == 0)
	    return 0;
	int len = exper_spec_size;
	int mid_len = int(len/2);

	// Scratch buffers for the theoretical masses passed to each half; a mass
	// within threshold of the middle element lands in both.
	float right_part[theo_spec_size];
	float left_part[theo_spec_size];

	score_t score = 0;
	score_t score_temp = 0;
	int I_r = 0;
	int I_l = 0;
	bool fl_1, fl_2, flag;

	flag = false;
	for(int ind = 0; ind<theo_spec_size; ind++){
	    // fl_1: mass is above the lower edge of the middle element's window.
	    // fl_2: mass is below the upper edge of that window.
	    fl_1 =  theo_spec[ind]>(exper_spec[mid_len]-threshold);
	    fl_2 =  theo_spec[ind]<(exper_spec[mid_len]+threshold); 
	    if(fl_1){
		right_part[I_r] = theo_spec[ind];
		I_r++;}
	    if(fl_2){
		left_part[I_l] = theo_spec[ind];
		I_l++;}
	    if(fl_1 && fl_2){
		// Inside the window: the middle element is matched; keep the
		// largest intensity value seen so far.
		flag = true;
		if(intensity_vec[ind]>score_temp)
			score_temp = intensity_vec[ind];
	    }
	}
	if(flag == true)
		score = score_temp;
	// Recurse on the experimental peaks above and below the middle element.
	score += intersect_threshold_intensity(right_part, I_r, &(exper_spec[mid_len+1]), &(intensity_vec[mid_len+1]), len - mid_len -1, threshold);
	score += intersect_threshold_intensity(left_part, I_l, exper_spec, intensity_vec, mid_len, threshold);
	return score;
}


// Scores the overlap between the peak masses of two spectra by handing two
// mass lists to intersect_threshold_unique() with tolerance thr.
//
//   offset  added to every mass taken from s_2 before the comparison
//   fwd     true:  peak masses are used as they are
//           false: each mass is replaced by (parent mass - peak mass), i.e.
//                  the spectra are compared from the opposite end
//
// The mass lists are held in vectors rather than variable-length arrays, so
// the function is standard C++ and does not place peak-count-sized buffers
// on the stack.
score_t intersect_spectrum_unique(const AnnotatedSpectrumAdd& s_1, const AnnotatedSpectrumAdd& s_2, float offset, float thr, bool fwd) {
	const int s_1_size = s_1.getNumPeaks();
	const int s_2_size = s_2.getNumPeaks();
	vector<float> s_1_mass_vec(s_1_size);
	vector<float> s_2_mass_vec(s_2_size);

	if(fwd) {
		for(int ind = 0; ind < s_1_size; ind++)
			s_1_mass_vec[ind] = s_1.getPeakMass(ind);
		for(int ind = 0; ind < s_2_size; ind++)
			s_2_mass_vec[ind] = s_2.getPeakMass(ind) + offset;
	} else {
		for(int ind = 0; ind < s_1_size; ind++)
			s_1_mass_vec[ind] = s_1.get_org_pm() - s_1.getPeakMass(ind);
		for(int ind = 0; ind < s_2_size; ind++)
			s_2_mass_vec[ind] = s_2.get_org_pm() - s_2.getPeakMass(ind) + offset;
	}

	// An empty vector has no first element to point at; the callee returns
	// immediately for a zero size and never dereferences the pointer.
	float* s_1_masses = s_1_mass_vec.empty() ? 0 : &s_1_mass_vec[0];
	float* s_2_masses = s_2_mass_vec.empty() ? 0 : &s_2_mass_vec[0];
	return intersect_threshold_unique(s_1_masses, s_1_size, s_2_masses, s_2_size, thr);
}

// Divide-and-conquer count of experimental masses that have at least one
// theoretical mass within that theoretical mass's own tolerance
// (thresh[k] belongs to theo_spec[k]).
//
// The experimental list is split around its middle element. A theoretical
// mass above (middle - tolerance) is forwarded to the upper half, one below
// (middle + tolerance) to the lower half, and one satisfying both marks the
// middle element as matched. Each experimental element adds at most 1.
score_t intersect_threshold_unique_thresh(float* theo_spec, int theo_spec_size, float* exper_spec, int exper_spec_size, float* thresh) {

	if(theo_spec_size == 0 || exper_spec_size == 0)
	    return 0;

	const int pivot = exper_spec_size/2;

	// Theoretical masses (with their tolerances) forwarded to each half.
	float upper_masses[theo_spec_size];
	float lower_masses[theo_spec_size];
	float upper_tols[theo_spec_size];
	float lower_tols[theo_spec_size];
	int upper_count = 0;
	int lower_count = 0;
	bool pivot_hit = false;

	for(int k = 0; k < theo_spec_size; ++k) {
	    const bool above_low_edge = theo_spec[k] > (exper_spec[pivot]-thresh[k]);
	    const bool below_high_edge = theo_spec[k] < (exper_spec[pivot]+thresh[k]);
	    if(above_low_edge) {
		upper_masses[upper_count] = theo_spec[k];
		upper_tols[upper_count] = thresh[k];
		++upper_count;
	    }
	    if(below_high_edge) {
		lower_masses[lower_count] = theo_spec[k];
		lower_tols[lower_count] = thresh[k];
		++lower_count;
	    }
	    pivot_hit = pivot_hit || (above_low_edge && below_high_edge);
	}

	score_t score = 0;
	if(pivot_hit)
		score = 1;
	score += intersect_threshold_unique_thresh(upper_masses, upper_count, exper_spec + pivot + 1, exper_spec_size - pivot - 1, upper_tols);
	score += intersect_threshold_unique_thresh(lower_masses, lower_count, exper_spec, pivot, lower_tols);
	return score;
}

// Divide-and-conquer count of experimental masses that have at least one
// theoretical mass within threshold.
//
// The experimental list is split around its middle element. A theoretical
// mass above (middle - threshold) is forwarded to the upper half, one below
// (middle + threshold) to the lower half, and one satisfying both marks the
// middle element as matched. Each experimental element adds at most 1.
score_t intersect_threshold_unique(float* theo_spec, int theo_spec_size, float* exper_spec, int exper_spec_size, mass_t threshold) {

	if(theo_spec_size == 0 || exper_spec_size == 0)
	    return 0;

	const int pivot = exper_spec_size/2;

	// Theoretical masses forwarded to each half.
	float upper_masses[theo_spec_size];
	float lower_masses[theo_spec_size];
	int upper_count = 0;
	int lower_count = 0;
	bool pivot_hit = false;

	for(int k = 0; k < theo_spec_size; ++k) {
	    const bool above_low_edge = theo_spec[k] > (exper_spec[pivot]-threshold);
	    const bool below_high_edge = theo_spec[k] < (exper_spec[pivot]+threshold);
	    if(above_low_edge) {
		upper_masses[upper_count] = theo_spec[k];
		++upper_count;
	    }
	    if(below_high_edge) {
		lower_masses[lower_count] = theo_spec[k];
		++lower_count;
	    }
	    pivot_hit = pivot_hit || (above_low_edge && below_high_edge);
	}

	score_t score = 0;
	if(pivot_hit)
		score = 1;
	score += intersect_threshold_unique(upper_masses, upper_count, exper_spec + pivot + 1, exper_spec_size - pivot - 1, threshold);
	score += intersect_threshold_unique(lower_masses, lower_count, exper_spec, pivot, threshold);
	return score;
}




// Partitions arr[low..high] around the pivot arr[low] for Quick_sort.
//
// On return the pivot sits at the returned index, every element before it is
// <= the pivot and every element after it is >= the pivot.
//
// The temporaries are float, so values keep their fractional part, and both
// scans step over elements equal to the pivot, so the loop always makes
// progress even when the range contains duplicates.
int Partition(int low,int high,float arr[])
{
   float pivot = arr[low];
   while(low < high)
   {
      // Scan down from the right for an element smaller than the pivot and
      // move it into the hole at `low`.
      while(low < high && arr[high] >= pivot)
         high--;
      arr[low] = arr[high];

      // Scan up from the left for an element larger than the pivot and move
      // it into the hole left at `high`.
      while(low < high && arr[low] <= pivot)
         low++;
      arr[high] = arr[low];
   }
   // low == high marks the final hole: the pivot's sorted position.
   arr[low] = pivot;
   return low;
}

// Sorts arr[low..high] in place by recursive partitioning: Partition() puts
// one element at its final position and the ranges on either side of it are
// then sorted the same way.
void Quick_sort(int low,int high, float arr[])
{
  // Ranges of fewer than two elements need no work.
  if(low >= high)
    return;

  const int split = Partition(low, high, arr);
  Quick_sort(low, split-1, arr);
  Quick_sort(split+1, high, arr);
}

// Returns how many entries of score_vector are strictly greater than score.
// An empty vector yields 0.
int num_greater(vector<int> score_vector, int score) {
  int num = 0;
  for(size_t ind = 0; ind<score_vector.size(); ind++) {
    if(score_vector[ind]>score)
      num++;
  }
  // Returned only after the whole vector has been examined.
  return num;
}


float sum_vector(vector<mass_t> input){
    float tmp = 0.f;
    for(int i = 0; i < input.size(); i++){
        tmp += input[i];
    }
    return tmp;
}



// Computes the mass of an elemental composition such as "C6H12O6".
//
// The string is read as a sequence of terms. Each term is an element symbol
// (one character followed by any number of lowercase ASCII letters) and an
// optional decimal count (1 when absent). Supported symbols are
// C, H, O, N, S, P, F, I, Cl and Br. A symbol that is not in the table is
// reported on stdout and contributes nothing to the result.
//
// Characters are classified by direct comparison rather than by indexing a
// lookup table with a char, so bytes outside the ASCII range cannot produce
// a negative index.
float elcom2mass(string str){
    static const char* const atom_string[] = {"C", "H", "O", "N", "S", "P", "F", "I", "Cl", "Br"};
    static const float atom_mass[] = {12, 1.007825, 15.994914, 14.003074, 31.97207,
                                      30.973762, 18.99840322, 126.904473, 34.96885272, 78.9183361};
    const size_t atom_count = sizeof(atom_mass)/sizeof(atom_mass[0]);

    const size_t len = str.length();
    size_t pos = 0;
    float mass = 0;
    while(pos < len){
        // Element symbol: the current character plus trailing lowercase letters.
        string res_atom(1, str[pos]);
        pos++;
        while(pos < len && str[pos] >= 'a' && str[pos] <= 'z') {
            res_atom.push_back(str[pos]);
            pos++;
        }

        // Optional count; a missing count means one atom.
        string res_num;
        while(pos < len && str[pos] >= '0' && str[pos] <= '9') {
            res_num.push_back(str[pos]);
            pos++;
        }
        const int num = res_num.empty() ? 1 : atoi(res_num.c_str());

        // Symbols in the table are unique, so at most one entry matches.
        bool matched = false;
        for(size_t ind = 0; ind < atom_count; ind++) {
            if(res_atom == atom_string[ind]) {
                mass += num*atom_mass[ind];
                matched = true;
                break;
            }
        }
        if(!matched) {
            cout << "Error : no match for " << str << " " << res_atom << " " << num << endl;
        }
    }

    return mass;

}



// Writes the selected scans of several spectrum files into one MGF file.
//
// mgf_file is truncated first; then, for each entry of spectrum_files,
// filter_mgf() is called on path + file name with the matching entry of
// scan_ind. The value returned by one call is passed to the next as its
// starting scan index.
//
// The input path is assembled in a std::string, so its length is not limited
// by a fixed-size buffer.
void write_filtered_spectra(Config* config, string path, vector<string> spectrum_files, vector<vector<int> > scan_ind, string mgf_file) {
  // Create or empty the output file before filter_mgf() starts writing to it.
  ofstream out_file;
  out_file.open(mgf_file.c_str(), ios_base::trunc);
  out_file.close();

  int scan_index_start = 0;
  for(size_t ind = 0; ind<spectrum_files.size(); ind++) {
    const string full_path = path + spectrum_files[ind];
    // filter_mgf() was previously handed a writable char buffer; keep
    // passing one, sized to fit the whole path plus its terminator.
    vector<char> spec_file(full_path.begin(), full_path.end());
    spec_file.push_back('\0');
    scan_index_start = filter_mgf(config, &spec_file[0], mgf_file, scan_ind[ind], scan_index_start, true);
  }
}

// Writes the selected scans of several spectrum files into one MGF file,
// passing along the per-scan sequences and charges.
//
// mgf_file is truncated first; then, for each entry of spectrum_files,
// filter_mgf_seq_charge() is called on path + file name with the matching
// entries of scan_ind, seqs and charge. The value returned by one call is
// passed to the next as its starting scan index.
//
// The input path is assembled in a std::string, so its length is not limited
// by a fixed-size buffer.
void write_filtered_spectra_seq_charge(Config* config, string path, vector<string> spectrum_files, vector<vector<int> > scan_ind, vector<vector <string> > seqs, vector<vector<int> > charge, string mgf_file) {
  // Create or empty the output file before the filter starts writing to it.
  ofstream out_file;
  out_file.open(mgf_file.c_str(), ios_base::trunc);
  out_file.close();

  int scan_index_start = 0;
  for(size_t ind = 0; ind<spectrum_files.size(); ind++) {
    const string full_path = path + spectrum_files[ind];
    // The filter was previously handed a writable char buffer; keep passing
    // one, sized to fit the whole path plus its terminator.
    vector<char> spec_file(full_path.begin(), full_path.end());
    spec_file.push_back('\0');
    scan_index_start = filter_mgf_seq_charge(config, &spec_file[0], mgf_file, scan_ind[ind], seqs[ind], charge[ind], scan_index_start, true);
  }
}

// Turns a set of search results into an MGF file: read_scan_ind() extracts
// the per-file scan indices, sequences, charges and file names from
// res_inspect, and the matching spectra are then written to mgf_file.
// When seq_charge is true the sequences and charges are passed to the writer
// as well; otherwise only the scan indices are used.
void inspect_res_mgf(Config* config, ResInspect res_inspect, string path, string mgf_file, bool seq_charge) {
  vector<string> spectrum_files;
  vector< vector<int> > scan_ind;
  vector< vector<string> > seqs;
  vector< vector<int> > charge;
  read_scan_ind(res_inspect, scan_ind, seqs, charge, spectrum_files);

  if(!seq_charge) {
    write_filtered_spectra(config, path, spectrum_files, scan_ind, mgf_file);
    return;
  }
  write_filtered_spectra_seq_charge(config, path, spectrum_files, scan_ind, seqs, charge, mgf_file);
}




