#include "PrmGraph.h"
#include "AnnotatedSpectrum.h"
#include "auxfun.h"


// Indicator arrays that are static members of PrmGraph.
// out_aa_ind[aa][n] is set to true by fill_single_multi_edges() when a
// single amino acid edge labelled with aa leaves node n.
vector<bool *> PrmGraph::out_aa_ind; // holds for each amino acid and node
												 // whether there is a single aa leaving that node
// in_aa_ind[aa][n] is set to true by fill_single_multi_edges() when a
// single amino acid edge labelled with aa enters node n.
vector<bool *> PrmGraph::in_aa_ind;

// Number of bool entries allocated for each array held in out_aa_ind and
// in_aa_ind (grown in fill_single_multi_edges() when a graph has more nodes).
int PrmGraph::ind_vector_lengths;

/********************************************************
Builds the whole graph for one spectrum: creates and scores the
nodes, adds the digest nodes, fills the multi edges of every
length up to config->get_max_edge_length(), sets node penalties
and ranks the nodes.

_model       - model used for scoring; also supplies the config
spectrum     - source spectrum (its size idx is changed while the
               graph is built and restored before returning)
_pm_with_19  - parent mass (+19) the graph is built for
spec_charge  - charge to use; 0 means use the spectrum's charge

Exits the program if _pm_with_19 is below 10.
*********************************************************/
void PrmGraph::create_graph_from_spectrum(Model *_model, Spectrum *spectrum,
							mass_t _pm_with_19, int spec_charge)
{
	model           = _model;
	config          = model->get_config();
	source_spectrum = spectrum;
	pm_with_19      = _pm_with_19;

	// a charge of 0 stands for "use the charge stored in the spectrum"
	if (spec_charge != 0)
		charge = spec_charge;
	else
		charge = source_spectrum->get_charge();

	// the spectrum gets the size idx that matches this charge and mass while
	// the graph is being built; the previous value is put back at the end
	const int saved_size_idx = spectrum->get_size_idx();
	size_idx = config->calc_peptide_size_idx(charge,pm_with_19);
	spectrum->set_size_idx(size_idx);

	digest_node_score = config->get_terminal_score();
	model->init_model_for_scoring_spectrum(source_spectrum);

	if (pm_with_19<10)
	{
		cout << "Error: supplied negative/low PM for graph!" << endl;
		exit(1);
	}

	// nodes
	create_nodes();
	add_digest_nodes();
	score_nodes(model);

	set_significant_masses();

	// edges: lengths 1 and 2 have dedicated functions, the rest share one
	fill_single_multi_edges();
	fill_double_multi_edges();

	int edge_length = 3;
	while (edge_length <= config->get_max_edge_length())
	{
		fill_longer_multi_edges(edge_length);
		++edge_length;
	}

	// final node bookkeeping
	set_node_penalties(model);
	rank_nodes_according_to_score();
	set_idxs_max_in_out_for_nodes();

	// restore size idx
	spectrum->set_size_idx(saved_size_idx);
}


/***********************************************************************
Finds the optimal value for pm_with_19.

Candidate masses from base_pm_with_19-1.5 to base_pm_with_19+1.5 are tried
in steps of 0.1. For each candidate the nodes are created and scored, and
the scores of the top nodes (between 5 and 8, depending on the mass; fewer
for base masses below 800) are summed. After a node is selected, all nodes
within 25 Da of it and within 10 Da of its mirror mass are excluded from
further selection. A selected node that has a very close neighbor with a
different source fragment type contributes only half its score (for each
such neighbor).

Returns the candidate mass with the highest sum; if several candidates tie,
the middle one of them is returned.

Side effects: sets model, config, source_spectrum, charge, size_idx and
digest_node_score, and leaves pm_with_19 and nodes in the state of the last
candidate examined. The spectrum's size idx is restored before returning.
A spec_charge of 0 means the charge stored in the spectrum is used (as in
create_graph_from_spectrum).
************************************************************************/
mass_t PrmGraph::find_optimal_pm_with_19_for_graph(Model *_model, 
							Spectrum *spectrum, mass_t base_pm_with_19,
							int spec_charge)
{
	const mass_t margin    = 1.5;
	const mass_t increment = 0.1;

	model  = _model;
	config = model->get_config();
	source_spectrum = spectrum;

	charge = spec_charge;
	if (charge==0)
		charge = source_spectrum->get_charge();

	const int org_size_idx = spectrum->get_size_idx();
	size_idx = config->calc_peptide_size_idx(charge,base_pm_with_19);
	spectrum->set_size_idx(size_idx);

	digest_node_score = config->get_terminal_score();
	model->init_model_for_scoring_spectrum(source_spectrum);

	const mass_t node_tol = config->get_tolerance()*1.25;

	mass_t best_mass_with_19=-1;
	score_t best_score=NEG_INF;

	int num_top_nodes = 5;
	num_top_nodes+= (int)((base_pm_with_19 - 800.0)/400.0);
	if (num_top_nodes>8)
		num_top_nodes=8;

	vector<score_t> mass_scores;
	vector<mass_t > masses_with_19;

	// marks the nodes that may no longer be selected for the current candidate.
	// The buffer is owned by this call and reused between candidates, so
	// nothing is leaked when a candidate needs a larger map.
	vector<char> forbidden_node_map;

	for (pm_with_19 = base_pm_with_19 - margin; 
		 pm_with_19<= base_pm_with_19+ margin; 
		 pm_with_19 += increment)
	{
		create_nodes();
		add_digest_nodes();
		score_nodes(model);

		const int num_nodes = nodes.size();

		forbidden_node_map.assign(num_nodes,0);

		score_t node_set_score =0;
		int i;
		for (i=0; i<num_top_nodes; i++)
		{
			int top_node_idx = -1;
			score_t top_node_score=NEG_INF;

			// the first node and the last three nodes are never candidates
			const int max_node_idx= num_nodes - 3;
			int j;
			for (j=1; j<max_node_idx; j++)
				if (! forbidden_node_map[j] && nodes[j].score>top_node_score)
				{
					top_node_score = nodes[j].score;
					top_node_idx = j;
				}

			// mark the node and all masses in +- 25 Da range (including the mirror in a
			// +- 10 Da range)
			if (top_node_idx>=0)
			{
				const mass_t min_mass = nodes[top_node_idx].mass - 25.0;
				const mass_t max_mass = nodes[top_node_idx].mass + 25.0;

				forbidden_node_map[top_node_idx]=1;

				int k=top_node_idx;
				while (k>=0 && nodes[k].mass>min_mass)
					forbidden_node_map[k--]=1;
				
				k=top_node_idx;
				while (k<num_nodes && nodes[k].mass<max_mass)
					forbidden_node_map[k++]=1;

				const mass_t mirror_node_mass = pm_with_19 - 19.0183 - nodes[top_node_idx].mass;
				const mass_t min_mirror_mass = mirror_node_mass - 10.0;
				const mass_t max_mirror_mass = mirror_node_mass + 10.0;

				k=0;
				while (k<num_nodes && nodes[k].mass<min_mirror_mass)
					k++;

				while (k<num_nodes && nodes[k].mass<max_mirror_mass)
					forbidden_node_map[k++]=1;

				// add to score
				node_set_score += nodes[top_node_idx].score;

				// check if this looks like part of a misaligned pair (i.e. there is a
				// very close node that has a different source fragment type);
				// if so give a penalty (count only half of the score)

				if (top_node_idx>0 && 
					nodes[top_node_idx-1].mass + node_tol > nodes[top_node_idx].mass &&
					nodes[top_node_idx-1].source_frag_type_idx != nodes[top_node_idx].source_frag_type_idx)
				{
					node_set_score -= nodes[top_node_idx].score * 0.5;
				}

				if (top_node_idx<num_nodes-1 && 
					nodes[top_node_idx+1].mass - node_tol < nodes[top_node_idx].mass &&
					nodes[top_node_idx+1].source_frag_type_idx != nodes[top_node_idx].source_frag_type_idx)
				{
					node_set_score -= nodes[top_node_idx].score * 0.5;
				}
			}
		}

		mass_scores.push_back(node_set_score);
		masses_with_19.push_back(pm_with_19);

		if (node_set_score>best_score)
		{
			best_score = node_set_score;
			best_mass_with_19 = pm_with_19;
		}
	}

	// restore size idx
	spectrum->set_size_idx(org_size_idx);

	// collect all candidates that reached the best score and take the middle one
	vector<int> good_mass_idxs;
	const int num_candidates = (int)mass_scores.size();
	int i;
	for (i=0; i<num_candidates; i++)
	{
		if (mass_scores[i]==best_score)
			good_mass_idxs.push_back(i);
	}

	// only possible when no candidate produced a score above NEG_INF
	if (good_mass_idxs.empty())
		return best_mass_with_19;

	const int idx = good_mass_idxs[good_mass_idxs.size()/2];

	return masses_with_19[idx];
}



/*********************************************************************
Initializes the index array.
The array has (int)pm_with_19+2 entries, one per rounded off Dalton m.
Nodes are grouped by their truncated (integer) mass; entry m holds the
index of the first node of the group with the largest truncated mass
that does not exceed m. Entries below the first node's mass hold 0 and
entries from the last node's truncated mass onwards hold the index of
the last node.
If there are no nodes all entries are -1; if the size is not positive
the array is left empty.
**********************************************************************/
void PrmGraph::init_index_array()
{
	const int size = (int)pm_with_19+2;
	const int max_node_idx = (int)nodes.size()-1;
	
	index_array.clear();
	if (size<=0)
		return;

	index_array.resize(size,-1);

	// nothing to index without nodes
	if (max_node_idx<0)
		return;
	
	// every write below is limited to the array, also when a node mass
	// lies beyond pm_with_19
	int i=0;
	const int first_m=(int)nodes[0].mass;
	while (i<first_m && i<size)
		index_array[i++]=0;

	int c=0;
	while (c< max_node_idx)
	{
		const int curr_m=(int)nodes[c].mass;
		int next_m = curr_m;
		int next_c = c;

		// skip to the first node with a different truncated mass (or the last node)
		while (next_m == curr_m && next_c<max_node_idx)
			next_m=(int)nodes[++next_c].mass;

		while (i<next_m && i<size)
			index_array[i++]=c;
		
		c=next_c;
	}

	while (i<size)
		index_array[i++]=max_node_idx;
}



/***********************************************************
Merges nodes that are close to each other
Gives preference to the prefix fragment, but merge genereally
goes according to the intensities of the source fragments
************************************************************/
void PrmGraph::merge_close_nodes()
{
	int i;
	const vector<FragmentType>& all_fragments = config->get_all_fragments();

	mass_t delta = config->get_tolerance();
	if (delta>0.2)
		delta *= 0.8;

	for (i=0; i<nodes.size()-1; i++)
	{
		if (nodes[i+1].mass - nodes[i].mass < delta &&
			nodes[i+1].source_frag_type_idx != nodes[i].source_frag_type_idx)
		{
			const int frag1_pos = nodes[i].breakage.get_position_of_frag_idx(nodes[i].source_frag_type_idx);
			const int frag2_pos = nodes[i+1].breakage.get_position_of_frag_idx(nodes[i+1].source_frag_type_idx);
			if (frag1_pos <0 || frag2_pos<0)
			{
				Node& node1 = nodes[i];
				Node& node2 = nodes[i+1];

				node1.breakage.print_fragments(config);
				cout << endl;
				node2.breakage.print_fragments(config);
				cout << endl;
				cout << "Error: source fragments not found in breakage!: " <<
					node1.source_frag_type_idx << "  " << node2.source_frag_type_idx 
					<< endl;
				exit(1);
			}
			mass_t new_node_mass;
			intensity_t inten1 = nodes[i].breakage.fragments[frag1_pos].intensity;
			intensity_t inten2 = nodes[i+1].breakage.fragments[frag2_pos].intensity;
			if (all_fragments[nodes[i].source_frag_type_idx].orientation == PREFIX)
			{
				inten1 *= 2;
			}
			else if (all_fragments[nodes[i+1].source_frag_type_idx].orientation == PREFIX)
			{
				inten2 *= 2;
			}

			mass_t mass_times_inten1 = nodes[i].mass * inten1;
			mass_t mass_times_inten2 = nodes[i+1].mass * inten2;

			new_node_mass = (mass_times_inten1 + mass_times_inten2)/ (inten1 + inten2);

			// create new node, move it to the i+1 position
			nodes[i].mass = 99999999;
			nodes[i+1].mass = new_node_mass;

			// transfer fragments from node i to i+1 if they are not there
			int f;
			for (f=0; f<nodes[i].breakage.fragments.size(); f++)
			{
				if (nodes[i+1].breakage.get_position_of_frag_idx(
					 nodes[i].breakage.fragments[f].frag_type_idx) < 0)
				{
//					cout << "Adding fragments! " << endl;
//					nodes[i].breakage.print();
//					nodes[i+1].breakage.print();
//					cout<<endl;

					nodes[i+1].breakage.add_fragment(nodes[i].breakage.fragments[f]);
					
				}
			}
		}
	}
	
	sort(nodes.begin(),nodes.end());
	while (nodes.size()>0 && nodes[nodes.size()-1].mass > 50000)
		nodes.pop_back();

	init_index_array();

}



// Pairs a fragment type with a per-region flag list. In create_nodes() the
// list is sized to the number of regions and region_idxs[r] is set to 1 for
// every region r in which the fragment type is listed.
struct frag_region_list {
	int frag_type_idx;       // idx of the fragment type this entry describes
	vector<int> region_idxs; // one 0/1 flag per region idx
};


/*******************************************************
Selectes nodes for PrmGraph.
Selection done in two stages. First every strong peak is
considered, then combos are considered.
********************************************************/
void PrmGraph::create_nodes()
{
	const int num_regions = config->get_num_regions(charge,size_idx);
	const vector<FragmentType>& all_fragments = config->get_all_fragments();
	const vector<Peak>& peaks = source_spectrum->get_peaks();
	const vector<int>& strong_peak_idxs = source_spectrum->get_strong_peak_idxs();
	const mass_t max_mass_to_create_node = pm_with_19 - 55;
	const mass_t mid_mass = pm_with_19 * 0.5;

	nodes.clear();
	nodes.resize(1);

	vector< vector<int> > peak_usages; // holds for each peak all the interpertations
									   // that were given to it for creating nodes

	peak_usages.resize(peaks.size());
	
	// add N-TERM
	nodes[0].mass=0;
	nodes[0].type = NODE_N_TERM;
	nodes[0].breakage.mass = 0;
	nodes[0].breakage.region_idx=0;
	nodes[0].breakage.parent_charge=charge;
	nodes[0].breakage.parent_size_idx = size_idx;


	// create list for each strong frag type, what regions it can be used as 
	// a basis for creating a node
	vector<frag_region_list> strong_frags_lists;
	int r,i;
	for (r=0; r<num_regions; r++)
	{
		const RegionalFragments& rf = config->get_regional_fragments(charge,size_idx,r);
		const vector<int>& strong_frag_type_idxs = rf.get_strong_frag_type_idxs();
		int f;
		for (f=0; f<strong_frag_type_idxs.size(); f++)
		{
			int j;
			for (j=0; j<strong_frags_lists.size(); j++)
			{
				if (strong_frags_lists[j].frag_type_idx == strong_frag_type_idxs[f])
				{
					strong_frags_lists[j].region_idxs[r]=1;
					break;
				}
			}

			if (j==strong_frags_lists.size())
			{
				frag_region_list frl;
				frl.frag_type_idx= strong_frag_type_idxs[f];
				frl.region_idxs.resize(num_regions,0);
				frl.region_idxs[r]=1;
				strong_frags_lists.push_back(frl);
			}
		}
	}
	


	// create nodes from strong peaks
	for (i=0; i<strong_frags_lists.size(); i++)
	{
		const int strong_frag_idx = strong_frags_lists[i].frag_type_idx;
		const FragmentType& ft = all_fragments[strong_frag_idx];
		const vector<int>& permitted_regions = strong_frags_lists[i].region_idxs;

		int q;
		for (q=0; q<strong_peak_idxs.size(); q++)
		{
			const int p_idx = strong_peak_idxs[q];
			const mass_t exp_break_mass = ft.calc_breakage_mass(peaks[p_idx].mass,pm_with_19);

			if (exp_break_mass<mid_mass && ! config->is_allowed_prefix_mass(exp_break_mass))
			{
			//	cout << "Bad prefix mass: " << exp_break_mass << endl;
				continue;
			}

			if (exp_break_mass>mid_mass && ! config->is_allowed_suffix_mass(pm_with_19,exp_break_mass))
			{
			//	cout << "Bad Suffix mass: " << exp_break_mass << endl;
				continue;
			} 

			if (exp_break_mass < 3 || exp_break_mass > max_mass_to_create_node)
				continue;

			const int region_idx = config->calc_region_idx(exp_break_mass,pm_with_19, 
				source_spectrum->get_min_peak_mass(), source_spectrum->get_max_peak_mass());

			const RegionalFragments & rf = config->get_regional_fragments(charge,size_idx,region_idx);
			const vector<FragmentCombo>&  combos = rf.get_frag_type_combos();

			if (! permitted_regions[region_idx])
				continue;

			Node node;
			node.breakage.region_idx= region_idx;
			node.mass = exp_break_mass;
			node.breakage.mass = exp_break_mass;

			node.type = NODE_REG;
			node.source_frag_type_idx = strong_frag_idx;

			// make sure fragment is present
			BreakageFragment brf;
			brf.expected_mass = peaks[p_idx].mass;
			brf.frag_type_idx = strong_frag_idx;
			brf.mass = peaks[p_idx].mass;
			brf.intensity = peaks[p_idx].intensity;
			brf.peak_idx = p_idx;

			node.breakage.add_fragment(brf);

			annotate_breakage(source_spectrum, pm_with_19, size_idx, node.breakage);

			nodes.push_back(node);

			// add peak usage
			peak_usages[p_idx].push_back(strong_frag_idx);
		}
	}

	// Create nodes from combos
	// if peaks were already used for a certain fragment, then don't create a node
	// the first idx in the combo is a strong_frag_type_idx

	// first create a list for the first strong types from the combos
	vector<frag_region_list> strong_combo_lists;
	for (r=0; r<num_regions; r++)
	{
		const vector<FragmentCombo>& combos = config->get_regional_fragments(charge,size_idx,r).get_frag_type_combos();
		int f;
		for (f=0; f<combos.size(); f++)
		{
			int j;
			for (j=0; j<strong_combo_lists.size(); j++)
			{
				if (strong_combo_lists[j].frag_type_idx == combos[f].frag_inten_idxs[0])
				{
					strong_combo_lists[j].region_idxs[r]=1;
					break;
				}
			}

			if (j==strong_combo_lists.size())
			{
				frag_region_list frl;
				frl.frag_type_idx= combos[f].frag_inten_idxs[0];
				frl.region_idxs.resize(num_regions,0);
				frl.region_idxs[r]=1;
				strong_combo_lists.push_back(frl);
			}
		}
	}

	Node node;
	node.mass = pm_with_19 - 19.0183;
	node.type = NODE_C_TERM;
	node.breakage.mass = node.mass;
	node.breakage.parent_charge=charge;
	node.breakage.parent_size_idx = size_idx;
	node.breakage.region_idx = config->calc_region_idx(node.mass,pm_with_19);

	nodes.push_back(node);

	sort(nodes.begin(),nodes.end());
	
	merge_close_nodes();

	for (i=0; i<nodes.size(); i++)
	{
		nodes[i].breakage.parent_charge = source_spectrum->get_charge();
		nodes[i].breakage.parent_size_idx = source_spectrum->get_size_idx();
	}
	
}


/**********************************************************
Adds the digest nodes for the digest amino acids.

For every C-terminal digest amino acid the expected node mass is
the C-terminal node's mass minus the amino acid's mass; for every
N-terminal digest amino acid it is the amino acid's mass.
If an existing node lies within 1.5 times the tolerance of the
expected mass, that node's type is changed to NODE_DIGEST,
otherwise a new NODE_DIGEST node is created. When nodes were
created the node list is sorted again and the index array is
rebuilt.

Exits the program if C-terminal digest amino acids are defined
but no C-terminal node is found above position 0.
***********************************************************/
void PrmGraph::add_digest_nodes()
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const vector<int>& n_term_digest_aas = config->get_n_term_digest_aas();
	const vector<int>& c_term_digest_aas = config->get_c_term_digest_aas();
	const mass_t digest_tolerance = 1.5 * config->get_tolerance();

	// nothing to attach digest nodes to
	if (nodes.empty())
		return;

	bool added_nodes = false;

	if (c_term_digest_aas.size()>0)
	{
		int c_term_node=nodes.size();
		while (--c_term_node>=0)
			if (nodes[c_term_node].type == NODE_C_TERM)
				break;

		if (c_term_node<=0)
		{
			cout << "Error: couldn't find regular C-terminal!"<< endl;
			exit(1);
		}
		

		int i;
		for (i=0; i<(int)c_term_digest_aas.size(); i++)
		{
			const int aa = c_term_digest_aas[i];
			mass_t exp_node_mass = nodes[c_term_node].mass - aa2mass[aa];

			// new nodes are appended after the C-terminal node, so only the
			// nodes below it (which are still sorted) are searched
			int n=c_term_node-1;
			while (n>=0 && nodes[n].mass>exp_node_mass)
				n--;

			// the closest node is either n or n+1; n is -1 when all nodes are
			// above the expected mass, in which case only node 0 remains
			int min_dis_node;
			if (n<0)
				min_dis_node = 0;
			else
				min_dis_node = fabs(nodes[n].mass - exp_node_mass) < 
					fabs(nodes[n+1].mass - exp_node_mass) ? n : n+1;

			if (fabs(nodes[min_dis_node].mass - exp_node_mass)<digest_tolerance)
			{
				nodes[min_dis_node].type = NODE_DIGEST;
			}
			else // create the new node
			{
				Node node;
				node.mass = exp_node_mass;
				node.type = NODE_DIGEST;
				node.breakage.mass = node.mass;
				node.breakage.parent_charge=charge;
				node.breakage.parent_size_idx = size_idx;
				node.breakage.region_idx = config->calc_region_idx(node.mass,pm_with_19);

				nodes.push_back(node);
				added_nodes=true;
			}
		}
	}


	if (n_term_digest_aas.size()>0)
	{
		int i;
		for (i=0; i<(int)n_term_digest_aas.size(); i++)
		{
			const int aa = n_term_digest_aas[i];
			mass_t exp_node_mass =aa2mass[aa];

			int n=1;
			while (n<(int)nodes.size() && nodes[n].mass<exp_node_mass)
				n++;

			// the closest node is either n or n-1; when the scan ran past the
			// last node only n-1 exists
			int min_dis_node;
			if (n>=(int)nodes.size())
				min_dis_node = n-1;
			else
				min_dis_node = fabs(nodes[n].mass - exp_node_mass) < 
					fabs(nodes[n-1].mass - exp_node_mass) ? n : n-1;

			if (fabs(nodes[min_dis_node].mass - exp_node_mass)<digest_tolerance)
			{
				nodes[min_dis_node].type = NODE_DIGEST;
			}
			else // create the new node
			{
				Node node;
				node.mass = exp_node_mass;
				node.type = NODE_DIGEST;
				node.breakage.mass = node.mass;
				node.breakage.parent_charge=charge;
				node.breakage.parent_size_idx = size_idx;
				node.breakage.region_idx = config->calc_region_idx(node.mass,pm_with_19);

				nodes.push_back(node);
				added_nodes=true;
			}
		}
	}


	
	if (added_nodes)
	{
		sort(nodes.begin(),nodes.end());
		init_index_array(); // redo the index because digest nodes were added
	}
}


/**********************************************************
Gives every node its initial score.
Regular and digest nodes are scored by the model from their
breakage alone; the N- and C-terminal nodes (and their
breakages) receive the config's terminal score.
Nodes of any other type are left untouched.
***********************************************************/
void PrmGraph::score_nodes(Model *model)
{
	const int num_nodes = nodes.size();

	for (int n=0; n<num_nodes; ++n)
	{
		Node& node = nodes[n];
		bool verbose = false;

		if (node.type == NODE_N_TERM || node.type == NODE_C_TERM)
		{
			// terminals carry no breakage evidence, they get a fixed score
			node.score          = config->get_terminal_score();
			node.breakage.score = config->get_terminal_score();
			continue;
		}

		if (node.type == NODE_REG || node.type == NODE_DIGEST)
		{
			// the model stores its result in the breakage; copy it to the node
			model->score_breakage(source_spectrum,&node.breakage,verbose);
			node.score = node.breakage.score;
		}
	}
}


/**********************************************************
Adds scores from incoming and outgoing edges to nodes
(useful for improving scores with PrmGraphs
***********************************************************/
void PrmGraph::add_edge_scores_to_nodes() 
{
	int i;

	const int max_node = nodes.size();
	for (i=0; i<max_node; i++)
	{
		Node& node = nodes[i];

		if (node.type != NODE_REG)
			continue;

		score_t max_in_score=-2.0;
		score_t max_out_score=-2.0;

		int j;
		for (j=0; j<node.out_edge_idxs.size(); j++)
		{
			const int edge_idx = node.out_edge_idxs[j];
			if (multi_edges[edge_idx].max_variant_score>max_out_score)
				max_out_score=multi_edges[edge_idx].max_variant_score;
		}

		for (j=0; j<node.in_edge_idxs.size(); j++)
		{
			const int edge_idx = node.in_edge_idxs[j];
			if (multi_edges[edge_idx].max_variant_score>max_in_score)
				max_in_score=multi_edges[edge_idx].max_variant_score;
		}

		score_t sum= max_in_score + max_out_score;

		if (sum>3.0)
			node.score += sum * 0.3 + 2.0;
	

	}
}









/********************************************************************
	Fills the multi edges of length 1 (single amino acid edges).

	For every node except the C-terminal node, and for every single
	amino acid combo, the best scoring node is looked for at the node's
	mass plus the combo mass: first within half the tolerance, and if
	nothing is found within 1.5 times the tolerance. A candidate further
	away than the tolerance is still accepted if a pair of peaks of the
	same strong fragment type in the two nodes matches the combo mass
	within the tolerance; edges that involve digest or terminal nodes are
	accepted up to 1.5 times the tolerance.
	The combo's variants are added to the edge between the two nodes (the
	edge is created if needed) and the amino acid indicators
	out_aa_ind / in_aa_ind are set for the two nodes.

	The indicator arrays are (re)allocated here when they are too short
	for this graph or missing for a session amino acid, and the entries
	of this graph's nodes are cleared for all session amino acids.

	The scan for candidates never moves backwards between combos, which
	relies on the combos being ordered by increasing mass - presumably
	guaranteed by config->get_combo_idxs_by_length(); verify there.
*********************************************************************/
void PrmGraph::fill_single_multi_edges()
{
	const vector<AA_combo>& aa_edge_combos    = config->get_aa_edge_combos();
	const vector<int>& single_edge_combo_idxs = config->get_combo_idxs_by_length(1);
	const vector<int>& strong_fragment_idxs = config->get_all_strong_fragment_type_idxs();
	const vector<int>& session_aas = config->get_session_aas();
	const mass_t tolerance = config->get_tolerance();
	const mass_t digest_tolerance = 1.5 * tolerance;
	const mass_t half_tolerance = 0.5 * tolerance;
	const int num_nodes  = nodes.size();
	const int num_combos = single_edge_combo_idxs.size();
	const int num_session_aas = session_aas.size();

	// without nodes or amino acids there are no edges to create
	if (num_nodes == 0 || num_session_aas == 0)
		return;

	int i;

	// largest amino acid value; it determines the length of the pointer vectors
	int max_aa = session_aas[0];
	for (i=1; i<num_session_aas; i++)
		if (session_aas[i]>max_aa)
			max_aa = session_aas[i];

	// check if the single edge indicator arrays are too short for this graph.
	// If so all existing arrays are released (not only those of the current
	// session amino acids) so that no array of the old length survives.
	if (ind_vector_lengths<num_nodes)
	{
		ind_vector_lengths = (int)(num_nodes * 1.2);
		if (ind_vector_lengths<200)
			ind_vector_lengths = 200;

		for (i=0; i<(int)in_aa_ind.size(); i++)
		{
			delete [] in_aa_ind[i];
			in_aa_ind[i] = NULL;
		}

		for (i=0; i<(int)out_aa_ind.size(); i++)
		{
			delete [] out_aa_ind[i];
			out_aa_ind[i] = NULL;
		}
	}

	// new entries are null pointers
	if ((int)in_aa_ind.size()<=max_aa)
		in_aa_ind.resize(max_aa+1);

	if ((int)out_aa_ind.size()<=max_aa)
		out_aa_ind.resize(max_aa+1);

	// allocate missing arrays and clear the indicator vectors
	const int num_bytes_to_reset = sizeof(bool)*num_nodes;
	for (i=0; i<num_session_aas; i++)
	{
		const int aa = session_aas[i];

		if (! in_aa_ind[aa])
			in_aa_ind[aa]  = new bool[ind_vector_lengths];

		if (! out_aa_ind[aa])
			out_aa_ind[aa] = new bool[ind_vector_lengths];

		memset(in_aa_ind[aa],0,num_bytes_to_reset);
		memset(out_aa_ind[aa],0,num_bytes_to_reset);
	}

	// fill single aa edges
	for (i=0; i<num_nodes; i++)
	{
		if (nodes[i].type == NODE_C_TERM)
			continue;

		const mass_t& node_mass = nodes[i].mass;
		
		int last_node_idx=i;	  // nodes up to this idx are too light for the remaining combos
		int current_e_idx = -1;   // edge between i and current_c_idx (-1 if there is none)
		int current_c_idx = -1;   // node for which current_e_idx was looked up
		
		int c;
		for (c=0; c<num_combos; c++)
		{
			const int& combo_idx = single_edge_combo_idxs[c];
			const AA_combo& combo = aa_edge_combos[combo_idx];
			const mass_t& combo_mass = combo.total_mass;
			const mass_t exp_connect_mass = node_mass + combo_mass;
		
			const mass_t min_connect_mass1 = exp_connect_mass - half_tolerance;
			const mass_t max_connect_mass1 = exp_connect_mass + half_tolerance;

			// first try: best scoring node within half the tolerance
			int n_idx;
			score_t max_score = NEG_INF;
			int     max_idx   = -1;
			for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
			{
				const mass_t& next_node_mass = nodes[n_idx].mass;
				
				if (next_node_mass<min_connect_mass1)
					continue;

				if (next_node_mass>max_connect_mass1)
					break;

				if (nodes[n_idx].score>max_score)
				{
					max_idx = n_idx;
					max_score = nodes[n_idx].score;
				}
			}

			// if couldn't connect with small tolerance, try larger margin
			if (max_idx<0)
			{
				const mass_t min_connect_mass2 = exp_connect_mass - digest_tolerance;
				const mass_t max_connect_mass2 = exp_connect_mass + digest_tolerance;

				for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
				{
					const mass_t& next_node_mass = nodes[n_idx].mass;
					
					// too light even for the wide window: the following
					// combos need not look at this node again
					if (next_node_mass<min_connect_mass2)
					{
						last_node_idx++;
						continue;
					}

					if (next_node_mass>max_connect_mass2)
						break;

					if (nodes[n_idx].score>max_score)
					{
						max_idx = n_idx;
						max_score = nodes[n_idx].score;
					}
				}
			}

		

			if (max_score>NEG_INF)
			{
				// need to check if distance between peaks is within tolerance
				mass_t min_diff = fabs(nodes[max_idx].mass-exp_connect_mass);

				// try seeing if there are peaks that can bridge them
				if (min_diff>tolerance)
				{
					const vector<BreakageFragment>& fragments1 = nodes[i].breakage.fragments;
					const vector<BreakageFragment>& fragments2 = nodes[max_idx].breakage.fragments;

					int f;
					for (f=0; f<(int)strong_fragment_idxs.size(); f++)
					{
						const int& strong_frag_idx = strong_fragment_idxs[f];
						const int pos1 = nodes[i].breakage.get_position_of_frag_idx(strong_frag_idx);
						if (pos1<0)
							continue;

						const int pos2 = nodes[max_idx].breakage.get_position_of_frag_idx(strong_frag_idx);
						if (pos2<0)
							continue;

						mass_t mass_diff = fabs(fragments1[pos1].mass - fragments2[pos2].mass);
						
						// peak distances of multiply charged fragments are
						// scaled by the fragment's charge
						const int frag_charge = config->get_fragment(strong_frag_idx).charge;
						if (frag_charge>1)
							mass_diff *= frag_charge;

						mass_t diff = fabs(mass_diff - combo_mass);

						if (diff < min_diff)
							min_diff = diff;
					}
				}

				// larger tolerance is allowed for the digest and terminal nodes because they can be
				// created relative to the terminal node
				if (nodes[max_idx].type == NODE_DIGEST || nodes[i].type == NODE_DIGEST)
				{
					if (min_diff>digest_tolerance)
							continue;
				}
				else
				if (nodes[max_idx].type == NODE_C_TERM || nodes[i].type == NODE_N_TERM )
				{
					if (min_diff>digest_tolerance)
						continue;
				}
				else
					if (min_diff>tolerance)
						continue;

				// add combo idx
				if (current_c_idx != max_idx)
				{
					current_e_idx = find_edge_idx_ij(i,max_idx);
					current_c_idx = max_idx;
				}

				if (current_e_idx>=0)
				{
					MultiEdge& edge = multi_edges[current_e_idx];
					
					// add the variant ptr and scores for this combo
					add_and_score_edge_variants(combo,edge);
				}
				else // create new edge
				{
					MultiEdge new_edge;
	
					new_edge.n_idx = i;
					new_edge.c_idx = max_idx;
					new_edge.n_break = &nodes[i].breakage;
					new_edge.c_break = &nodes[max_idx].breakage;
					new_edge.num_aa = 1;
				
					add_and_score_edge_variants(combo,new_edge);

					// an edge without variants is not stored
					if (new_edge.variant_ptrs.size() == 0)
						continue;

					current_e_idx = multi_edges.size();
					nodes[i].out_edge_idxs.push_back(current_e_idx);
					nodes[max_idx].in_edge_idxs.push_back(current_e_idx);
					multi_edges.push_back(new_edge);
				}

				// set amino acid indicators
				int edge_aa = combo.amino_acids[0];
				out_aa_ind[edge_aa][i]=true;
				in_aa_ind[edge_aa][max_idx]=true;

			}
		}
	}
}




/********************************************************************
	Fills in double edges.
	Uses a precomputed list (from config) of aa comobos and masses to quickly
	determine if certain edges are present.
	Flag add_overlap_edges controls if to add a double edge when the 
	same edge can be constructed by single edges.
*********************************************************************/
void PrmGraph::fill_double_multi_edges(bool add_overlap_edges)
{
	const vector<AA_combo>& aa_edge_combos    = config->get_aa_edge_combos();
	const vector<int>& double_edge_combo_idxs = config->get_combo_idxs_by_length(2);
	const vector<int>& strong_fragment_idxs = config->get_all_strong_fragment_type_idxs();
	const mass_t tolerance = config->get_tolerance();
	const mass_t pm_tolerance = (config->get_pm_tolerance() > tolerance) ? config->get_pm_tolerance() : tolerance * 0.75;
	const mass_t digest_tolerance = 1.5 * tolerance;
	const mass_t half_tolerance = 0.5 * tolerance;
	const int num_nodes  = nodes.size();
	const int num_combos = double_edge_combo_idxs.size();
	const mass_t max_combos_mass = aa_edge_combos[double_edge_combo_idxs[double_edge_combo_idxs.size()-1]].total_mass;;
	const mass_t max_node_mass = nodes[num_nodes-1].mass;

	int i;
	for (i=0; i<num_nodes; i++)
	{
		if (nodes[i].type == NODE_C_TERM)
			continue;

		const mass_t& node_mass = nodes[i].mass;
		
		int current_e_idx = -1;
		int current_c_idx = -1;

		int last_node_idx=i;
		
		int c;
		for (c=0; c<num_combos; c++)
		{
			const int& combo_idx = double_edge_combo_idxs[c];
			const AA_combo& combo = aa_edge_combos[combo_idx];
			const mass_t& combo_mass = combo.total_mass;

			// should this combo be check - if there is a single edge with one of the
			// amino acids in the combo skip this combo
			bool is_overlap_edge = false;
			if (out_aa_ind[combo.amino_acids[0]][i] ||
				out_aa_ind[combo.amino_acids[1]][i])
			{
				if (! add_overlap_edges)
					continue;

				is_overlap_edge = true;
			}

			const mass_t exp_connect_mass = node_mass + combo_mass;
			const mass_t min_connect_mass1 = exp_connect_mass - half_tolerance;
			const mass_t max_connect_mass1 = exp_connect_mass + half_tolerance;

			int n_idx;
			score_t max_score = NEG_INF;
			int     max_idx   = -1;
			for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
			{
				const mass_t& next_node_mass = nodes[n_idx].mass;
				
				if (next_node_mass<min_connect_mass1)
					continue;

				if (next_node_mass>max_connect_mass1)
					break;

				if (nodes[n_idx].score>max_score)
				{
					max_idx = n_idx;
					max_score = nodes[n_idx].score;
				}
			}

			// if couldn't connect with small tolerance, try larger margin
			if (max_idx<0)
			{
				const mass_t min_connect_mass2 = exp_connect_mass - digest_tolerance;
				const mass_t max_connect_mass2 = exp_connect_mass + digest_tolerance;

				int n_idx;
				for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
				{
					const mass_t& next_node_mass = nodes[n_idx].mass;
					
					if (next_node_mass<min_connect_mass2)
					{
						last_node_idx++;
						continue;
					}

					if (next_node_mass>max_connect_mass2)
						break;

					if (nodes[n_idx].score>max_score)
					{
						max_idx = n_idx;
						max_score = nodes[n_idx].score;
					}
				}
			}


			if (max_score>NEG_INF)
			{
				// need to check if distance between peaks is within tolerance
				mass_t min_diff = fabs(nodes[max_idx].mass-exp_connect_mass);

				// try seeing if there are peaks that can bridge them
				if (min_diff>tolerance)
				{
					const vector<BreakageFragment>& fragments1 = nodes[i].breakage.fragments;
					const vector<BreakageFragment>& fragments2 = nodes[max_idx].breakage.fragments;

					int f;
					for (f=0; f<strong_fragment_idxs.size(); f++)
					{
						const int& strong_frag_idx = strong_fragment_idxs[f];
						const int pos1 = nodes[i].breakage.get_position_of_frag_idx(strong_frag_idx);
						if (pos1<0)
							continue;

						const int pos2 = nodes[max_idx].breakage.get_position_of_frag_idx(strong_frag_idx);
						if (pos2<0)
							continue;

						mass_t mass_diff = fabs(fragments1[pos1].mass - fragments2[pos2].mass);
						
						const int frag_charge = config->get_fragment(strong_frag_idx).charge;
						if (frag_charge>1)
							mass_diff *= frag_charge;

						mass_t diff = fabs(mass_diff - combo_mass);

						if (diff < min_diff)
							min_diff = diff;
					}
				}

				// larger tolerance is allowed for the digest and terminal nodes because they can be
				// created relative to the terminal node
				if (nodes[max_idx].type == NODE_DIGEST || nodes[i].type == NODE_DIGEST)
				{
					if (min_diff>digest_tolerance)
							continue;
				}
				else
				if (nodes[max_idx].type == NODE_C_TERM || nodes[i].type == NODE_N_TERM )
				{
					if (min_diff>digest_tolerance)
						continue;
				}
				else
					if (min_diff>tolerance)
						continue;
				
				// add combo idx
				if (current_c_idx != max_idx)
				{
					current_e_idx = find_edge_idx_ij(i,max_idx);
					current_c_idx = max_idx;
				}

				if (current_e_idx>=0 && multi_edges[current_e_idx].num_aa == 2)
				{
					MultiEdge& edge = multi_edges[current_e_idx];
					
					add_and_score_edge_variants(combo,edge);

					if (is_overlap_edge)
						edge.ind_edge_overlaps = true;

				}
				else // create new edge
				{
					MultiEdge new_edge;
					
					new_edge.n_idx = i;
					new_edge.c_idx = max_idx;
					new_edge.n_break = &nodes[i].breakage;
					new_edge.c_break = &nodes[max_idx].breakage;
					new_edge.num_aa= 2;
					new_edge.ind_edge_overlaps=is_overlap_edge;

					add_and_score_edge_variants(combo,new_edge);
					if (new_edge.variant_ptrs.size() == 0)
						continue;

					current_e_idx = multi_edges.size();
					nodes[i].out_edge_idxs.push_back(current_e_idx);
					nodes[max_idx].in_edge_idxs.push_back(current_e_idx);
					multi_edges.push_back(new_edge);
				}
			}
		}
	}
}


/********************************************************************
	Fills in long edges (similar to the double function, only looks both
	at the out edges of node i and the in edges of node max_idx to see
	if there is a possible overlap).
	Uses a precomputed list (from config) of aa combos and masses to quickly
	determine if certain edges are present.
	Flag add_overlap_edges controls whether to add an edge of this length
	when the same edge can be constructed by single edges.
*********************************************************************/
void PrmGraph::fill_longer_multi_edges(int edge_length, bool add_overlap_edges)
{
	const vector<AA_combo>& aa_edge_combos    = config->get_aa_edge_combos();
	const vector<int>& edge_combo_idxs = config->get_combo_idxs_by_length(edge_length);
	const vector<int>& strong_fragment_idxs = config->get_all_strong_fragment_type_idxs();
	const mass_t tolerance = config->get_tolerance();
	const mass_t pm_tolerance = (config->get_pm_tolerance() > tolerance) ? config->get_pm_tolerance() : tolerance * 0.75;
	const mass_t digest_tolerance = 1.5 * tolerance;
	const mass_t half_tolerance = 0.5 * tolerance;
	const int num_nodes  = nodes.size();
	const int num_combos = edge_combo_idxs.size();
	const mass_t max_combos_mass = aa_edge_combos[edge_combo_idxs[edge_combo_idxs.size()-1]].total_mass;;
	const mass_t max_node_mass = nodes[num_nodes-1].mass;
	

	int i;
	for (i=0; i<num_nodes; i++)
	{
		if (nodes[i].type == NODE_C_TERM)
			continue;

		const mass_t& node_mass = nodes[i].mass;
		const int length_minus_1 = edge_length-1;
		
		int current_e_idx = -1;
		int current_c_idx = -1;

		int last_node_idx=i;
		
		int c;
		for (c=0; c<num_combos; c++)
		{
			const int& combo_idx = edge_combo_idxs[c];
			const AA_combo& combo = aa_edge_combos[combo_idx];
			const mass_t& combo_mass = combo.total_mass;

			// should this combo be check - if there is a single edge with one of the
			// amino acids in the combo skip this combo
			bool is_overlap_edge = false;
			
			int a;
			for (a=0; a<edge_length; a++)
			{
				const int& aa = combo.amino_acids[a];
				if (out_aa_ind[aa][i])
					break;
			}

			if (a<edge_length)
			{
				if (! add_overlap_edges)
					continue;

				is_overlap_edge = true;
			}

			const mass_t exp_connect_mass = node_mass + combo_mass;
			const mass_t min_connect_mass1 = exp_connect_mass - half_tolerance;
			const mass_t max_connect_mass1 = exp_connect_mass + half_tolerance;

			int n_idx;
			score_t max_score = NEG_INF;
			int     max_idx   = -1;
			for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
			{
				const mass_t& next_node_mass = nodes[n_idx].mass;
				
				if (next_node_mass<min_connect_mass1)
					continue;

				if (next_node_mass>max_connect_mass1)
					break;

				if (nodes[n_idx].score>max_score)
				{
					max_idx = n_idx;
					max_score = nodes[n_idx].score;
				}
			}

			// if couldn't connect with small tolerance, try larger margin
			if (max_idx<0)
			{
				const mass_t min_connect_mass2 = exp_connect_mass - digest_tolerance;
				const mass_t max_connect_mass2 = exp_connect_mass + digest_tolerance;

				int n_idx;
				for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
				{
					const mass_t& next_node_mass = nodes[n_idx].mass;
					
					if (next_node_mass<min_connect_mass2)
					{
						last_node_idx++;
						continue;
					}

					if (next_node_mass>max_connect_mass2)
						break;

					if (nodes[n_idx].score>max_score)
					{
						max_idx = n_idx;
						max_score = nodes[n_idx].score;
					}
				}
			}


			if (max_score>NEG_INF)
			{
				// need to check if distance between peaks is within tolerance
				mass_t min_diff = fabs(nodes[max_idx].mass-exp_connect_mass);

				// try seeing if there are peaks that can bridge them
				if (min_diff>tolerance)
				{
					const vector<BreakageFragment>& fragments1 = nodes[i].breakage.fragments;
					const vector<BreakageFragment>& fragments2 = nodes[max_idx].breakage.fragments;

					int f;
					for (f=0; f<strong_fragment_idxs.size(); f++)
					{
						const int& strong_frag_idx = strong_fragment_idxs[f];
						const int pos1 = nodes[i].breakage.get_position_of_frag_idx(strong_frag_idx);
						if (pos1<0)
							continue;

						const int pos2 = nodes[max_idx].breakage.get_position_of_frag_idx(strong_frag_idx);
						if (pos2<0)
							continue;

						mass_t mass_diff = fabs(fragments1[pos1].mass - fragments2[pos2].mass);
						
						const int frag_charge = config->get_fragment(strong_frag_idx).charge;
						if (frag_charge>1)
							mass_diff *= frag_charge;

						mass_t diff = fabs(mass_diff - combo_mass);

						if (diff < min_diff)
							min_diff = diff;
					}
				}

				// larger tolerance is allowed for the digest and terminal nodes because they can be
				// created relative to the terminal node
				if (nodes[max_idx].type == NODE_DIGEST || nodes[i].type == NODE_DIGEST)
				{
					if (min_diff>digest_tolerance)
							continue;
				}
				else
				if (nodes[max_idx].type == NODE_C_TERM || nodes[i].type == NODE_N_TERM )
				{
					if (min_diff>digest_tolerance)
						continue;
				}
				else
					if (min_diff>tolerance)
						continue;

				// check if this edge overlaps a subpath of shorter edges with similar 
				// amino acids (already check the node i, so only check what comes in
				// node max_idx
				int a;
				for (a=0; a<edge_length; a++)
				{
					const int& aa = combo.amino_acids[a];
					if (in_aa_ind[aa][max_idx])
						break;
				}

				if (a<edge_length)
				{
					if (! add_overlap_edges)
						continue;

					is_overlap_edge = true;
				}
				
				// add combo idx
				if (current_c_idx != max_idx)
				{
					current_e_idx = find_edge_idx_ij(i,max_idx);
					current_c_idx = max_idx;
				}

				if (current_e_idx>=0 && multi_edges[current_e_idx].num_aa == edge_length)
				{
					MultiEdge& edge = multi_edges[current_e_idx];

					add_and_score_edge_variants(combo,edge);

					if (is_overlap_edge)
						edge.ind_edge_overlaps = true;
				}
				else // create new edge
				{
					MultiEdge new_edge;
		
					new_edge.n_idx = i;
					new_edge.c_idx = max_idx;
					new_edge.n_break = &nodes[i].breakage;
					new_edge.c_break = &nodes[max_idx].breakage;
					new_edge.num_aa= edge_length;
					new_edge.ind_edge_overlaps=is_overlap_edge;

					add_and_score_edge_variants(combo,new_edge);
					if (new_edge.variant_ptrs.size() == 0)
						continue;

					current_e_idx = multi_edges.size();
					nodes[i].out_edge_idxs.push_back(current_e_idx);
					nodes[max_idx].in_edge_idxs.push_back(current_e_idx);
					multi_edges.push_back(new_edge);
				}
			}
		}
	}
}



// (node idx, node score) pair; operator< is reversed so that sort()
// arranges the pairs from the highest score to the lowest
struct score_idx_pair {
	bool operator< (const score_idx_pair& rhs) const
	{
		return rhs.score < score;
	}

	int idx;
	score_t score;
};

/***********************************************************
	assigns a value to each node's (log) rank field
	also sets max_node_score.
************************************************************/
void PrmGraph::rank_nodes_according_to_score()
{
	vector<score_idx_pair> pairs;

	int i;
	pairs.resize(nodes.size());

	for (i=0; i<nodes.size(); i++)
	{
		pairs[i].idx=i;
		pairs[i].score = nodes[i].score;
	}

	sort(pairs.begin(),pairs.end());

	for (i=0; i<pairs.size(); i++)
		nodes[pairs[i].idx].log_rank = (float)log(2.0+i);

	max_node_score = NEG_INF;
	for (i=0; i<nodes.size(); i++)
		if (nodes[i].score>max_node_score)
			max_node_score = nodes[i].score;
}


/**********************************************************
	Sets the idx_max_in_score and idx_max_out_score fields
	for the nodes.
***********************************************************/
void PrmGraph::set_idxs_max_in_out_for_nodes()
{
	int i;
	for (i=0; i<nodes.size(); i++)
	{
		int e;
		score_t max_in_score = NEG_INF;
		nodes[i].idx_max_in_score_node=-1;
		
		const vector<int>& in_idxs = nodes[i].in_edge_idxs;
		for (e=0; e<in_idxs.size(); e++)
		{
			const int prev_node_idx = multi_edges[in_idxs[e]].n_idx;
			if (nodes[prev_node_idx].score>max_in_score)
			{
				nodes[i].idx_max_in_score_node = prev_node_idx;
				max_in_score = nodes[prev_node_idx].score; 
			}
		}

		score_t max_out_score = NEG_INF;
		nodes[i].idx_max_out_score_node=-1;

		const vector<int>& out_idxs = nodes[i].out_edge_idxs;
		for (e=0; e<out_idxs.size(); e++)
		{
			const int next_node_idx = multi_edges[out_idxs[e]].c_idx;
			if (nodes[next_node_idx].score>max_out_score)
			{
				nodes[i].idx_max_out_score_node = next_node_idx;
				max_out_score = nodes[next_node_idx].score; 
			}
		}		
	}
}



// Scores a single variant (num_aa amino acids given in aa) of an edge.
//  - single aa edges get digest_node_score when they connect a digest node
//    with the matching terminal node and aa[0] is one of the configured
//    digest amino acids for that terminal; otherwise they get 0.
//  - longer edges get (num_aa-1) times the model's missing breakage score
//    for the region of the edge's c-side breakage.
// *** Most of the scoring is now done through EdgeModel !!!!!
//
score_t PrmGraph::calc_edge_variant_score(const MultiEdge& edge, int num_aa, int *aa) const
{
	if (num_aa > 1)
		return (num_aa-1)*model->get_missing_breakage_score(charge,size_idx,edge.c_break->region_idx);

	if (num_aa != 1)
		return 0;

	const Node& n_node = nodes[edge.n_idx];
	const Node& c_node = nodes[edge.c_idx];

	// digest node -> C-terminal
	if (n_node.type == NODE_DIGEST && c_node.type == NODE_C_TERM)
	{
		const vector<int>& digest_aas = config->get_c_term_digest_aas();
		int d;
		for (d=0; d<digest_aas.size(); d++)
		{
			if (digest_aas[d] == aa[0])
				return digest_node_score;
		}
	}

	// N-terminal -> digest node
	if (n_node.type == NODE_N_TERM && c_node.type == NODE_DIGEST)
	{
		const vector<int>& digest_aas = config->get_n_term_digest_aas();
		int d;
		for (d=0; d<digest_aas.size(); d++)
		{
			if (digest_aas[d] == aa[0])
				return digest_node_score;
		}
	}

	return 0;
}




// Sets the penalties that should be given if a peptide starts or ends
// at a certain node (penalties because the path doesn't cover the whole
// peptide). The model's penalties are given per 100 Daltons of mass, so
// they are scaled by 0.01 and multiplied by the mass left uncovered
// before the node (prefix) or after it (suffix).
void PrmGraph::set_node_penalties(Model *model)
{
	const score_t prefix_per_dalton = 0.01 * model->get_prefix_score_penalty(source_spectrum);
	const score_t suffix_per_dalton = 0.01 * model->get_suffix_score_penalty(source_spectrum);

	const int num_nodes = nodes.size();
	int n;
	for (n=0; n<num_nodes; n++)
	{
		Node& node = nodes[n];

		node.prefix_penalty = prefix_per_dalton * node.mass ;
		node.suffix_penalty = suffix_per_dalton * (pm_with_19 - node.mass - 19.0183);
	}
}



// For each node, computes the maximal score that can be gained by
// continuing forward from it: either stopping there (the node's suffix
// penalty) or taking an out edge and adding the target node's score, the
// edge's best variant score and the target's own gain.
void PrmGraph::find_max_suffix_gains( vector< score_t >&  max_gains) const
{
	const int num_nodes = nodes.size();
	int n;

	// stopping at a node costs its suffix penalty
	max_gains.resize(num_nodes);
	for (n=0; n<max_gains.size(); n++)
		max_gains[n] = nodes[n].suffix_penalty;

	// go over the nodes from the last to the first
	n = num_nodes;
	while (n-- > 0)
	{
		const vector<int>& out_idxs = nodes[n].out_edge_idxs;
		int e;
		for (e=0; e<out_idxs.size(); e++)
		{
			const MultiEdge& edge = multi_edges[out_idxs[e]];
			const int next_idx = edge.c_idx;
			const score_t gain = nodes[next_idx].score + edge.max_variant_score + max_gains[next_idx];

			if (gain>max_gains[n])
				max_gains[n] = gain;
		}
	}
}


// For each node, computes the maximal score attainable by a path that
// ends at it, including the node's own score: either starting there (the
// node's prefix penalty plus score) or arriving through an in edge.
void PrmGraph::find_max_prefix_gains( vector< score_t >&  max_prefix_gains) const
{
	const int num_nodes = nodes.size();
	int n;

	// starting at a node costs its prefix penalty
	max_prefix_gains.resize(num_nodes);
	for (n=0; n<max_prefix_gains.size(); n++)
		max_prefix_gains[n] = nodes[n].prefix_penalty + nodes[n].score;

	// node 0 keeps its initial value; the rest are relaxed in idx order
	for (n=1; n<num_nodes; n++)
	{
		const Node& node = nodes[n];
		int e;
		for (e=0; e<node.in_edge_idxs.size(); e++)
		{
			const MultiEdge& edge = multi_edges[node.in_edge_idxs[e]];
			const int prev_idx = edge.n_idx;
			const score_t gain = edge.max_variant_score + max_prefix_gains[prev_idx] + node.score;

			if (gain>max_prefix_gains[n])
				max_prefix_gains[n] = gain;
		}
	}
}

// Deactivates every node (except node 0) whose best path through it
// scores more than delta below the best path through any node.
// Returns the number of nodes that were marked inactive.
int PrmGraph::mark_useless_nodes(score_t delta )
{
	vector<score_t> prefix_gains, suffix_gains;

	find_max_prefix_gains(prefix_gains);
	find_max_suffix_gains(suffix_gains);

	const int num_nodes = nodes.size();
	vector<score_t> through_scores(prefix_gains.size());

	// best score of a path through each node, and the overall best
	score_t best_through = NEG_INF;
	int n;
	for (n=0; n<num_nodes; n++)
	{
		through_scores[n] = prefix_gains[n]+suffix_gains[n];
		if (through_scores[n]>best_through)
			best_through = through_scores[n];
	}

	const score_t cutoff = best_through - delta;
	int num_marked = 0;

	// node 0 is never deactivated
	for (n=1; n<num_nodes; n++)
	{
		if (through_scores[n] >= cutoff)
			continue;

		nodes[n].active = 0;
		num_marked++;
	}

	return num_marked;
}



// removes all edges to and from nodes with the active flag set to 0
void PrmGraph::remove_edges_from_inactive_nodes()
{
	int i;

	for (i=0; i<nodes.size(); i++)
	{
		if (nodes[i].active)
			continue;

		int j;
		Node& node = nodes[i];

		for (j=0; j<node.in_edge_idxs.size(); j++)
		{
			// remove edge from other node's list
			const int e_idx = node.in_edge_idxs[j];
			Node& other_node = nodes[multi_edges[e_idx].n_idx];
			int k;

			for (k=0; k<other_node.out_edge_idxs.size(); k++)
				if (other_node.out_edge_idxs[k] == e_idx)
					break;

			if (k== other_node.out_edge_idxs.size())
			{
				cout << "Error: missing out edge idx!" << endl;
				exit(1);
			}

			other_node.out_edge_idxs[k] = other_node.out_edge_idxs[other_node.out_edge_idxs.size()-1];
			other_node.out_edge_idxs.pop_back();			
		}

		for (j=0; j<node.out_edge_idxs.size(); j++)
		{
			// remove edge from other node's list
			const int e_idx = node.out_edge_idxs[j];
			Node& other_node = nodes[multi_edges[e_idx].c_idx];
			int k;

			for (k=0; k<other_node.in_edge_idxs.size(); k++)
				if (other_node.in_edge_idxs[k] == e_idx)
					break;

			if (k== other_node.in_edge_idxs.size())
			{
				cout << "Error: missing in edge idx!" << endl;
				exit(1);
			}

			other_node.in_edge_idxs[k] = other_node.in_edge_idxs[other_node.in_edge_idxs.size()-1];
			other_node.in_edge_idxs.pop_back();			
		}

		node.in_edge_idxs.clear();
		node.out_edge_idxs.clear();
	}
}






// (edge idx, score) pair; operator< is reversed so that sort() puts the
// highest scoring entries first
struct idx_score {
	bool operator< (const idx_score& rhs) const
	{
		return rhs.score < score;
	}
	int edge_idx;
	score_t score;
};

// sorts edges according to the value to which they lead
void PrmGraph::sort_outgoing_edges()
{
	int i;
	for (i=0; i<nodes.size(); i++)
	{
		if (nodes[i].out_edge_idxs.size()==0)
			continue;
		
		int j;
		vector<idx_score> pairs;
		pairs.resize(nodes[i].out_edge_idxs.size());
		for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
		{
			pairs[j].edge_idx=nodes[i].out_edge_idxs[j];
			pairs[j].score = multi_edges[nodes[i].out_edge_idxs[j]].max_variant_score + 
							 nodes[multi_edges[nodes[i].out_edge_idxs[j]].c_idx].score;
		}
		sort(pairs.begin(),pairs.end());

		for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
			nodes[i].out_edge_idxs[j]=pairs[j].edge_idx;
	}
}





// (edge idx, idx of the edge's n-side node) pair; ordered by ascending n_idx
struct edge_idx_pair {
	bool operator< (const edge_idx_pair& rhs) const
	{
		return rhs.n_idx > n_idx;
	}

	int edge_idx;
	int n_idx;
};


/*******************************************************************
	Creates a path object from a collection of edges that are assumed
	to correspond to a path in the graph.

	edge_idxs - idxs (into multi_edges) of the path's edges, in any order.
	path      - receives the edges ordered by the idx of their n-side node,
	            the breakages and node idxs along the path, and the masses
	            of the first and last nodes.

	If edge_idxs is empty only path.path_score is set (to 0).
	Exits with an error if consecutive edges (after ordering) do not share
	a node.
********************************************************************/
void PrmGraph::create_path_from_edges(vector<int>& edge_idxs, MultiPath& path) const
{
	int i;

	if (edge_idxs.size()==0)
	{
		path.path_score = 0;
		return;
	}

	// order the edges by the idx of the node they leave
	vector<edge_idx_pair> pairs(edge_idxs.size());
	for (i=0; i<edge_idxs.size(); i++)
	{
		pairs[i].edge_idx = edge_idxs[i];
		pairs[i].n_idx    = multi_edges[edge_idxs[i]].n_idx;
	}
	sort(pairs.begin(),pairs.end());

	// from here on only the ordered list is used, so the masses, breakages
	// and node idxs below always follow the path from its first node to its
	// last one, whatever order the caller supplied the edges in
	path.edge_idxs.resize(pairs.size());
	for (i=0; i<pairs.size(); i++)
		path.edge_idxs[i]=pairs[i].edge_idx;

	const int num_edges = path.edge_idxs.size();

	// every edge must start where the previous one ended
	for (i=1; i<num_edges; i++)
	{
		if (multi_edges[path.edge_idxs[i]].n_idx != multi_edges[path.edge_idxs[i-1]].c_idx)
		{
			cout << "Error: inconsistent edges when creating path!" << endl;
			exit(1);
		}
	}

	const MultiEdge& first_edge = multi_edges[path.edge_idxs[0]];
	const MultiEdge& last_edge  = multi_edges[path.edge_idxs[num_edges-1]];

	path.n_term_mass= nodes[first_edge.n_idx].mass;
	path.c_term_mass= nodes[last_edge.c_idx].mass;

	// collect breakage info and node idxs: the n-side of every edge,
	// followed by the c-side of the last edge
	path.breakages.clear();
	path.node_idxs.clear();
	for (i=0; i<num_edges; i++)
	{
		const MultiEdge& edge = multi_edges[path.edge_idxs[i]];
		path.breakages.push_back(edge.n_break);
		path.node_idxs.push_back(edge.n_idx);
	}
	path.breakages.push_back(last_edge.c_break);
	path.node_idxs.push_back(last_edge.c_idx);	
}



// Finds the highest scoring continuous subpath for a given peptide in the graph.
//
// peptide    - the peptide whose amino acids should be followed.
// start_mass - mass at which the peptide's first amino acid starts.
//
// For every amino acid position i of the peptide, all nodes within 3
// tolerances of the expected prefix mass are tried as starting points.
// From a starting node the peptide is followed greedily: at each node the
// first out edge holding a variant that matches the next one (or, if that
// fails, the next two) amino acids is taken. The walk stops when no edge
// matches or the peptide ends.
// Returns the walk with the highest path_score; if no walk scores above 0
// the returned path has path_score 0 and no positions added here.
SeqPath PrmGraph::get_highest_scoring_subpath(const Peptide& peptide, mass_t start_mass) const
{
	SeqPath ret_path;
	const vector<int>& path_aas = peptide.get_amino_acids();
	const int num_aas = path_aas.size();
	const mass_t start_tolerance = config->get_tolerance() * 3.0;
	mass_t pre_mass = start_mass;
	int i;

	ret_path.path_score = 0;

	for (i=0; i<num_aas; i++)
	{
		int j;
		PeakRange nr = this->get_nodes_in_range(pre_mass - start_tolerance, pre_mass + start_tolerance);
		for (j=0; j<nr.num_peaks; j++)
		{
			const int start_node_idx = nr.low_idx+j;

			// find max correct subpath from this node
			SeqPath path;

			path.n_term_mass = nodes[start_node_idx].mass;
			path.c_term_mass = nodes[start_node_idx].mass;
			path.path_score = 0;
			path.positions.clear();

			// loop until the end of the peptide is reached or no edge fits
			int node_idx = start_node_idx; // node the walk currently stands on
			int k=i;                       // next amino acid to be matched
			int last_good_edge_idx = -1;
			while (k<num_aas)
			{
				const Node& node = nodes[node_idx];
				int e;
				for (e=0; e<node.out_edge_idxs.size(); e++)
				{
					const int& e_idx = node.out_edge_idxs[e];
					const MultiEdge& edge = multi_edges[e_idx];

					int var_idx = edge.get_variant_idx(1,(int *)&path_aas[k]);

					// a two amino acid variant can only be tried if there is
					// another amino acid after position k
					if (var_idx<0 && k+1<num_aas)
						var_idx = edge.get_variant_idx(2,(int *)&path_aas[k]);

					if (var_idx>=0)
					{
						path.add_edge_variant(edge,e_idx,var_idx);
						k+=edge.num_aa;
						node_idx = edge.c_idx; // continue from the node the edge leads to
						last_good_edge_idx = e_idx;
						break;
					}
				}

				// no out edge of this node matches the peptide
				if (e == node.out_edge_idxs.size())
					break;
			}

			// add last position (the c-side of the last edge that was taken)
			if (last_good_edge_idx>=0)
			{
				const MultiEdge& last_edge = multi_edges[last_good_edge_idx];
				PathPos last_pos;

				last_pos.breakage = last_edge.c_break;
				last_pos.edge_idx =-1;
				last_pos.mass = last_edge.c_break->mass;
				last_pos.node_idx = last_edge.c_idx;
				last_pos.node_score = last_edge.c_break->score;
				path.positions.push_back(last_pos);
				
				path.path_score += last_pos.node_score;
				path.c_term_mass = last_pos.mass;
			}

			// check if this path is better
			if (path.path_score>ret_path.path_score)
				ret_path = path;
		}	

		// expected mass of the prefix that ends after amino acid i
		pre_mass += config->get_aa2mass()[path_aas[i]];
	}
	
	return ret_path;
}





// Finds the longest continuous subpath for a given peptide in the graph.
//
// peptide    - the peptide to follow (Ile is treated as Leu in a local copy).
// start_mass - mass at which the peptide's first amino acid starts.
// verbose    - when true, prints debugging information to cout.
//
// Every amino acid position i is tried as a start, from every node within
// 1.75 tolerances of the expected prefix mass. The walk is greedy: at each
// node, edge lengths 1..max_edge_length are tried in increasing order and
// the first out edge having a variant that equals the next amino acids is
// taken. The walk using the largest number of edges is kept (the first one
// found in case of ties) and converted into the returned SeqPath.
// Exits with an error if a chosen edge does not hold the expected variant.
SeqPath PrmGraph::get_longest_subpath(const Peptide& peptide, mass_t start_mass, bool verbose) const
{
	const int max_edge_length     = config->get_max_edge_length();
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	vector<mass_t> prefix_masses;
	vector<int> path_aas = peptide.get_amino_acids();
	vector<int> path_edges;     // edges of the best walk found so far
	vector<int> path_aa_count;  // number of amino acids matched by each of these edges
	int a_start=-1;             // amino acid position at which the best walk starts
	mass_t pre_mass = 0;        // NOTE(review): shadowed by the const pre_mass in the loop below
	mass_t double_tolerance = config->get_tolerance() * 1.75; // despite the name, 1.75 tolerances
	int i;

	path_edges.clear();

	// the comparison with edge variants is done with Leu in place of Ile
	for (i=0; i<path_aas.size(); i++)
		if (path_aas[i]==Ile)
			path_aas[i]=Leu;

	// prefix_masses[i] is the expected mass before amino acid i
	prefix_masses.push_back(start_mass);
	for (i=0; i<path_aas.size(); i++)
		prefix_masses.push_back(prefix_masses[i]+aa2mass[path_aas[i]]);
	
	// try every amino acid position as a start, with a widened tolerance
	for (i=0; i<path_aas.size(); i++)
	{
		int j;
		const mass_t pre_mass = prefix_masses[i];
		PeakRange nr = this->get_nodes_in_range(pre_mass - double_tolerance, pre_mass + double_tolerance);
		for (j=0; j<nr.num_peaks; j++)
		{
			int node_idx = nr.low_idx+j;
			vector<int> e_idxs, aa_count;
			e_idxs.clear();
			aa_count.clear();
			
		
			// find max correct subpath from this node

			int k;           // position of the next amino acid to match
			int n=node_idx;  // node the walk currently stands on
			for (k=i; k<path_aas.size(); k++)
			{
				int q;
				int curr_aa = path_aas[k];
				bool found_edge=false;
				
				// look for aas in edges
				// start with short edges and increase length
				int num_aa;
				for (num_aa = 1; num_aa<=max_edge_length; num_aa++)
				{
					// not enough amino acids left for an edge of this length
					if (num_aa + k > path_aas.size())
						break;

					for (q=0; q<nodes[n].out_edge_idxs.size(); q++)
					{
						int e_idx = nodes[n].out_edge_idxs[q];
						const MultiEdge& edge = multi_edges[e_idx];

						// NOTE(review): debugging output for one hard-coded case
						// (start 0, position 4, edge 528) - looks like a leftover
						if (verbose && i == 0 && num_aa == 2 && k == 4 && e_idx == 528)
						{
							const vector<string>& a = config->get_aa2label();
							cout << "Looking for " << path_aas[k] << " " << path_aas[k+1] << "  ";
							cout << a[path_aas[k]] << a[path_aas[k+1]] << endl;
						}
	
						if (edge.has_variant(num_aa,&path_aas[k]) )
						{
							found_edge=true;
							e_idxs.push_back(e_idx);
							aa_count.push_back(num_aa);
							n = edge.c_idx;

							// the loop's k++ accounts for the remaining amino acid
							k+= num_aa -1;
							
							break;
						}			
					}

					if (found_edge)
						break;
				}
					
				// the walk ends at the first amino acid that cannot be matched
				if (! found_edge)
					break;
			}

			// keep this walk only if it uses strictly more edges than the best so far
			if (e_idxs.size()>path_edges.size())
			{
				path_edges = e_idxs;
				path_aa_count = aa_count;
				a_start=i;
			}
		}
	}

	// create path
	// need to add treatment of modifications to terminals


	SeqPath ret_path;
	int a_pos= a_start; // position of the next amino acid to be put in the path

	if (verbose)
		cout << endl << "Starting at aa " << a_pos << endl;

	for (i=0; i<path_edges.size(); i++)
	{
		const MultiEdge& edge = multi_edges[path_edges[i]];

		int variant_idx = edge.get_variant_idx(edge.num_aa,&path_aas[a_pos]);
		if (variant_idx<0)
		{
			cout << "Error: edge does not contain variant idx!" << endl;
			exit(1);
		}
		
		if (verbose)
			cout << i<< " " << edge.n_idx << " (" << path_aa_count[i] << ") " << 
			edge.c_idx << endl;

		// position for the n-side node of the edge, carrying the edge's first amino acid
		PathPos pos;
		
		pos.node_idx = edge.n_idx;
		pos.node_score = nodes[edge.n_idx].score;
		pos.edge_varaint_score = edge.variant_scores[variant_idx];
		pos.breakage   = (Breakage *)&nodes[edge.n_idx].breakage;
		pos.edge_idx   = path_edges[i];
		pos.mass       = nodes[edge.n_idx].mass;
		pos.aa         = path_aas[a_pos];

		ret_path.positions.push_back(pos);

		a_pos++;

		// add skipped positions for the rest of the amino acids in the edge
		// (only the aa field is set; the other fields keep whatever values a
		// newly constructed PathPos has)
		int j;
		for (j=1; j<path_aa_count[i]; j++)
		{
			PathPos pos;
		
			pos.aa         = path_aas[a_pos++];
			ret_path.positions.push_back(pos);

		}		
	}

	// add last pos (the c-side node of the last edge) and compute the path score
	if (path_edges.size()>0)
	{
		const MultiEdge& last_edge = multi_edges[path_edges[path_edges.size()-1]];
		PathPos pos;

		pos.node_idx = last_edge.c_idx;
		pos.node_score = nodes[last_edge.c_idx].score;
		pos.breakage   = (Breakage *)&nodes[last_edge.c_idx].breakage;
		pos.mass       = nodes[last_edge.c_idx].mass;

		ret_path.positions.push_back(pos);

		ret_path.n_term_mass = ret_path.positions[0].mass;
		ret_path.c_term_mass = pos.mass;

		// the path score is the sum of the node and edge variant scores of all positions
		ret_path.path_score =0;
		int i;

		for (i=0; i<ret_path.positions.size(); i++)
		{
			ret_path.path_score += (ret_path.positions[i].node_score + ret_path.positions[i].edge_varaint_score);
		//	cout << ret_path.positions[i].node_score << "  " <<  ret_path.positions[i].edge_varaint_score <<
		//			" t: " << ret_path.path_score << endl;
		}
	}
		

	


//	ret_path.num_aa = ret_path.positions.size()-1;

	ret_path.make_seq_str(config);
	
	

	return ret_path; 
}







/***********************************************************************
// returns the idxs of nodes correponding to the expected breakages of the peptide
// returns -1 for an idx of a missing node
************************************************************************/
void PrmGraph::get_all_correct_node_idxs(const Peptide& peptide, vector<int>& idxs) const
{
	const mass_t tolerance = config->get_tolerance()*1.5;
	vector<mass_t> break_masses;
	int i;

	peptide.calc_expected_breakage_masses(config,break_masses);

	idxs.clear();
	for (i=0; i<break_masses.size(); i++)
	{
		int idx= this->get_max_score_node(break_masses[i],tolerance);
	//	if (idx<0)
	//		continue;
		idxs.push_back(idx);
	}
}


/***********************************************************************
   returns the idxs of nodes correponding to the mirrors of the
   expected breakages of the peptide
   returns -1 for an idx of a missing node
************************************************************************/
void PrmGraph::get_all_mirror_node_idxs(const Peptide& peptide, vector<int>& idxs) const
{
	const mass_t tolerance = config->get_tolerance()*1.5;
	vector<mass_t> break_masses;
	int i;

	peptide.calc_expected_breakage_masses(config,break_masses);


	idxs.clear();
	for (i=break_masses.size()-1; i>=0; i--)
	{
		int idx= this->get_max_score_node(pm_with_19 - break_masses[i] + 1.0078,tolerance);
	//	if (idx<0)
	//		continue;

		idxs.push_back(idx);
	}
}

/***********************************************************************
// returns the idxs of nodes correponding to the expected breakages of the peptide
************************************************************************/
void PrmGraph::get_relevant_node_idxs(const Peptide& peptide, vector<int>& idxs) const
{
	const mass_t tolerance = config->get_tolerance()*2.0;
	vector<mass_t> break_masses;
	int i;

	peptide.calc_expected_breakage_masses(config,break_masses);

	idxs.clear();
	for (i=0; i<break_masses.size(); i++)
	{
		int idx= this->get_max_score_node(break_masses[i],tolerance);
		if (idx<0)
			continue;

		idxs.push_back(idx);
	}
}








/************************************************************
	Finds the highest scoring node among the first 11% of the
	nodes (node 0 excluded) and among the last 11% of the nodes,
	and stores their masses and idxs in min_significant_mass /
	min_significant_idx and max_significant_mass /
	max_significant_idx.

	min_significant_mass is reset to 0 first; the other three
	fields are only written when a node is selected (with fewer
	than 10 nodes the lower range is empty).
*************************************************************/
void PrmGraph::set_significant_masses() 
{
	const int last_node_idx = nodes.size()-1;
	const int num_nodes_to_check = (int)(nodes.size() * 0.11);
	int i;

	// lower range: nodes 1..num_nodes_to_check
	score_t max_score=NEG_INF;
	min_significant_mass=0;

	for (i=1; i<=num_nodes_to_check; i++)
	{
		if (nodes[i].score>max_score)
		{
			// remember the best score, otherwise every later node would win
			max_score = nodes[i].score;
			min_significant_mass = nodes[i].mass;
			min_significant_idx = i;
		}
	}

	// upper range: the last num_nodes_to_check+1 nodes (never node 0)
	max_score = NEG_INF;
	for (i=last_node_idx -num_nodes_to_check; i<=last_node_idx && i>0; i++)
	{
		if (nodes[i].score>max_score)
		{
			max_score = nodes[i].score;
			max_significant_mass = nodes[i].mass;
			max_significant_idx = i;
		}
	}
}



// Writes a one line description of the node to os: its mass, then
// "region idx,source fragment type idx,type", its score (in a field of
// width 4) and finally the fragments of its breakage.
void Node::print(Config *config, ostream& os) const
{
	os << mass << " ";
	os << breakage.region_idx << ","  << source_frag_type_idx << "," << type;
	os << " s: " << setw(4) << score << " ";

	breakage.print_fragments(config,os);
	os << endl;
}




void PrmGraph::print_multi_edges(int node_idx, bool print_edge_probs) const
{
	const vector<AA_combo>& aa_edge_combos = config->get_aa_edge_combos();
	int e;
	const vector<string>& aa2label = config->get_aa2label();
	for (e=0; e<nodes[node_idx].out_edge_idxs.size(); e++)
	{
		int a;
		int edge_idx = nodes[node_idx].out_edge_idxs[e];
		
		cout << " (n:" << multi_edges[edge_idx].c_idx << " e:" << edge_idx << " ";

		int j;
		for (j=0; j<multi_edges[edge_idx].variant_ptrs.size(); j++)
		{
			
			int *p = multi_edges[edge_idx].variant_ptrs[j];
			int num_aa = *p++;
			int *aas = p;
			for (a=0; a<num_aa; a++)
				cout << aa2label[aas[a]];
			cout <<" ";
			if (print_edge_probs)
				cout << fixed << setprecision(2) << multi_edges[edge_idx].variant_probs[j] <<" ";
		}
		cout << ") ";
	}
	cout << endl;
}


void PrmGraph::print_only_scores() const
{
	int i;

	cout << setprecision(3) << fixed;
	for (i=0; i<nodes.size(); i++)
	{
		cout << nodes[i].mass << " " << nodes[i].score << endl;
	}
}


// Writes all nodes to os, one node per line: idx, mass, "region idx,
// source fragment type idx,type", score and the breakage fragments.
// Nodes that have out edges are followed by a line listing these edges;
// note that this line is produced by print_multi_edges, which writes to
// cout and not to os.
void PrmGraph::print(ostream& os, bool print_edge_probs) const
{
	const int num_nodes = nodes.size();
	int n;
	for (n=0; n<num_nodes; n++)
	{
		const Node& node = nodes[n];

		os << fixed << setw(4) << left << n << " ";
		os << setw(8) << setprecision(3) << node.mass << " ";
		os << node.breakage.region_idx << ","  << node.source_frag_type_idx << "," << node.type;
		os << " s: " << setw(4) << node.score << " ";

		node.breakage.print_fragments(config,os);
		os << endl;

		if (node.out_edge_idxs.size()>0)
			print_multi_edges(n,print_edge_probs);

		os << endl;
	}
}




void PrmGraph::print_with_multi_edges() const
{
	int i;
	for (i=0; i<nodes.size(); i++)
	{
		cout << setw(4) << left << i << " " << setw(8) << setprecision(3) << nodes[i].mass << " " << nodes[i].breakage.region_idx 
		   << ","  << nodes[i].source_frag_type_idx << "," << nodes[i].type << " s: " << setw(4) << nodes[i].score << " ";

		nodes[i].breakage.print_fragments(config,cout);
		cout << endl;
		if (nodes[i].out_edge_idxs.size()>0)
			print_multi_edges(i,true);

		cout << endl;
	}

}










