#ifndef __PRMGRAPH_H__
#define __PRMGRAPH_H__

#include "Spectrum.h"
#include "Model.h"
#include "includes.h"


#define BONUS_FOR_COMPLETE_PEPTIDE 25.0


class PrmGraph;


struct SeqPath;

// This structure contains a path that can be expanded to many variants
struct MultiPath {

	MultiPath() : original_rank(-1), n_term_mass(-1), c_term_mass(-1),
		n_term_aa(N_TERM), c_term_aa(C_TERM), ind_compete_path(0), path_score(0) {};

	bool check_if_correct(const Peptide& p, Config *config) const;

	int  get_num_correct_aas(const PrmGraph& prm, const Peptide& p, Config *config) const;

	int  get_num_aas() const;

	void print(Config *config, ostream& os=cout) const;



	int original_rank;
	mass_t n_term_mass;
	mass_t c_term_mass;

	int n_term_aa;
	int c_term_aa;

	int ind_compete_path;   // set to 1 if the path is from the N-terminal to the C-terminal

	score_t  path_score;

	vector<int> edge_idxs; // for the multi edges
	vector<int> node_idxs;
	vector<Breakage *> breakages;
};





// The structure for a single path corresponsing to one amino acid sequence
struct SeqPath {

	SeqPath() : multi_path_rank(-1), n_term_mass(-1), c_term_mass(-1),
		n_term_aa(N_TERM), c_term_aa(C_TERM), pm_with_19(-1), charge(0),
		path_score(0), seq_prob(0), sort_key(0) {};


	void print(ostream &os = cout) const;

	void print_with_probs(ostream &os = cout) const;

	void print_full(Config *config, ostream &os = cout) const;

	void get_amino_acids(vector<int>& aas) const
	{
		int i;
		int num_aa = positions.size()-1;
		aas.resize(num_aa);
		for (i=0; i<num_aa; i++)
			aas[i]=positions[i].aa;
	}


	// calculates score according to the aa probabilties
	// gives premium if the path belonged to a denovo solution 
	void calc_confidence_score(int denovo_rank=-1);

	int get_num_correct_aas(Peptide& pep, Config *config) const;


	bool operator< (const SeqPath& other) const
	{
		return (path_score>other.path_score);
	}

	
	bool check_if_correct(const string& str, Config *config) const;

	// returns the number of b/y ions
	int get_num_frags(const vector<int>& frag_idxs) const;


	void parse_path_to_smaller_ones(Config *config,
		int min_length, int max_length, vector<SeqPath>& new_paths) const;


	int get_num_aa() const { return positions.size()-1; }


	// adds the relevant PathPos to the path and adjusts the other non-terminal values 
	void add_edge_variant(const MultiEdge& edge, int e_idx, int varaint_idx);

	void make_seq_str(Config *config);

	vector<PathPos> positions; // there are n+1 positions for n amino acids
	int multi_path_rank;
		
	mass_t n_term_mass;
	mass_t c_term_mass;

	int n_term_aa;
	int c_term_aa;

	mass_t pm_with_19;
	int    charge;

	score_t path_score;
	
	float	seq_prob;

	float	sort_key; // according to the seq heap is sorted, higher value gets first position in heap

	string seq_str;
};






class PrmGraph {
	friend class DeNovoDp;
	friend class EdgeModel;
public:

	void create_graph_from_spectrum(Model *model, Spectrum *spectrum, 
					mass_t pm_with_19, int spec_charge=0);

	// gives maximal score attainable by traversing forward to a node
	// includes the score of the node
	void find_max_prefix_gains( vector< score_t >&  max_prefix_gains) const;
	
	// gives maximal score attainable by traversing forward from each node
	void find_max_suffix_gains( vector< score_t >&  max_gains) const;

	// marks all nodes that are not part of a path that is delta away from optimum
	// returns how many nodes were marked
	int mark_useless_nodes(score_t delta = 50);


	// removes all edges to and from nodes with the active flag set to 0
	void remove_edges_from_inactive_nodes();

	// finds the highest scoring continuous subpath for a given peptide in the graph
	SeqPath get_highest_scoring_subpath(const Peptide& peptide, mass_t start_mass =0) const;

	SeqPath get_longest_subpath(const Peptide& peptide, mass_t start_mass,
						bool verbose = false) const;


	int get_num_nodes() const { return nodes.size(); }
	Spectrum *get_source_spectrum() const { return source_spectrum; }
	Config   *get_config() const { return config; }
	Model    *get_model()  const { return model;  }


	
	void expand_multi_path(const MultiPath& multi_path, 
		                   vector<SeqPath>& seq_paths,
						   int max_num_paths = -1) const;

	void expand_all_multi_paths(const vector<MultiPath>& multi_paths, 
							 vector<SeqPath>& paths, int max_num_paths = 100) const;

	void get_all_correct_node_idxs(const Peptide& peptide, vector<int>& idxs) const;

	void get_all_mirror_node_idxs(const Peptide& peptide, vector<int>& idxs) const;

	// returns the idxs of nodes correponding to the expected breakages of the peptide
	void get_relevant_node_idxs(const Peptide& peptide, vector<int>& idxs) const;



	const MultiEdge& get_multi_edge(int e_idx) const { return multi_edges[e_idx]; }
	const Node& get_node(int n_idx) const { return nodes[n_idx]; }
	const vector<MultiEdge>& get_multi_edges() const { return multi_edges; }
	const vector<Node>& get_nodes() const { return nodes; }
	

	mass_t get_max_node_mass() const { return nodes[nodes.size()-1].mass; }

	void   set_significant_masses();

	mass_t get_min_significant_mass() const { return min_significant_mass; }
	mass_t get_max_significant_mass() const { return max_significant_mass; }
	int	   get_min_significant_idx() const { return min_significant_idx; }
	int    get_max_significant_idx() const { return max_significant_idx; }

	score_t    get_max_node_score() const { return max_node_score; }

	// for quick access to nodes array
	PeakRange get_nodes_in_range(mass_t min_r, mass_t max_r) const;
	int get_max_score_node(mass_t exp_mass, mass_t tolerance) const;
	int get_min_dis_node(mass_t mass, mass_t tolerance) const;

	mass_t find_optimal_pm_with_19_for_graph(Model *_model, 
				Spectrum *spectrum, mass_t base_pm_with_19, int spec_charge);

	void print(ostream& os = cout, bool edge_probs = false) const;

	void print_only_scores() const;

	void add_edge_scores_to_nodes(); 

	void clear() { nodes.clear(); multi_edges.clear(); index_array.clear(); 
									source_spectrum=NULL; config=NULL; }

	void print_with_multi_edges() const;

private:
	vector<Node> nodes;
	vector<MultiEdge> multi_edges;

	vector<int>  index_array;

	Spectrum * source_spectrum;
	Config   * config;
	Model    * model;

	mass_t   pm_with_19; // the pm_with_19 used to create the graph
	int      size_idx;   // 
	int      charge;

	mass_t min_significant_mass;  // the mass of the highest scoring node in the below 10% mass range
	mass_t max_significant_mass;  // the mass of the highest scoring node in the above 90% mass range

	int    min_significant_idx, max_significant_idx;

	score_t max_node_score;

	score_t digest_node_score;

	
	static vector<bool *> out_aa_ind, in_aa_ind; // holds for each amino acid and node
												 // whether there is a single aa leaving that node
	static int ind_vector_lengths;

	// functions
	void init_index_array();
	void merge_close_nodes();
	void create_nodes();
	void add_digest_nodes();
	void score_nodes(Model *model);
	

	// this function performs all the scoring operations on edges 
	// (amino acid scores, missing cleavage scores etc.)
	score_t calc_edge_variant_score(const MultiEdge& egde, int num_aa, int *aa) const;

	// add the variant ptr and scores for this combo
	void add_and_score_edge_variants(const AA_combo& combo, MultiEdge& edge);


	// fill edges
	void fill_single_multi_edges();
	
	void fill_double_multi_edges(bool add_overlap_edges=false);

	void fill_longer_multi_edges(int max_edge_length, bool add_overlap_edges=false);

	// assigns a value to each node's rank field
	void rank_nodes_according_to_score();

	void set_idxs_max_in_out_for_nodes();

	


	// finds the edge idx of the edge from node i to j, returns -1 if no such node exists
	int find_edge_idx_ij(int i_idx, int j_idx) const
	{
		const vector<int>& out_idxs = nodes[i_idx].out_edge_idxs;
		int e;
		for (e=out_idxs.size()-1; e>=0; e--)
			if (multi_edges[out_idxs[e]].c_idx == j_idx)
				return out_idxs[e];
		return -1;
	}


	// sets penalties that should be given if a peptide starts or ends
	// at a ceratin nodes (penalties becasuse the length doesn't cover the whole peptide)
	void set_node_penalties(Model *model);



	// sorts edges according to the value to which they lead
	void sort_outgoing_edges();


	// creates a path object from a collection of edges that are assumed
	// to correspond to a path in the graph
	void create_path_from_edges(vector<int>& edge_idxs, MultiPath& path) const;


	void print_multi_edges(int node_idx, bool print_edge_probs = false) const;
};


// more or less the same functions as the map with the spectrum
// why not use template? good question...
// this seems easier...

/*****************************************************************
Returns the indices of all peaks that are within the mass range
******************************************************************/
inline PeakRange  PrmGraph::get_nodes_in_range(mass_t min_r, mass_t max_r) const
{
	PeakRange pr;

	int max_node_idx = nodes.size()-1;

	if (max_r>= pm_with_19-2)
		max_r = pm_with_19-2;

	if (min_r<0)
		min_r=0;

	if (max_r<min_r)
		return pr;

	int i_min=index_array[(int)min_r];
	int i_max=index_array[((int)max_r)];
		
	if (i_max<max_node_idx)
		i_max++;

	while (nodes[i_min].mass<min_r && i_min<i_max)
		i_min++;

	while (nodes[i_max].mass>max_r && i_max>0)
		i_max--;

	if (nodes[i_min].mass > max_r || nodes[i_max].mass<min_r)
		return pr;

	pr.num_peaks = i_max - i_min+1;
	pr.low_idx =   i_min;
	pr.high_idx =  i_max;

	return pr;
}


/***********************************************************
returns the idx of the peak that is closes to the mass
(-1 is returned if no peak is found)
************************************************************/
inline int PrmGraph::get_min_dis_node(mass_t mass, mass_t tolerance) const
{
	PeakRange pr = get_nodes_in_range(mass-tolerance,mass+tolerance);
	if (pr.num_peaks==0)
		return -1;
	mass_t min_dis=fabs(mass - nodes[pr.low_idx].mass);
	int min_idx=pr.low_idx;
	int j;

	// find closest peak to exp_pos
	for (j=1; j<pr.num_peaks; j++)
	{
		mass_t dis = fabs(mass - nodes[pr.low_idx+j].mass);
		if (dis<min_dis)
		{
			min_dis=dis;
			min_idx = pr.low_idx+j;
		}
	}
	return min_idx;
}

/***********************************************************
returns the idx of the peak that has the highest intensity
in the range (-1 is returned if no peak is found)
Balances between intensity and proximity to the expected position
A peak at the edge needs to be at least 2 times stronger than
a peak exactly at the middle to be chosen.
************************************************************/
inline int PrmGraph::get_max_score_node(mass_t exp_mass, mass_t tolerance) const
{
	PeakRange pr = get_nodes_in_range(exp_mass-tolerance,exp_mass+tolerance);
	if (pr.num_peaks==0)
		return -1;

	if (pr.num_peaks ==1)
		return pr.low_idx;

	mass_t max_val = (2*tolerance - fabs(nodes[pr.low_idx].mass - exp_mass)) *
								nodes[pr.low_idx].score;

	int max_idx=pr.low_idx;
	int j;
	// find closest peak to exp_pos
	for (j=1; j<pr.num_peaks; j++)
	{
		mass_t peak_val = (2*tolerance - fabs(nodes[pr.low_idx+j].mass - exp_mass)) *
								nodes[pr.low_idx+j].score;
		if (peak_val>max_val)
		{
			max_val=peak_val;
			max_idx = pr.low_idx+j;
		}
		else
			break;
	}
	return max_idx;
}




#endif

