#ifndef __PMSQS_H__
#define __PMSQS_H__

#include "ME_REG.h"
#include "QuickClustering.h"
#include "includes.h"

// Minimum spectrum count for a PMCSQS model.
// NOTE(review): presumably the smallest number of spectra for which a model is
// trained; the macro is not used inside this header -- confirm against the .cpp.
#define MIN_SPECTRA_FOR_PMCSQS_MODEL 100

// The SQS models make binary decisions: for each charge c = 1, 2, 3, is this a
// good charge-c spectrum or not.
// The PMC model assumes that the spectrum is good and that its charge is correct.
//




// Indices of the SQS fields. Enumerators are numbered consecutively from 0, so
// SQS_NUM_FIELDS (which must stay last) equals the number of fields.
// The "// N" markers give the index of the enumerator that follows them.
typedef enum SQS_Fields
{
	// 0
	SQS_CONST,
	SQS_NUM_PEAKS,
	SQS_SQR_NUM_PEAKS,

	// 3
	SQS_IND_BELOW_1000,
	SQS_IND_MZ_ABOVE_1000,
	SQS_M_OVER_Z_BELOW_1000,
	SQS_M_OVER_Z_ABOVE_1000,

	// 7
	SQS_PROP_UPTO2G,
	SQS_PROP_UPTO5G,
	SQS_PROP_UPTO10G,
	SQS_PROP_MORE10G,

	// 11
	SQS_PROP_INTEN_UPTO2G,
	SQS_PROP_INTEN_UPTO5G,
	SQS_PROP_INTEN_MORE5G,

	// 14
	SQS_PROP_ISO_PEAKS,
	SQS_PROP_STRONG_WITH_ISO_PEAKS,

	// 16
	SQS_PROP_ALL_WITH_H2O_LOSS,
	SQS_PROP_ALL_WITH_NH3_LOSS,
	SQS_PROP_ALL_WITH_CO_LOSS,
	SQS_PROP_STRONG_WITH_H2O_LOSS,
	SQS_PROP_STRONG_WITH_NH3_LOSS,
	SQS_PROP_STRONG_WITH_CO_LOSS,

	// 22
	SQS_IND_MAX_TAG_LENGTH_ABOVE_4,
	SQS_IND_MAX_TAG_LENGTH_BELOW_4,
	SQS_MAX_TAG_LENGTH_ABOVE_4,
	SQS_MAX_TAG_LENGTH_BELOW_4,
	SQS_PROP_INTEN_IN_TAGS,

	// 27
	SQS_PROP_TAGS1,
	SQS_PROP_STRONG_PEAKS_IN_TAG1,
	SQS_PROP_INTEN_TAG1,
	SQS_PROP_STRONG_BELOW30_TAG1,

	// 31
	SQS_PROP_TAGS2,
	SQS_PROP_STRONG_PEAKS_IN_TAG2,
	SQS_PROP_INTEN_TAG2,
	SQS_PROP_STRONG_BELOW20_TAG2,

	// 35
	SQS_PROP_TAGS3,
	SQS_PROP_STRONG_PEAKS_IN_TAG3,
	SQS_PROP_INTEN_TAG3,
	SQS_PROP_STRONG_BELOW10_TAG3,

	// 39
	SQS_PEAK_DENSE_T1_I,
	SQS_PEAK_DENSE_T2_I,
	SQS_PEAK_DENSE_T3_I,
	SQS_INTEN_DENSE_T1_I,
	SQS_INTEN_DENSE_T2_I,
	SQS_INTEN_DENSE_T3_I,

	// 45
	SQS_PEAK_DENSE_H1_I,
	SQS_PEAK_DENSE_H2_I,
	SQS_INTEN_DENSE_H1_I,
	SQS_INTEN_DENSE_H2_I,

	// 49
	SQS_PEAK_DENSE_T1_II,
	SQS_PEAK_DENSE_T2_II,
	SQS_PEAK_DENSE_T3_II,
	SQS_INTEN_DENSE_T1_II,
	SQS_INTEN_DENSE_T2_II,
	SQS_INTEN_DENSE_T3_II,

	// 55
	SQS_PEAK_DENSE_H1_II,
	SQS_PEAK_DENSE_H2_II,
	SQS_INTEN_DENSE_H1_II,
	SQS_INTEN_DENSE_H2_II,

	// 59
	SQS_PROP_MZ_RANGE_WITH_33_INTEN_I,
	SQS_PROP_MZ_RANGE_WITH_50_INTEN_I,
	SQS_PROP_MZ_RANGE_WITH_75_INTEN_I,
	SQS_PROP_MZ_RANGE_WITH_90_INTEN_I,

	// 63
	SQS_PROP_MZ_RANGE_WITH_33_INTEN_II,
	SQS_PROP_MZ_RANGE_WITH_50_INTEN_II,
	SQS_PROP_MZ_RANGE_WITH_75_INTEN_II,
	SQS_PROP_MZ_RANGE_WITH_90_INTEN_II,

	// 67
	SQS_NUM_FRAG_PAIRS_1,
	SQS_NUM_STRONG_FRAG_PAIRS_1,
	SQS_NUM_C2_FRAG_PAIRS_1,
	SQS_NUM_STRONG_C2_FRAG_PAIRS_1,

	// 71
	SQS_NUM_FRAG_PAIRS_2,
	SQS_NUM_STRONG_FRAG_PAIRS_2,
	SQS_NUM_C2_FRAG_PAIRS_2,
	SQS_NUM_STRONG_C2_FRAG_PAIRS_2,

	// 75
	SQS_NUM_FRAG_PAIRS_3,
	SQS_NUM_STRONG_FRAG_PAIRS_3,
	SQS_NUM_C2_FRAG_PAIRS_3,
	SQS_NUM_STRONG_C2_FRAG_PAIRS_3,

	// 79
	SQS_PROP_OF_MAX_FRAG_PAIRS_1,
	SQS_PROP_OF_MAX_STRONG_FRAG_PAIRS_1,
	SQS_PROP_OF_MAX_C2_FRAG_PAIRS_1,
	SQS_PROP_OF_MAX_STRONG_C2_FRAG_PAIRS_1,

	// 83
	SQS_PROP_OF_MAX_FRAG_PAIRS_2,
	SQS_PROP_OF_MAX_STRONG_FRAG_PAIRS_2,
	SQS_PROP_OF_MAX_C2_FRAG_PAIRS_2,
	SQS_PROP_OF_MAX_STRONG_C2_FRAG_PAIRS_2,

	// 87
	SQS_PROP_OF_MAX_FRAG_PAIRS_3,
	SQS_PROP_OF_MAX_STRONG_FRAG_PAIRS_3,
	SQS_PROP_OF_MAX_C2_FRAG_PAIRS_3,
	SQS_PROP_OF_MAX_STRONG_C2_FRAG_PAIRS_3,

	// The fields below are taken from the PM features (maximal values found).
	// Every value has the maximum background value subtracted from it, where the
	// background is obtained by using erroneous parent masses.

	// 91
	SQS_PROP_FRAG_PAIRS_1,
	SQS_PROP_STRONG_FRAG_PAIRS_1,
	SQS_PROP_C2_FRAG_PAIRS_1,
	SQS_PROP_STRONG_C2_FRAG_PAIRS_1,

	// 95
	SQS_PROP_FRAG_PAIRS_2,
	SQS_PROP_STRONG_FRAG_PAIRS_2,
	SQS_PROP_C2_FRAG_PAIRS_2,
	SQS_PROP_STRONG_C2_FRAG_PAIRS_2,

	// 99
	SQS_PROP_FRAG_PAIRS_3,
	SQS_PROP_STRONG_FRAG_PAIRS_3,
	SQS_PROP_C2_FRAG_PAIRS_3,
	SQS_PROP_STRONG_C2_FRAG_PAIRS_3,

	// 103 -- field count, keep last
	SQS_NUM_FIELDS
} SQS_Fields;


// Indices of the PMC fields. Enumerators are numbered consecutively from 0, so
// PMC_NUM_FIELDS (which must stay last) equals the number of fields.
// The "// N" markers give the index of the enumerator that follows them.
typedef enum PMC_Fields
{
	// 0
	PMC_CONST,

	// 1
	PMC_DIFF_FROM_MEASURED_MZ,
	PMC_ABS_DIFF_FROM_MEASURED_MZ,

	// 3
	PMC_IND_IS_P0,
	PMC_IND_IS_P1,
	PMC_IND_IS_P2,
	PMC_IND_IS_P3,

	// 7
	PMC_IND_IS_M1,
	PMC_IND_IS_M2,
	PMC_IND_IS_M3,
	PMC_IND_IS_M4,

	// 11
	PMC_IND_HAS_NO_PAIRS,
	PMC_IND_HAS_NO_C2_PAIRS,
	PMC_IND_HAS_NO_STRONG_PAIRS,
	PMC_IND_HAS_NO_C2_STRONG_PAIRS,

	// 15
	PMC_IND_HAS_PAIRS,
	PMC_IND_HAS_C2_PAIRS,
	PMC_IND_HAS_STRONG_PAIRS,
	PMC_IND_HAS_C2_STRONG_PAIRS,
	PMC_IND_HAS_BOTH_PAIRS,
	PMC_IND_HAS_BOTH_STRONG_PAIRS,

	// 21 -- tolerance values
	PMC_AVG_TOL_OVER_PAIRS,
	PMC_AVG_TOL_OVER_STRONG_PAIRS,
	PMC_AVG_TOL_SQR_OVER_PAIRS,
	PMC_AVG_TOL_SQR_OVER_STRONG_PAIRS,

	// 25
	PMC_AVG_TOL_OVER_C2_PAIRS,
	PMC_AVG_TOL_OVER_C2_STRONG_PAIRS,
	PMC_AVG_TOL_SQR_OVER_C2_PAIRS,
	PMC_AVG_TOL_SQR_OVER_C2_STRONG_PAIRS,

	// 29 -- tolerance values compared to the optimal tolerance values
	PMC_AVG_DIFF_TOL_OVER_PAIRS,
	PMC_AVG_DIFF_TOL_OVER_STRONG_PAIRS,
	PMC_AVG_DIFF_TOL_SQR_OVER_PAIRS,
	PMC_AVG_DIFF_TOL_SQR_OVER_STRONG_PAIRS,

	// 33
	PMC_AVG_DIFF_TOL_OVER_C2_PAIRS,
	PMC_AVG_DIFF_TOL_OVER_C2_STRONG_PAIRS,
	PMC_AVG_DIFF_TOL_SQR_OVER_C2_PAIRS,
	PMC_AVG_DIFF_TOL_SQR_OVER_C2_STRONG_PAIRS,

	// 37 -- values in raw numbers
	PMC_NUM_FRAG_PAIRS,
	PMC_NUM_STRONG_FRAG_PAIRS,
	PMC_NUM_C2_FRAG_PAIRS,
	PMC_NUM_STRONG_C2_FRAG_PAIRS,

	// 41 -- indicators
	PMC_IND_BEST_NUM_FRAG_PAIRS,
	PMC_IND_BEST_NUM_STRONG_FRAG_PAIRS,
	PMC_IND_BEST_BOTH_PAIRS,

	// 44
	PMC_IND_BEST_NUM_C2_FRAG_PAIRS,
	PMC_IND_BEST_NUM_STRONG_C2_FRAG_PAIRS,
	PMC_IND_BEST_BOTH_C2_PAIRS,

	// 47
	PMC_IND_PAIRS_AND_MIN_TOLERANCE,
	PMC_IND_STRONG_PAIRS_AND_MIN_TOLERANCE,
	PMC_IND_C2_PAIRS_AND_MIN_TOLERANCE,
	PMC_IND_C2_STRONG_PAIRS_AND_MIN_TOLERANCE,

	// 51
	PMC_LOG_DIS_PAIRS_MIN_TOL,
	PMC_LOG_DIS_STRONG_PAIRS_MIN_TOL,
	PMC_LOG_DIS_C2_PAIRS_MIN_TOL,
	PMC_LOG_DIS_C2_STRONG_PAIRS_MIN_TOL,

	// 55 -- values as proportions of num peaks / num strong peaks
	PMC_PROP_NUM_PAIRS,
	PMC_PROP_NUM_STRONG_PAIRS,
	PMC_PROP_NUM_C2_PAIRS,
	PMC_PROP_NUM_C2_STRONG_PAIRS,

	// 59
	PMC_PROP_INTEN_PAIRS,
	PMC_PROP_INTEN_STRONG_PAIRS,
	PMC_PROP_INTEN_C2_PAIRS,
	PMC_PROP_INTEN_C2_STRONG_PAIRS,

	// 63 -- compared with the optimal value measured over all mass values
	PMC_REL_PROP_NUM_PAIRS,
	PMC_REL_PROP_NUM_STRONG_PAIRS,
	PMC_REL_PROP_NUM_C2_PAIRS,
	PMC_REL_PROP_NUM_C2_STRONG_PAIRS,

	// 67
	PMC_REL_PROP_INTEN_PAIRS,
	PMC_REL_PROP_INTEN_STRONG_PAIRS,
	PMC_REL_PROP_INTEN_C2_PAIRS,
	PMC_REL_PROP_INTEN_C2_STRONG_PAIRS,

	// 71 -- field count, keep last
	PMC_NUM_FIELDS

} PMC_Fields;


// Record of fragment-pair statistics.
// NOTE(review): presumably one record per candidate m/z value (see m_over_z),
// as stored in PMCSQS_Scorer::curr_spec_pmc_tables -- confirm in the .cpp.
struct PmcStats {

	// pair counts
	float num_frag_pairs;
	float num_strong_frag_pairs;
	float num_c2_frag_pairs;
	float num_strong_c2_frag_pairs;

	// pair intensities
	float inten_frag_pairs;
	float inten_strong_pairs;
	float inten_c2_pairs;
	float inten_c2_strong_pairs;

	// tolerances (plain and squared) over all pairs
	mass_t tol_frag_pairs;
	mass_t tol_sqr_frag_pairs;
	mass_t tol_c2_frag_pairs;
	mass_t tol_c2_sqr_frag_pairs;

	// tolerances (plain and squared) over strong pairs
	mass_t tol_strong_frag_pairs;
	mass_t tol_strong_sqr_frag_pairs;
	mass_t tol_strong_c2_frag_pairs;
	mass_t tol_strong_c2_sqr_frag_pairs;

	// "has the minimal tolerance" indicators
	bool ind_pairs_with_min_tol;
	bool ind_strong_pairs_with_min_tol;
	bool ind_c2_pairs_with_min_tol;
	bool ind_c2_strong_pairs_with_min_tol;

	// log distances from the minimal tolerance
	float log_dis_from_pairs_min_tol;
	float log_dis_from_strong_pairs_min_tol;
	float log_dis_from_c2_pairs_min_tol;
	float log_dis_from_c2_strong_pairs_min_tol;

	mass_t m_over_z;

	// Defaults: counts, intensities and log distances are 0, indicators are
	// false, m_over_z is -1. Six of the eight tolerance members start at
	// 99999999; tol_frag_pairs and tol_sqr_frag_pairs start at 0.
	// NOTE(review): the 0 default of those two tolerances differs from the
	// other six -- confirm whether that is intentional.
	PmcStats()
		: num_frag_pairs(0),
		  num_strong_frag_pairs(0),
		  num_c2_frag_pairs(0),
		  num_strong_c2_frag_pairs(0),
		  inten_frag_pairs(0),
		  inten_strong_pairs(0),
		  inten_c2_pairs(0),
		  inten_c2_strong_pairs(0),
		  tol_frag_pairs(0),
		  tol_sqr_frag_pairs(0),
		  tol_c2_frag_pairs(99999999),
		  tol_c2_sqr_frag_pairs(99999999),
		  tol_strong_frag_pairs(99999999),
		  tol_strong_sqr_frag_pairs(99999999),
		  tol_strong_c2_frag_pairs(99999999),
		  tol_strong_c2_sqr_frag_pairs(99999999),
		  ind_pairs_with_min_tol(false),
		  ind_strong_pairs_with_min_tol(false),
		  ind_c2_pairs_with_min_tol(false),
		  ind_c2_strong_pairs_with_min_tol(false),
		  log_dis_from_pairs_min_tol(0),
		  log_dis_from_strong_pairs_min_tol(0),
		  log_dis_from_c2_pairs_min_tol(0),
		  log_dis_from_c2_strong_pairs_min_tol(0),
		  m_over_z(-1)
	{}
};


// Per-charge result: two (m/z, probability) slots.
// NOTE(review): presumably slot 1 is the best candidate and slot 2 the
// runner-up -- confirm in find_best_mz_values.
struct PmcSqsChargeRes {

	float mz1;
	float prob1;
	float mz2;
	float prob2;

	// Every slot starts at -1.
	PmcSqsChargeRes()
		: mz1(-1),
		  prob1(-1),
		  mz2(-1),
		  prob2(-1)
	{}
};

// One column of the table passed to PMCSQS_Scorer::fill_SQS_DP: Val+1 integer slots.
// NOTE(review): Val is declared outside this header; presumably there is one
// slot per index 0..Val -- confirm what the slots hold in the .cpp.
struct  DPColumn {
	int pointers[Val+1];
};


// Holds the PMC and SQS regression models together with scratch data for the
// spectrum that is currently being processed. Only the small accessors are
// defined inline; everything else is implemented outside this header.
class PMCSQS_Scorer {
public:

	// Starts with no models: max_charge is 0, both "initialized" indicators are
	// false, config is NULL, the intensity/strong-peak counters are 0, and the
	// pair-sum offset and bin increment are NEG_INF until they are set.
	PMCSQS_Scorer()
		: max_charge(0),
		  frag_pair_sum_offset(NEG_INF),
		  bin_increment(NEG_INF),
		  ind_initialized_pmc(false),
		  ind_initialized_sqs(false),
		  config(NULL),
		  curr_spec_total_intensity(0),
		  curr_spec_strong_intensity(0),
		  curr_spec_num_strong(0)
	{}

	void init_for_current_spec(Config *config, const BasicSpectrum& bs);

	// Accessors for the fragment-pair sum offset (b+y or c+z - (PM+19)).
	void set_frag_pair_sum_offset(mass_t offset)
	{
		frag_pair_sum_offset = offset;
	}

	mass_t get_frag_pair_sum_offset() const
	{
		return frag_pair_sum_offset;
	}

	// Accessors for the bin increment.
	void set_bin_increment(mass_t inc)
	{
		bin_increment = inc;
	}

	mass_t get_bin_increment() const
	{
		return bin_increment;
	}

	// Writes its result for the given charge into res.
	void find_best_mz_values(const BasicSpectrum& bs, int charge, PmcSqsChargeRes& res) const;

	// Fills res with PmcSqsChargeRes entries for the spectrum.
	// NOTE(review): the meaning of the float return value is not visible here.
	float get_pmcsqs_results_for_spectrum(Config *config,
										  const BasicSpectrum& bs,
										  vector<PmcSqsChargeRes>& res);

	// Reports two (m/z, charge, probability) triplets through the out-pointers.
	// NOTE(review): the meaning of the float return value is not visible here.
	float get_best_mz_charge(Config *config,
							 const BasicSpectrum& bs,
							 mass_t* mz1, int* charge1, float *prob1,
							 mass_t* mz2, int* charge2, float *prob2);

	// Bias to add to the predicted m/z for this charge.
	// The index is not range-checked against charge_mz_biases.
	mass_t get_charge_mz_bias(int charge) const
	{
		return charge_mz_biases[charge];
	}

	void print_spec(const BasicSpectrum& bs) const;

	void test_pmc(Config *config, char *specs_file, int charge);

	// Training entry points.
	void train_sqs_models(Config *config, char *pos_list, char *neg_list, float *inp_weights=NULL);

	void train_pmc_models(Config *config, char *pos_list);

	// Model persistence.
	// NOTE(review): the bool returned by the readers presumably signals success.
	bool read_pmc_models(Config *config, char *file);

	void write_pmc_models(const char *path) const;

	bool read_sqs_models(Config *config, char *file);

	void write_sqs_models(const char *path) const;

	// State queries.
	bool get_ind_initialized_pmc() const
	{
		return ind_initialized_pmc;
	}

	bool get_ind_initialized_sqs() const
	{
		return ind_initialized_sqs;
	}

	int get_max_charge() const
	{
		return max_charge;
	}

private:

	// PMC models, indexed by charge.
	vector<ME_Regression_Model *> pmc_models;

	// SQS models, indexed by charge1 then charge2
	// (charge2=0 is the positive vs. negative classes model).
	vector< vector<ME_Regression_Model *> > sqs_models;

	// Per-charge value that is added to the predicted m/z.
	vector<mass_t> charge_mz_biases;

	// The maximal charge for which we have a model.
	int max_charge;

	// b+y or c+z - (PM+19)
	mass_t frag_pair_sum_offset;

	mass_t bin_increment;

	bool ind_initialized_pmc;

	bool ind_initialized_sqs;

	Config *config;

	// Scratch values for the current spectrum.
	float curr_spec_total_intensity;
	float curr_spec_strong_intensity;
	int   curr_spec_num_strong;

	vector<bool>               curr_spec_strong_inds;
	vector<float>              curr_spec_iso_levels;
	vector< vector<PmcStats> > curr_spec_pmc_tables;
	vector<PmcStats>           curr_spec_background_stats;
	vector<PmcStats>           curr_spec_maximal_values;

	void fill_SQS_DP(const BasicSpectrum& bs, vector<DPColumn>& dp ) const;

	// Feature-vector fillers for the regression samples.
	void fill_fval_vector_with_SQS(const BasicSpectrum& bs,
								   ME_Regression_Sample& sam) const;

	void fill_fval_vectors_with_PMC(const BasicSpectrum& bs,
									int charge,
									vector<ME_Regression_Sample>& samples) const;

	int get_optimal_bin(int true_mz_bin, int charge) const;

	// Outputs go through correct_idx and bad_pmc_idxs.
	void select_training_sample_idxs(int charge,
									 const vector<ME_Regression_Sample>& spec_samples,
									 const BasicSpectrum& bs,
									 int& correct_idx,
									 vector<int>& bad_pmc_idxs) const;

	void calculate_curr_spec_pmc_values( const BasicSpectrum& bs, mass_t bin_increment);

	void get_sqs_features_from_pmc_tables(const BasicSpectrum& bs,
										  vector< vector<float> >& sqs_featrues) const;

};


// Fills pmc_stats_vec with PmcStats records for the given charge; the
// implementation is not part of this header.
// NOTE(review): presumably one record is produced per candidate m/z, scanning
// from minus_range to plus_range in steps of increment, with strong_inds and
// iso_levels holding one entry per peak of bs -- confirm against the definition.
void fill_pmc_table_stats(int charge,
						  const mass_t single_charge_pair_offset, // the sum of b+y or c+z
						  mass_t minus_range, 
						  mass_t plus_range,
						  mass_t increment,
						  Config *config,
						  const BasicSpectrum& bs,
						  const vector<bool>& strong_inds,
						  const vector<float>& iso_levels,
						  vector<PmcStats>& pmc_stats_vec);


// Declared here, defined elsewhere.
// NOTE(review): presumably writes the training files used for the PMC/SQS
// models -- confirm against the definition.
void create_training_files(Config *config);



#endif

