/******************************************************************
Parser for the website's interface.
Controls everything that involves running de novo from the website
*******************************************************************/
#include "RegularRankModel.h" 
#include "FileManagement.h"
#include "DeNovoDp.h"
#include "DeNovoSolutions.h"
#include "PMCSQS.h"
#include "auxfun.h"
#include "includes.h"

// Reports a line of the input parameter file that could not be parsed and
// terminates the program.
//
// line - the offending line exactly as it was read from the input file; it is
//        echoed verbatim after the error message. A null pointer is printed
//        as an empty line instead of being streamed (which would be undefined).
//
// The message is written to standard output and the process exits with
// status 1, so this function never returns to its caller.
void print_error(char *line) 
{
	cout << "Error: couldn't parse input file line:\n" << (line ? line : "") << endl;
	exit(1);
}



int main(int argc, char **argv) 
{ 

	char *input_file = argv[1];

	FILE *inp_file = fopen(input_file,"r");
	if (! inp_file)
	{
		cout << "Error: couldn't open input file: " << input_file << endl;
		exit(1);
	}

	vector<string> files;
	vector<string> ptm_lines;
	
	char model_name[64]; 
	char model_dir[128];
	char output_file[256];
	int protease_digest=TRYPSIN_DIGEST;

	mass_t tolerance=0.5;
	mass_t pm_tolerance=2.5;

	int num_solutions=10;
	int min_length = 5;
	int max_length = 20;


	strcpy(model_name,"LTQ_LOW_TRYP");
	strcpy(output_file,"pepnovo_out.txt");

	char buff[256];
	while (fgets(buff,256,inp_file))
	{
		if (buff[0]=='#')
			continue;

		if (! strncmp(buff,"spectra,",8))
		{
			char file_path[256];
			if (sscanf(buff,"spectra,%s",file_path) != 1)
				print_error(buff);

			files.push_back(file_path);
			continue;
		}

		if (! strncmp(buff,"output,",7))
		{
			if (sscanf(buff,"output,%s",output_file) != 1)
				print_error(buff);

			continue;
		}

		if (! strncmp(buff,"protease,",9))
		{
			char prot_name[64];
			if (sscanf(buff,"protease,%s",prot_name) != 1)
				print_error(buff);

			if (! strcmp(prot_name,"Trypsin"))
			{
				protease_digest=TRYPSIN_DIGEST;
			}
			else
				protease_digest = NON_SPECIFIC_DIGEST;

			continue;
		}

		if (! strncmp(buff,"modelDir,",7))
		{
			if (sscanf(buff,"modelDir,%s",model_dir) != 1)
				print_error(buff);

			continue;
		}

		if (! strncmp(buff,"instrument,",11))
		{
			char instrument_name[64];
			if ( sscanf(buff,"instrument,%s",instrument_name) != 1)
				print_error(buff);

			if (! strcmp(instrument_name,"ESI-ION-TRAP"))
			{
				strcpy(model_name,"LTQ_LOW_TRYP");
			}
			else if (! strcmp(instrument_name,"FT-HYBRID"))
			{
				strcpy(model_name,"LQQ_FT_HYBRID");
			}
			else
			{
				cout << "Warnaing: model for " << instrument_name << " not supported. Using " << model_name << endl;
			}

			continue;
		}

		if (! strncmp(buff,"mod,",4))
		{
			ptm_lines.push_back(buff);
			continue;
		}

		if (! strncmp(buff,"PMTolerance,",10))
		{
			float val;
			if (sscanf(buff,"PMTolerance,%f",&val) != 1)
				print_error(buff);
			pm_tolerance = val;
			continue;
		}

		if (! strncmp(buff,"IonTolerance,",10))
		{
			float val;
			if (sscanf(buff,"IonTolerance,%f",&val) != 1)
				print_error(buff);
			tolerance = val;

			continue;
		}

		if (! strncmp(buff,"numSolutions,",11))
		{
			int val;
			if (sscanf(buff,"numSolutions,%d",&val) != 1)
				print_error(buff);
			num_solutions = val;
			continue;
		}

		if (! strncmp(buff,"minLength,",8))
		{
			int val;
			if (sscanf(buff,"minLength,%d",&val) != 1)
				print_error(buff);
			min_length = val;
			continue;
		}

			if (! strncmp(buff,"maxLength,",8))
		{
			int val;
			if (sscanf(buff,"maxLength,%d",&val) != 1)
				print_error(buff);
			max_length = val;
			continue;
		}

	}

	// Read model and set model parameters
	RegularRankModel model;
	PMCSQS_Scorer pmcsqs;
	Config *config = model.get_config();
	
	config->set_resource_dir(string(model_dir));
	model.read_model(model_name);

	config->set_tolerance(tolerance);
	config->set_pm_tolerance(pm_tolerance);
	config->set_digest_type(protease_digest);
	config->apply_site_input_PTMs(ptm_lines);

	ofstream out_stream(output_file,ios::out);
	if (! out_stream.good())
	{
		cout << "Error: couldn't open output file for writing: " << output_file << endl;
		exit(1);
	}
	
	//////////////////////////////////////////////////////////////////
	// read pmc sqs models
	if (config->get_need_to_estimate_pm() )
	{
		if (! (pmcsqs.read_pmc_models(config) && 
			   pmcsqs.read_sqs_models(config) ) )
		{
			cout << "Error: could not find PMC and SQF models for " << config->get_model_name() << endl;
			cout << "Cannot perform precursor mass correction and charge determiniation!" << endl;
			exit(1);
		}
	}

	clock_t start_time,last_time;
	start_time = clock();
	last_time = start_time;

	cout << "Running PepNovo release 20070420." << endl;
	cout << files.size() << " input files." << endl;

	///////////////////////////////////////////////////////////////////
	// iterate on input files
	int num_spectra_processed=0;
	int f;
	for (f=0; f<files.size(); f++) 
	{
	
		const char *spectra_file = files[f].c_str();
		FileManager fm;
		FileSet fs;
		BasicSpecReader bsr;

		///////////////////////////////////////////////
		// Quick read, get all pointers to begining of spectra
		if (get_file_extension_type(files[f]) != MZXML)
		{
			fm.init_from_file(config,spectra_file);
		}
		else
			fm.init_and_read_single_mzXML(config,spectra_file,f);

		fs.select_all_files(fm);


		const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();
		int sc;
		for (sc=0; sc<all_ssf.size(); sc++)
		{
			static vector<QCPeak> peaks;
			SingleSpectrumFile *ssf = all_ssf[sc];
			
			if (peaks.size()<ssf->num_peaks)
			{
				int new_size = ssf->num_peaks*2;
				if (new_size<2500)
					new_size=2500;
				peaks.resize(new_size);
			}

			// progress report every 3 seconds
			
			clock_t curr_time = clock();
			double elapsed_time = (curr_time - last_time)/(double)CLOCKS_PER_SEC;
			if (elapsed_time>5)
			{
				last_time = curr_time;
				cout << "Processing file " << f+1 << "/" << files.size();
				if (all_ssf.size() == 1)
				{
					cout << "  spectrum 1/1 of current file." << endl;
				}
				else
					cout << "  spectrum " << sc+1 << "/" << all_ssf.size() << 
					" of current file." << endl;
			}

			int num_peaks = bsr.read_basic_spec(config,fm,ssf,&peaks[0]);
			ssf->file_idx = f;

			// convert peak list ot a spectrum with charge (if original charge ==0)
			// the spectrum gets charge 2, but the true charge is computed from the data
		
			Spectrum s;
			s.init_from_QCPeaks(config,&peaks[0],num_peaks,ssf);
		
			vector<SeqPath> solutions;
			solutions.clear();

			if (config->get_need_to_estimate_pm() && ssf->charge>3)
			{
				ssf->print_ssf_stats(config,out_stream);
				out_stream << "# Charge " << s.get_charge() << " not supported yet..." << endl << endl;
				continue;
			}

			num_spectra_processed++;

			// Simple de novo or PRM, no filtering needed
			if (! config->get_try_multiple_pms() && 
				! config->get_need_to_estimate_pm() &&
				ssf->charge>0 )
			{
				generate_denovo_solutions(&model,&s,s.get_org_pm_with_19(),s.get_charge(),
						num_solutions,min_length,max_length,solutions);
			}

			// more complicated case, need to perform some sort of filtering
			else  
			{
				vector<mass_t> pms_with_19;
				vector<int>    charges;
				pms_with_19.clear();
				charges.clear();

				if (config->get_need_to_estimate_pm() || ssf->charge == 0)
				{
					mass_t	mz1,mz2;
					int		charge1,charge2;
					float	prob1,prob2;s;
					
					BasicSpectrum bs;
					bs.ssf = ssf;
					bs.peaks = &peaks[0];
					bs.num_peaks = num_peaks;

					// output m/z and prob values for the different charge states
					pmcsqs.get_best_mz_charge(config,bs, 
						&mz1,&charge1,&prob1,&mz2,&charge2,&prob2);

					mass_t corr_pm_with_19   =  mz1*charge1 - (charge1-1);
					mass_t second_pm_with_19 =  -1;
					if (mz2>0 && mz2<5000)
						second_pm_with_19=mz2*charge2 - (charge2-1);

				
					// calculate the optimal pm_with_19
					// this will be used for the denovo sequencing

					{
						PrmGraph prm;
						bool update_org_pm_with_19 = false;
						if (s.get_charge() != charge1 || ssf->charge==0)
						{
							s.set_charge(charge1);
							update_org_pm_with_19 = true;
						}

						model.init_model_for_scoring_spectrum(&s);
						corr_pm_with_19 = prm.find_optimal_pm_with_19_for_graph(&model,
							&s,corr_pm_with_19,s.get_charge());

						if (update_org_pm_with_19)
							s.set_org_pm_with_19(corr_pm_with_19);
					}

					pms_with_19.push_back(corr_pm_with_19);
					charges.push_back(charge1);

					if (config->get_try_multiple_pms())
					{
						pms_with_19.push_back(second_pm_with_19);
						charges.push_back(charge2);
					}
				}

				
				// use skips of +-1
				if (config->get_try_multiple_pms())
				{
					if (config->get_pm_tolerance()<0.1)
					{
						int i;
						for (i=0; i<=3; i++)
						{
							pms_with_19.push_back(s.get_org_pm_with_19()-i*1.0023);
							charges.push_back(s.get_charge());
						}
					}
					else
					{
						pms_with_19.push_back(s.get_org_pm_with_19());
						charges.push_back(s.get_charge());
					}
				}

				
				if (pms_with_19.size()==0)
				{
					pms_with_19.push_back(s.get_org_pm_with_19());
					charges.push_back(s.get_charge());
				}
			
				// by now we might have a list of several charges, M+H to examine
				// Find the pooled results
				
				generate_denovo_solutions_from_several_pms(&model,&s,
					num_solutions,
					min_length,
					max_length,
					pms_with_19,
					charges,
					solutions);
			}

			////////////////////////////////////////////////////////////
			// if we are here it is only for denovo/tags
			// print results
			////////////////////////////////////////////////////////////

			ssf->print_ssf_stats(config,out_stream);

			if (solutions.size() == 0)
			{
				out_stream << "No solutions found." << endl;
			}
			else 
			{
				out_stream << "#Index\tProb\tScore\tN-mass\tC-Mass\t[M+H]\tCharge\tSequence" << endl;
				int i; 	
				for (i=0; i<solutions.size(); i++) 
				{
					out_stream << setprecision(3) << fixed << i << "\t";
					out_stream << solutions[i].seq_prob << "\t";
					out_stream << solutions[i].path_score << "\t";
					out_stream << solutions[i].n_term_mass << "\t";
					out_stream << solutions[i].c_term_mass << "\t";
					out_stream << solutions[i].pm_with_19 << "\t";
					out_stream << solutions[i].charge << "\t";
					out_stream << solutions[i].seq_str;
					out_stream << endl;
				}
			}
			out_stream << endl;
		}
	}

	clock_t curr_time = clock();
	double elapsed_time = (curr_time - start_time)/(double)CLOCKS_PER_SEC;
	cout << "Processed " << files.size() << " (" << num_spectra_processed << " spectra)." << endl;
	cout << "Elapsed time " << setprecision(3) << elapsed_time << " seconds." << endl;
	return 0;
}





