/***************************************************************************
* Title:          MSTopDown.cpp
* Author:         Ari Frank
* Copyright (c) 2009 The Regents of the University of California
* All Rights Reserved
* See file LICENSE for details.
***************************************************************************/

/******************************************************************************
Copyright 2008, The Regents of the University of California
All Rights Reserved

Permission to use, copy, modify and distribute any part of this
program for educational, research and non-profit purposes, without fee,
and without a written agreement is hereby granted, provided that the
above copyright notice, this paragraph and the following three paragraphs
appear in all copies.

Those desiring to incorporate this work into commercial
products or use for commercial purposes should contact the Technology
Transfer & Intellectual Property Services, University of California,
San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent@ucsd.edu.

IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.

THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*******************************************************************************/




#include "MonoSpectrum.h"
#include "MonoFragments.h"
#include "ProteinSequence.h"
#include "MonoAlignment.h"
#include "includes.h"



/**
 * Prints the program banner and the command line usage summary to standard
 * output.
 *
 * The flag names listed here must match the ones parsed in main(); in
 * particular the fragmentation model is selected with "-model_type".
 */
void print_help()
{
	static const char helpText[] =
		"Spectral Alignment for Top-Down Mass Spectra\n"
		"--------------------------------------------\n\n"
		"Version 1.0 (alpha). Copyright 2008, the Regents of the University of California.\n"
		"Programmed by Ari Frank ( arf@cs.ucsd.edu ).\n\n"
		"This program is designed as a proof-of-concept for using spectral alignment for discerning protein forms using the spectral alignment algorithm."
		" It finds the optimal alignment(s) between a deconvoluted monoisotopic peak list and a proteomic sequence."
		" The program can be used both to find the correct number (and types) of posttranslational modifications (novel or known types), and also help detect the presence of multiple forms in a single spectrum.\n"
		"To run the program you must supply the following arguments: \n"
		"-peak_list <path to peak list file> - first mass is the monoisotopic precursor mass (M), followed by the monoisotopic peak masses.\n"
		"-sequence <path to sequence of a single protein> - the sequence should be given in FASTA format.\n"
		"-specific <X> - where X is the maximal number of known PTMs that can be expected.\n"
		"-general  <Y> - where Y is the maximal number of arbitrary PTMs that can be expected.\n\n"
		"Optional arguments:\n"
		"-num_forms <Z> - where Z is the maximal number of forms that should be searched for (default is Z=1).\n"
		"-print - outputs the peak list for Z protein forms, with X known PTMs and Y novel PTMs.\n"
		"-model_type <TYPE> - the type of fragmentation to expect, currently only ECD is supported (TYPE=ECD is the default).\n"
		"-PPM <pp> - the expected resolution of the mass measurements (default is pp=10).\n\n"
		"See readme file for more information about these options and run examples.\n";

	// std::endl supplies the final blank line and flushes, as the original did.
	std::cout << helpText << std::endl;
}

/**
 * Reports a fatal command line error and terminates the program.
 *
 * Writes "Error: <msg>" followed by a blank line to standard output, prints
 * the usage text via print_help(), and exits with status 1.
 *
 * @param msg  text describing the problem.
 *
 * NOTE(review): callers pass string literals, so `const char *` would be the
 * natural parameter type - confirm it does not clash with any declaration in
 * the project headers before changing it.
 */
void error(char *msg)
{
	cout << "Error: " << msg << endl;
	cout << endl;

	print_help();

	exit(1);
}



int main(int argc, char ** argv)
{
	char peak_list_path[512];
	char sequence_path[512];
	unsigned int numSpecific = 0;
	unsigned int numGeneral  = 0;
	unsigned int numForms = 1;
	unsigned int modelType = 0;
	mass_t ppm = 10.0;

	bool got_peak_list_path = false;
	bool got_sequence_path = false;
	bool got_print = false;

	Config			config;
	MonoSpectrum	spectrum;
	FragmentSet		fragments;
	ProteinSequence sequence;
	MonoAlignment	alignmentArray;

	unsigned int i;
	i=1;
	while (i<(unsigned int)argc)
	{
		if (!strcmp(argv[i],"-peak_list"))
		{
			if (sscanf(argv[++i],"%s",peak_list_path) != 1)
				error("-peak_list");

			got_peak_list_path = true;
		}
		else if (!strcmp(argv[i],"-sequence"))
		{
			if (sscanf(argv[++i],"%s",sequence_path) != 1)
				error("-sequence_path");

			got_sequence_path = true;
		}
		else if (! strcmp(argv[i],"-general"))
		{
			if (sscanf(argv[++i],"%d",&numGeneral) != 1)
				error("-general");
			if (numGeneral<0 || numGeneral>99)
				error("-general should have a value between 0-99!");
		}
		else if (! strcmp(argv[i],"-specific"))
		{
			if (sscanf(argv[++i],"%d",&numSpecific) != 1)
				error("-specific");
			if (numSpecific<0 || numSpecific>99)
				error("-specific should have a value between 0-99!");
		}
		else if (! strcmp(argv[i],"-num_forms"))
		{
			if (sscanf(argv[++i],"%d",&numForms) != 1)
				error("-num_fomrs");
			if (numForms<1 || numForms>10)
				error("-num_forms should have a value between 1-10!");
		}
		else if (! strcmp(argv[i],"-print"))
		{
			got_print = true;
		}
		else if (!strcmp(argv[i],"-model_type"))
		{
			char model_name[64];
			if (sscanf(argv[++i],"%s",model_name) != 1)
				error("-model_name");
			if (! strcmp(model_name,"ECD"))
			{
				modelType=0;
			}
			else
				error("Only available model names: ECD");
		}
		else if (!strcmp(argv[i],"-PPM"))
		{
			if (sscanf(argv[++i],"%lf",&ppm) != 1)
				error("-PPM");
			if (ppm<=0 || ppm>1000)
				error("-PPM should take values in (0-1000)");
		}
		else
		{
			cout << "command line flag: " << argv[i] << endl;
			error("Unkown option!");
		}

		i++;
	}

	if (! got_peak_list_path)
		error("Must supply path to peak list file!");
	if (! got_sequence_path)
		error("Must supply path to sequence file!");

	if (modelType == 0)
	{
		config.init_with_defaults();
		fragments.initECD();
	}
	
	spectrum.readAllPeakList(peak_list_path,ppm,ppm);
	sequence.read(sequence_path,&config);

	vector<CutPeak> cutPeaks;
	
	spectrum.convertToCutPeaks(fragments,cutPeaks);
	alignmentArray.setSequence(sequence);
	alignmentArray.makeQuickDpTable(&config, numGeneral, numSpecific, 
					numForms, sequence, cutPeaks, config.getAllPTMs());

	// create table with all solution scores
	if (got_print)
	{
		bool printedPaths = false;
		for (i=0; i<numForms; i++)
		{
			SimpleAlignment path;
			alignmentArray.getBestAlignmentInTable(cutPeaks,path,numGeneral,numSpecific,i);

			if (path.score<0)
				continue; 

			printedPaths=true;
			if (i>0)
			{
				cout << endl << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << endl << endl;
			}
			cout << "Form " << i+1 << "  (score " << setprecision(0) << fixed << path.score << ")  approx PPM: " << setprecision(1) << ppm << endl << endl;
			path.printNice(cutPeaks,sequence,spectrum,fragments,alignmentArray.getAlignmentPtmManager());
			cout << endl;
		}

		if (! printedPaths)
			cout << "No paths with positive scores were found!" << endl;
	}
	else // print the desired forms' with peak lists
	{
		alignmentArray.printScoreTable();
	}

	return 0;
}

