/***************************************************************************
* Title:          MonoAlignment.h
* Author:         Ari Frank
* Copyright (c) 2009 The Regents of the University of California
* All Rights Reserved
* See file LICENSE for details.
***************************************************************************/

/******************************************************************************
Copyright 2008, The Regents of the University of California
All Rights Reserved

Permission to use, copy, modify and distribute any part of this
program for educational, research and non-profit purposes, without fee,
and without a written agreement is hereby granted, provided that the
above copyright notice, this paragraph and the following three paragraphs
appear in all copies.

Those desiring to incorporate this work into commercial
products or use for commercial purposes should contact the Technology
Transfer & Intellectual Property Services, University of California,
San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent@ucsd.edu.

IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.

THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*******************************************************************************/


#ifndef __MONOALIGNMENT_H__
#define __MONOALIGNMENT_H__


#include "ProteinSequence.h"
#include "MonoSpectrum.h"
#include "MonoFragments.h"
#include "Config.h"
#include "BasicDataStructs.h"

// Capacity of the fixed-size arrays in AaCombo and PtmCombo, i.e. the largest
// number of PTMs that can be stored in one combination.
#define MAX_NUM_PTMS_IN_COMBO 4

// One past the code `Val` (declared outside this header; presumably the last
// regular amino acid - confirm against its declaration).
// Parenthesized so the macro expands safely inside larger expressions:
// without the parentheses `2*NUM_REGULAR_AAS` would expand to `2*Val+1`.
#define NUM_REGULAR_AAS (Val+1)

// NOTE(review): not referenced in this header; presumably the number of
// suboptimal solutions kept per cell - confirm against its users.
#define MAX_NUM_SUBOPTIMAL_SOLS 4

class AlignmentPtmManager;



// Describes the location of a DpCell.
// generalIdx     - the number of arbitrary modifications used to get to this table
// specificIdx    - the number of known PTMs used to get to this table
// peakIdx, aaIdx - the two dimensions of the table
// formIdx        - index into the cell's vector of path pointers
//                  (this is how MonoAlignment::getPreviousLocation uses it)
struct DpCellLocation {

	// An unset location: every index is -1.
	DpCellLocation()
		: generalIdx(-1),
		  specificIdx(-1),
		  peakIdx(-1),
		  aaIdx(-1),
		  formIdx(-1)
	{}

	// Location with an implicit formIdx of 0.
	DpCellLocation(int _generalIdx, int _specificIdx, int _peakIdx, int _aaIdx)
		: generalIdx(_generalIdx),
		  specificIdx(_specificIdx),
		  peakIdx(_peakIdx),
		  aaIdx(_aaIdx),
		  formIdx(0)
	{}

	// Fully specified location.
	DpCellLocation(int _generalIdx, int _specificIdx, int _peakIdx, int _aaIdx, int _formIdx)
		: generalIdx(_generalIdx),
		  specificIdx(_specificIdx),
		  peakIdx(_peakIdx),
		  aaIdx(_aaIdx),
		  formIdx(_formIdx)
	{}

	// Member-wise copy of all five indices.
	DpCellLocation& operator = (const DpCellLocation& other)
	{
		formIdx     = other.formIdx;
		aaIdx       = other.aaIdx;
		peakIdx     = other.peakIdx;
		specificIdx = other.specificIdx;
		generalIdx  = other.generalIdx;
		return *this;
	}

	// Two locations are equal only when all five indices agree.
	bool operator == (const DpCellLocation& other) const
	{
		if (generalIdx != other.generalIdx)
			return false;
		if (specificIdx != other.specificIdx)
			return false;
		if (peakIdx != other.peakIdx)
			return false;
		if (aaIdx != other.aaIdx)
			return false;
		return (formIdx == other.formIdx);
	}

	int generalIdx;
	int specificIdx;
	int peakIdx;
	int aaIdx;
	int formIdx;
};

// This is the structure for recovering a number of (suboptimal) paths.
// Each instance records the score reached at a cell together with the cell
// the path came from.
struct PathPointer {

	// Starts out "empty": comboIdx, massShift and score are all NEG_INF;
	// previousCell is default-constructed (all indices -1).
	PathPointer() : comboIdx((int)NEG_INF), massShift((mass_t)NEG_INF), score((score_t)NEG_INF) {}

	// Orders primarily by score. On equal scores, the pointer whose
	// previousCell.formIdx is larger compares as the smaller one.
	bool operator< (const PathPointer& other) const
	{
		return ( (score < other.score) ||
				 (score == other.score && 
				 previousCell.formIdx > other.previousCell.formIdx)); 
	}

	// Returns the pointer to the same state the default constructor produces.
	// comboIdx is reset as well, so a recycled pointer never carries the combo
	// index of a path it no longer describes.
	void clear()
	{
		comboIdx = (int)NEG_INF;
		massShift = (mass_t)NEG_INF;
		score = (score_t)NEG_INF;
		previousCell = DpCellLocation(-1,-1,-1,-1,-1);
	}

	int comboIdx;				// if using a specific combination
	mass_t  massShift;
	score_t score;				 // at this current location
	DpCellLocation previousCell; // where was the maximum achieved from (last diagonal cell)
};


struct DpCell {

	void init(int numPointers)
	{
		pointers.resize(numPointers);
		int i;
		for (i=0; i<numPointers; i++)
			pointers[i].clear();
	}

	// adds a new pointer to the list of pointers if its score is greater
	// then any of the existing (pointer with least score is removed)
	void addPointer(const PathPointer& newPointer, int maxNumPointers);

	// sort the pointers so they appear in descending score
	void sortPointers(int k);

	// for the top k paths
	vector<PathPointer> pointers;
};


// One block of an alignment. The start and end of the block are each given
// by the same five indices that make up a DpCellLocation.
struct AlignmentBlock {
	
	// Every index (and comboIdx) starts at -1; blockDelta and score start at 0.
	AlignmentBlock()
		: generalIdxStart(-1),
		  generalIdxEnd(-1),
		  specificIdxStart(-1),
		  specificIdxEnd(-1),
		  peakIdxStart(-1),
		  peakIdxEnd(-1),
		  aaIdxStart(-1),
		  aaIdxEnd(-1),
		  formIdxStart(-1),
		  formIdxEnd(-1),
		  blockDelta(0),
		  comboIdx(-1),
		  score(0)
	{}

	// Stores the five indices at which the block starts.
	void setStart(int generalIdx, int specificIdx, int peakIdx, int aaIdx, int formIdx)
	{
		generalIdxStart  = generalIdx;
		specificIdxStart = specificIdx;
		peakIdxStart     = peakIdx;
		aaIdxStart       = aaIdx;
		formIdxStart     = formIdx;
	}

	// Stores the five indices at which the block ends.
	void setEnd(int generalIdx, int specificIdx, int peakIdx, int aaIdx, int formIdx)
	{
		generalIdxEnd  = generalIdx;
		specificIdxEnd = specificIdx;
		peakIdxEnd     = peakIdx;
		aaIdxEnd       = aaIdx;
		formIdxEnd     = formIdx;
	}

	// Writes the block to os (implemented outside this header).
	void print(ostream& os = cout) const;
	
	int generalIdxStart;
	int generalIdxEnd;
	int specificIdxStart;
	int specificIdxEnd;
	int peakIdxStart;
	int peakIdxEnd;
	int aaIdxStart;
	int aaIdxEnd;
	int formIdxStart;
	int formIdxEnd;

	// the modification's range ends right after this amino acid
	// NOTE(review): the note above presumably refers to aaIdxEnd - confirm.
	mass_t  blockDelta;   // for mods
	int		comboIdx;     // if specific PTM was used
	score_t score;
};



// An alignment expressed as a list of AlignmentBlocks, together with its
// score and total mono mass.
struct SimpleAlignment {

	// Empty alignment: zero mass, zero score, no sequence attached.
	SimpleAlignment()
		: totalMonoMass(0),
		  score(0),
		  pSequence(NULL)
	{}

	// Returns the stored alignment score.
	score_t getScore() const
	{
		return score;
	}

	// Printing helpers (implemented outside this header).
	void print(const vector<CutPeak>& cutPeaks,
			   const ProteinSequence& ps,
			   ostream& os = cout) const;

	void printNice(const vector<CutPeak>& cutPeaks,
				   const ProteinSequence& ps,
				   const MonoSpectrum& ms,
				   const FragmentSet& fs,
				   const AlignmentPtmManager& ptms,
				   ostream& os = cout) const;

	void printAlignedMasses(const vector<CutPeak>& cutPeaks,
							const ProteinSequence& ps) const;

	mass_t   totalMonoMass;
	score_t  score;

	// NOTE(review): nothing in this struct releases the pointer, so it is
	// presumably non-owning - confirm against the code that sets it.
	ProteinSequence *pSequence;

	vector<AlignmentBlock> alignmentBlocks; 
};





// A mass shift paired with an amino acid index.
struct Shift {

	// Unset shift: aaIdx is -1 and the mass shift is 0.
	Shift()
		: aaIdx(-1),
		  massShift(0)
	{}

	Shift(int _aaIdx, mass_t _massShift)
		: aaIdx(_aaIdx),
		  massShift(_massShift)
	{}

	int    aaIdx;
	mass_t massShift;
};



// A multiset of amino acids. Each distinct amino acid code is stored once in
// aminoAcids[] and its multiplicity in the matching slot of counts[]; only the
// first numAasInCombo slots are meaningful.
struct AaCombo {

	// Empty combo. Unused slots are filled with placeholders so that copying
	// the struct never reads indeterminate values.
	AaCombo() : numAasInCombo(0) { resetSlots(); }

	// Builds the combo from a list of amino acid codes (repeats allowed).
	// Entries are stored in order of first appearance; call sortAaCombo()
	// before comparing combos with operator==.
	// At most MAX_NUM_PTMS_IN_COMBO distinct codes fit in the fixed arrays:
	// further distinct codes are dropped instead of being written past the end
	// of the arrays (repeats of codes already stored are still counted).
	AaCombo(const vector<int>& aas) 
	{
		numAasInCombo=0;
		resetSlots();

		size_t i;
		for (i=0; i<aas.size(); i++)
		{
			// look for this amino acid among the ones already stored
			int j;
			for (j=0; j<numAasInCombo; j++)
				if (aas[i]==aminoAcids[j])
				{
					counts[j]++;
					break;
				}
			if (j<numAasInCombo)
				continue;

			// new distinct amino acid - only store it if a slot is left
			if (numAasInCombo >= MAX_NUM_PTMS_IN_COMBO)
				continue;

			aminoAcids[numAasInCombo]=aas[i];
			counts[numAasInCombo]=1;
			numAasInCombo++;
		}
	}

	// assumes the AaCombos are already sorted
	bool operator == (const AaCombo& other) const
	{
		if (numAasInCombo != other.numAasInCombo)
			return false;

		int i;
		for (i=0; i<numAasInCombo; i++)
			if (aminoAcids[i] != other.aminoAcids[i] ||
				counts[i] != other.counts[i])
				return false;
		return true;
	}

	// sorts AaCombo according to the aa values
	void sortAaCombo()
	{
		if (numAasInCombo <= 1)
			return;

		if (numAasInCombo == 2)
		{
			// two entries: a single conditional swap is a complete sort
			if (aminoAcids[0]>aminoAcids[1])
			{
				int t;
				t=aminoAcids[0];
				aminoAcids[0]=aminoAcids[1];
				aminoAcids[1]=t;
				t=counts[0];
				counts[0]=counts[1];
				counts[1]=t;
			}
			// return for both the swapped and the already-ordered pair, so an
			// ordered pair no longer falls through to the general sort
			return;
		}

		bubbleSortAaCombo();
	}

	int numAasInCombo;
	int aminoAcids[MAX_NUM_PTMS_IN_COMBO];
	int counts[MAX_NUM_PTMS_IN_COMBO];

private:
	// general sort used for three or more entries (implemented outside this header)
	void bubbleSortAaCombo();

	// gives every slot a defined placeholder value (-1 amino acid, count 0)
	void resetSlots()
	{
		int i;
		for (i=0; i<MAX_NUM_PTMS_IN_COMBO; i++)
		{
			aminoAcids[i] = -1;
			counts[i] = 0;
		}
	}
};


// A distinct combination of PTMs (up to the order in which they appear in the block).
struct PtmCombo {

	// Empty combo: no PTMs, empty name, zero total mass jump.
	PtmCombo()
		: numPtmsInCombo(0),
		  name(""),
		  totalMassJump(0)
	{}

	// Combos are ordered by their total mass jump.
	bool operator< (const PtmCombo& other) const
	{
		return other.totalMassJump > totalMassJump;
	}

	// Compares the total mass jump directly against a mass value.
	bool operator< (mass_t m) const
	{
		return m > totalMassJump;
	}

	// The three members below are implemented outside this header.
	int getSatisfieableAaComboIdx(const vector<int>& availableAas) const;

	void addUnsortedAaCombo(const vector<int>& unsortedAas);

	void print() const;

	// fixed-capacity list of PTM indexes; numPtmsInCombo holds the number in use
	int ptmIdxs[MAX_NUM_PTMS_IN_COMBO];
	int numPtmsInCombo;

	vector<AaCombo> satisfyingAaCombos;

	string name;

	mass_t totalMassJump;
};

ostream& operator << (ostream& os, const PtmCombo& ptm);


// Holds the list of PTMs and the PTM combinations generated from them.
class AlignmentPtmManager {
public:
	// Replaces the stored PTM list with a copy of _allPtms.
	void setPtms(const vector<PTM>& _allPtms)
	{
		allPtms = _allPtms;
	}

	// Implemented outside this header.
	void generatePtmCombos( int maxNumPTMsInCombo = 3);

	// Read-only access to the stored combos.
	const vector<PtmCombo>& getPtmCombos() const
	{
		return ptmCombos;
	}

	// Read-only access to the stored PTMs.
	const vector<PTM>& getAllPtms() const
	{
		return allPtms;
	}

	// Returns the combo at comboIdx; the index is not range checked.
	const PtmCombo& getCombo(int comboIdx) const
	{
		return ptmCombos[comboIdx];
	}

	// Implemented outside this header.
	int  getNumPtmCombosInRange(mass_t minMass, mass_t maxMass, int* firstPtmIdx) const;

	void printAllPtmCombos() const;
	void printAllPtms()      const;

private:
	vector<PTM>		    allPtms;
	vector<PtmCombo>    ptmCombos;

};


// A (peak index, amino acid index) pair; both default to -1.
struct IdxPair {
	IdxPair()
		: peakIdx(-1),
		  aaIdx(-1)
	{}

	int peakIdx;
	int aaIdx;
};

ostream& operator << (ostream& os, const IdxPair& idxPair);

// Holds the mass shift between a peak i and an aa prefix j.
struct PairShift {

	// PairShifts are ordered by their mass shift only.
	bool operator< (const PairShift& other) const
	{
		return other.massShift > massShift;
	}

	IdxPair idxs;
	mass_t  massShift;	// not initialized by default construction
};


// Holds the previous pair on the same diagonal for each location (i,j)
// (peak i, prefix aa j).
// None of the accessors range check their indices.
class CoDiags {
public:
	// Fills the tables below (implemented outside this header).
	void calcCodiags(const vector<CutPeak>&  cutPeaks, 
					 const vector<mass_t>&   prefixMasses,
					 const vector<PtmCombo>& ptmCombos);

	// Entry of prevCodiag for (peakIdx, aaIdx), returned by value.
	IdxPair getCodiag(int peakIdx,int aaIdx) const 
	{
		const vector<IdxPair>& peakRow = prevCodiag[peakIdx];
		return peakRow[aaIdx];
	}

	// Entry of ptmCodiags for (peakIdx, aaIdx) and the given combo, returned by value.
	IdxPair getPtmComboCodiag(int ptmComboIdx, int peakIdx, int aaIdx) const
	{
		const vector<IdxPair>& perCombo = ptmCodiags[peakIdx][aaIdx];
		return perCombo[ptmComboIdx];
	}

	// The whole allCodiags list for (peakIdx, aaIdx), returned by reference.
	const vector<IdxPair>& getAllCodiags(int peakIdx, int aaIdx) const 
	{
		const vector< vector<IdxPair> >& peakRow = allCodiags[peakIdx];
		return peakRow[aaIdx];
	}


private:
	vector< vector<IdxPair> >			prevCodiag;  // indexed [peakIdx][aaIdx]
	vector< vector< vector<IdxPair> > > ptmCodiags;  // co diags with a specified shift; [peakIdx][aaIdx][ptmComboIdx]
	vector< vector< vector<IdxPair> > > allCodiags;  // indexed [peakIdx][aaIdx]
};





class MonoAlignment {
public:

	

	score_t getBestAlignmentInTable(const vector<CutPeak>& cutPeaks,
					SimpleAlignment& align, 
					int generalIdx, 
					int specificIdx,
					int formIdx=0,
					int peakIdx=-1, 
					int aaIdx=-1 ) const;

	void makeQuickDpTable( 
					Config *config, 
					int maxNumArbitraryShift, 
					int maxNumPTMShifts,
					int maxformIdxs,
					const ProteinSequence& ps, 
					const vector<CutPeak>& cutPeaks,
					const vector<PTM>& allPtms,
					bool verbose = false);

	void setSequence(const ProteinSequence& _sequence) { sequence = _sequence; }

	void printAllCoDiagPairs(int maxPeakIdx, int maxAaIdx, 
							 const ProteinSequence& ps, const vector<CutPeak>& cutPeaks) const;

	void printScoreTable() const;

	const AlignmentPtmManager& getAlignmentPtmManager() const { return ptmManager; }

private:
	Config *pConfig;

	ProteinSequence sequence;

	AlignmentPtmManager ptmManager;
	
	vector< vector< vector< vector< DpCell > > > > D,M; // #arbitrary jumps, #known PTM jumps, #peaks, #aas

	vector<PairShift> distancePairs; // between all pairs of peaks and prefix aa masses

	vector< vector<int> > aaCounts;

	CoDiags coDiags;


	void initAlignmentPtms(const vector<PTM>& allPtms, int maxNumPtmsInCombo);

	void initAaCounts();

	void parsePathFromDpTable(const vector<CutPeak>& cutPeaks,
				SimpleAlignment& align, 
				int generalIdx, 
				int specificIdx, 
				int peakIdx, 
				int aaIdx, 
				int formIdx) const;

	bool checkIfComboSatisfiable(int comboIdx, int startAaIdx, int endAaIdx) const;

	const DpCellLocation& getPreviousLocation(const DpCellLocation& loc) const
	{
		return D[loc.generalIdx][loc.specificIdx][loc.peakIdx][loc.aaIdx].pointers[loc.formIdx].previousCell;
	}

	bool doesPathContainCellLocation(const DpCellLocation& pathEndLoc,
									 const DpCellLocation& location) const;

	
};



#endif


