#include <new>

#include "QuickClustering.h"
#include "base64.h"

// Flushes whatever is still held in the buffer and then releases the buffer.
DAT_FileBuff::~DAT_FileBuff()
{
	// no buffer means there is nothing to flush and nothing to free
	if (! buff)
		return;

	// the pending bytes are the ones between buff and pos
	const bool has_pending_data = (pos > buff);
	if (has_pending_data)
		flush_buff();

	delete [] buff;
}

// Prepares the buffer for collecting spectra that are flushed to _path.
//
//   _path          path of the DAT file the buffer is written to
//   dat_buff_size  size in bytes of the in-memory buffer; must be larger than
//                  the 100 byte margin that is kept free at its end
//
// Any data still pending from a previous init() is discarded, and the first
// flush after this call overwrites the file instead of appending to it.
// Exits the program if the size is too small or the allocation fails.
void DAT_FileBuff::init(string& _path,  int dat_buff_size)
{
	// max_pos is placed 100 bytes before the end of the buffer, so a buffer
	// that is not larger than this margin could never accept a spectrum
	if (dat_buff_size <= 100)
	{
		cout << "Error: DAT file buff size must be larger than 100 bytes: " << dat_buff_size << endl;
		exit(1);
	}

	// a buffer left from an earlier init() may have a different size than the
	// one max_pos is computed from below, so it is replaced
	if (ind_was_initialized && buff)
	{
		delete [] buff;
		buff = NULL;
	}

	// nothrow: a failed allocation returns NULL and reaches the check below
	buff = new (std::nothrow) char[dat_buff_size];
	if (! buff)
	{
		cout << "Error: couldn't allocate memory for DAT file buff!" << endl;
		exit(1);
	}

	ind_was_initialized = 1;
	path = _path;
	max_pos = buff + dat_buff_size - 100;
	pos = buff;
	ind_first_write = 1; 
}


// Appends one spectrum record to the buffer. If the record does not fit in
// the space that is left, the buffer is flushed to the DAT file first.
//
// Record layout, in the order it is written:
//   m_over_z                                        (mass_t)
//   charge, mzxml_file_idx, scan_number, num_peaks  (4 ints)
//   retention_time, precursor_intensity             (2 floats)
//   2 * num_peaks floats copied from peak_buff
//
// peak_buff must hold at least 2 * num_peaks floats.
// Exits the program if the buffer was not initialized, if num_peaks,
// mzxml_file_idx or scan_number is negative, or if the record does not fit
// even into an empty buffer.
void DAT_FileBuff::add_spec_to_DAT_file(
							mass_t m_over_z, 
							int charge, 
							int mzxml_file_idx,
							int scan_number, 
							float retention_time, 
							float precursor_intensity, 
							int num_peaks, 
							char *peak_buff)
{
	if (! ind_was_initialized)
	{
		cout << "Error: must first initialize the DAT_FileBuff!" << endl;
		exit(1);
	}

	// report suspicious peak counts
	if (num_peaks>5000 || num_peaks<0)
		cout << mzxml_file_idx << " " << scan_number << " " << " p: " << num_peaks << endl;

	// a negative count would turn into a huge size in the copy below
	if (num_peaks<0)
	{
		cout << "Error: bad number of peaks! " << num_peaks << endl;
		exit(1);
	}

	if (mzxml_file_idx<0 || scan_number<0)
	{
		cout <<"Error: bad file idx or scan number! " << mzxml_file_idx << ", " << scan_number << endl;
		exit(1);
	}

	// the header holds two floats in addition to the mass and the four ints
	const size_t peak_bytes = 2 * sizeof(float) * (size_t)num_peaks;
	const size_t spec_bytes = sizeof(mass_t) + 4 * sizeof(int) + 2 * sizeof(float) + peak_bytes;

	if (pos + spec_bytes >= max_pos)
		flush_buff();

	// after a flush the buffer is empty; a record that still does not fit
	// would be written past the end of the buffer
	if (pos + spec_bytes >= max_pos)
	{
		cout << "Error: spectrum is too large for the DAT file buff! " << mzxml_file_idx << ", " 
			 << scan_number << " p: " << num_peaks << endl;
		exit(1);
	}

	// the fields are copied byte-wise because pos is not guaranteed to be
	// aligned for mass_t, int or float
	char *out = pos;

	memcpy(out, &m_over_z, sizeof(mass_t));
	out += sizeof(mass_t);

	const int int_fields[4] = { charge, mzxml_file_idx, scan_number, num_peaks };
	memcpy(out, int_fields, sizeof(int_fields));
	out += sizeof(int_fields);

	const float float_fields[2] = { retention_time, precursor_intensity };
	memcpy(out, float_fields, sizeof(float_fields));
	out += sizeof(float_fields);

	if (peak_bytes > 0)
	{
		memcpy(out, peak_buff, peak_bytes);
		out += peak_bytes;
	}

	pos = out;
}



void DAT_FileBuff::flush_buff()
{
	FILE* dat_stream;

	if (pos == buff)
		return;

	if (! ind_was_initialized)
	{
		cout << "Error: must first initialize the DAT_FileBuff!" << endl;
		exit(1);
	}

	if (ind_first_write)
	{
		dat_stream=fopen(path.c_str(),"wb");
		ind_first_write = 0;
	}
	else
		dat_stream=fopen(path.c_str(),"ab");

	if (! dat_stream)
	{
		cout << "Error: couldn't open DAT file for writing: " << path.c_str() << endl;
		exit(1);
	}


	fwrite(buff,1,pos-buff,dat_stream);
	fclose(dat_stream);

	pos=buff;
}



// Sets the conversion parameters and creates one (still unallocated) DAT
// buffer per mass bin.
//
//   _max_m_over_z    largest m/z that gets a bin of its own; spectra above it
//                    are put in the last bin by the conversion functions
//   _mass_increment  width of a mass bin; must be positive
//   _dat_buff_size   size in bytes given to each DAT buffer when it is
//                    initialized
//
// Exits the program if the mass parameters cannot yield a valid bin count.
void DAT_Converter::init_DAT_Converter(mass_t _max_m_over_z, mass_t _mass_increment, int _dat_buff_size)
{
	// the bin count below divides by the increment and is used as a vector
	// size, so both values have to be sane (the negated form also rejects NaN)
	if (! (_mass_increment > 0) || ! (_max_m_over_z >= 0))
	{
		cout << "Error: DAT_Converter needs a positive mass increment and a non-negative max m/z!" << endl;
		exit(1);
	}

	max_m_over_z = _max_m_over_z;
	mass_increment = _mass_increment;
	dat_buff_size = _dat_buff_size;

	max_dat_file_idx = (int)(max_m_over_z / mass_increment) + 1;

	// indices 0..max_dat_file_idx are all valid
	dat_buffs.resize(max_dat_file_idx+1);

	ind_was_initialized =1;
}




// creates a file with the list of DAT files
void DAT_Converter::create_list_file() const
{
	FILE *list_stream;

	ostringstream oss;
	oss << batch;
	string list_path = out_dir + "/" + name + "_" + oss.str() + "_list.txt";

	list_stream = fopen(list_path.c_str(),"w");
	if (! list_stream)
	{
		cout << "Error: couldn't open list file for writing: " << list_path.c_str() << endl;
		exit(1);
	}

	int i;
	for (i=0; i<dat_buffs.size(); i++)
	{
		if (dat_buffs[i].buff)
		{
			fprintf(list_stream,"%s\n",dat_buffs[i].path.c_str());
		}
	}

	fclose(list_stream);
}



/******************************************************************
Converts the spectra of the files listed in file_list to DAT files.
If file_idx_start is given, it is added to the file idx that is written
(this is good when used with split list files).

Spectra that report fewer than 5 peaks are skipped. Every other
spectrum is added to the DAT buffer with index (int)(m_over_z/mass_increment),
limited to the range 0..max_dat_file_idx. At the end all buffers are
flushed and the list file is written.

Exits the program if the converter was not initialized, if the scratch
buffers cannot be allocated, or if a spectrum reports a peak count that
does not fit the scratch buffers.
*******************************************************************/
void DAT_Converter::convert_spectra_to_DAT(
					Config* config, 
					char *file_list, 
					char * _out_dir, 
					char * _name, 
					int _batch, 
					mass_t min_m_over_z, 
					mass_t max_m_over_z,
					int file_idx_start)
{
	// capacity, in peaks, of the scratch buffers below
	const int max_num_peaks = 20000;

	// scratch buffers, allocated on the first call and kept for later calls
	static QCPeak *peak_buff=NULL;    
	static float  *dat_peak_buff=NULL;

	if (! peak_buff)
	{
		// nothrow: a failed allocation returns NULL and reaches the check below
		peak_buff     = new (std::nothrow) QCPeak[max_num_peaks];
		dat_peak_buff = new (std::nothrow) float[2*max_num_peaks];
	}

	if (! peak_buff || ! dat_peak_buff)
	{
		cout << "Error: couldn't allocate memory for DAT conversion!" << endl;
		exit(1);
	}

	name = _name;
	out_dir = _out_dir;
	batch = _batch;

	if (! ind_was_initialized)
	{
		cout << "Error: must initialize DAT_Converter!" << endl;
		exit(1);
	}

	// NOTE(review): list is not used below; the call is kept in case it
	// validates the list file - confirm and remove if it has no effect
	vector<string> list;
	read_paths_into_list(file_list,list);

	BasicSpecReader bsr;
	FileManager fm;
	FileSet all_spec_fs;

	printf("Parsing file info...\n\n");

	int num_spectra_extracted=0;
	fm.init_from_list_file(config,file_list,min_m_over_z,max_m_over_z);
	all_spec_fs.select_all_files(fm,true);
	const vector<SingleSpectrumFile *>& all_ssf = all_spec_fs.get_ssf_pointers();
	const int num_ssf = (int)all_ssf.size();

	printf("Writing tmp dat files (%d spectra)... (this might take a few minutes...)\n",
			num_ssf);

	// progress is reported every ten_percent_size converted spectra
	const int ten_percent_size = (int)(0.1 * num_ssf);
	int pc=0,c=0;

	int i;
	for (i=0; i<num_ssf; i++)
	{
		SingleSpectrumFile *ssf = all_ssf[i];

		// no small spectra
		if (ssf->num_peaks<5)
			continue;

		const int num_spec_peaks = bsr.read_basic_spec(config,fm,ssf,peak_buff);
		if (num_spec_peaks<0 || num_spec_peaks>max_num_peaks)
		{
			cout << "Error: bad number of peaks read for spectrum: " << num_spec_peaks << endl;
			exit(1);
		}

		const mass_t m_over_z = ssf->m_over_z;
		int scan_number=-1, file_idx=-1;
		float precursor_intensity=0;

		// the file idx and scan number come from the type specific record
		if (ssf->type == DTA)
		{
			scan_number=0;
			file_idx=ssf->file_idx + file_idx_start;
		}
		else if (ssf->type == MGF)
		{
			MGF_single *mgf_single = (MGF_single *)ssf;
			file_idx = mgf_single->file_idx + file_idx_start;
			scan_number = mgf_single->idx_in_file;
		}
		else if (ssf->type == MZXML)
		{
			MZXML_single *mzxml_single = (MZXML_single *)ssf;
			file_idx = mzxml_single->file_idx + file_idx_start;
			scan_number = mzxml_single->scan_number;
			precursor_intensity = mzxml_single->precursor_intensity;
		}
		else if (ssf->type == DAT)
		{
			DAT_single *dat_single = (DAT_single *)ssf;
			file_idx = dat_single->mzxml_file_idx + file_idx_start;
			scan_number = dat_single->scan_number;
		}

		// copy peaks as (mass, intensity) float pairs
		int j,pos=0;
		for (j=0; j<num_spec_peaks; j++)
		{
			dat_peak_buff[pos++]=(float)peak_buff[j].mass;
			dat_peak_buff[pos++]=(float)peak_buff[j].intensity;
		}

		// select the DAT buffer, keeping the index inside dat_buffs
		int DAT_file_idx =  (int)(m_over_z/mass_increment);
		if (DAT_file_idx > max_dat_file_idx)
			DAT_file_idx = max_dat_file_idx;
		if (DAT_file_idx < 0)
			DAT_file_idx = 0;

		if (! dat_buffs[DAT_file_idx].ind_was_initialized)
		{
			ostringstream os,os_batch;
			os << DAT_file_idx;
			os_batch << batch;
			string path = out_dir + "/" + name + "_" + os_batch.str() + "_" + os.str() + ".dat";
			dat_buffs[DAT_file_idx].init(path,dat_buff_size);
		}

		// charges outside 0..1000 are written as 0
		int charge = ssf->charge;
		if (charge<0 || charge>1000)
			charge=0;

		dat_buffs[DAT_file_idx].add_spec_to_DAT_file(m_over_z,
			charge,file_idx,scan_number,ssf->retention_time,precursor_intensity,
			num_spec_peaks,(char *)dat_peak_buff);

		num_spectra_extracted++;

		c++;
		if (c == ten_percent_size)
		{
			c=0;
			// rounding of ten_percent_size must not push the report above 100%
			if (pc<10)
			{
				pc++;
				printf("Done %d%% ...\n",pc*10);
			}
		}
	}

	// write out whatever is still held in the buffers
	for (size_t d=0; d<dat_buffs.size(); d++)
	{
		if (dat_buffs[d].ind_was_initialized && dat_buffs[d].pos > dat_buffs[d].buff)
			dat_buffs[d].flush_buff();
	}
	
	create_list_file();

	cout << "Done.\nTotal #spectra extracted and converted to DAT: " << num_spectra_extracted << endl;
}

// Converts the spectra of the files listed in file_list to DAT files.
// Every entry of the file set is treated as an MZXML_single (the cast is not
// checked), the charge is written as 0 and file_start_idx is added to the
// file idx that is written.
//
// Each spectrum is added to the DAT buffer with index
// (int)(m_over_z/mass_increment), limited to the range 0..max_dat_file_idx.
// At the end all buffers are flushed and the list file is written.
//
// Exits the program if the converter was not initialized, if the scratch
// buffers cannot be allocated, or if a spectrum reports a peak count that
// does not fit the scratch buffers.
void DAT_Converter::convert_MZXML_to_DAT(Config* config, char *file_list, 
							char * _out_dir, char * _name, int _batch, 
							mass_t min_m_over_z, mass_t max_m_over_z, int file_start_idx)
{
	// capacity, in peaks, of the scratch buffers below
	const int max_num_peaks = 20000;

	// the scratch buffers are kept on the heap (allocated on the first call
	// and reused), they are too large to be placed on the stack safely
	static QCPeak *peak_buff=NULL;
	static float  *dat_peak_buff=NULL;

	if (! peak_buff)
	{
		// nothrow: a failed allocation returns NULL and reaches the check below
		peak_buff     = new (std::nothrow) QCPeak[max_num_peaks];
		dat_peak_buff = new (std::nothrow) float[2*max_num_peaks];
	}

	if (! peak_buff || ! dat_peak_buff)
	{
		cout << "Error: couldn't allocate memory for DAT conversion!" << endl;
		exit(1);
	}

	name = _name;
	out_dir = _out_dir;
	batch = _batch;

	if (! ind_was_initialized)
	{
		cout << "Error: must initialize DAT_Converter!" << endl;
		exit(1);
	}

	// NOTE(review): list is not used below; the call is kept in case it
	// validates the list file - confirm and remove if it has no effect
	vector<string> list;
	read_paths_into_list(file_list,list);

	BasicSpecReader bsr;
	FileManager fm;
	FileSet all_spec_fs;

	printf("Parsing file info...\n\n");

	int num_spectra_extracted=0;
	fm.init_from_list_file(config,file_list,min_m_over_z,max_m_over_z);
	all_spec_fs.select_all_files(fm,true);
	const vector<SingleSpectrumFile *>& all_ssf = all_spec_fs.get_ssf_pointers();
	const int num_ssf = (int)all_ssf.size();

	printf("Writing tmp dat files (%d spectra)... (this might take a few minutes...)\n",
			num_ssf);

	// progress is reported every ten_percent_size converted spectra
	const int ten_percent_size = (int)(0.1 * num_ssf);
	int pc=0,c=0;

	int i;
	for (i=0; i<num_ssf; i++)
	{
		MZXML_single *ssf = (MZXML_single *)all_ssf[i];

		const int num_spec_peaks = bsr.read_basic_spec(config,fm,ssf,peak_buff);
		if (num_spec_peaks<0 || num_spec_peaks>max_num_peaks)
		{
			cout << "Error: bad number of peaks read for spectrum: " << num_spec_peaks << endl;
			exit(1);
		}

		const mass_t m_over_z = ssf->m_over_z;
		const int scan_number = ssf->scan_number;
		const int file_idx = ssf->file_idx + file_start_idx;

		// copy peaks as (mass, intensity) float pairs
		int j,pos=0;
		for (j=0; j<num_spec_peaks; j++)
		{
			dat_peak_buff[pos++]=(float)peak_buff[j].mass;
			dat_peak_buff[pos++]=(float)peak_buff[j].intensity;
		}

		// select the DAT buffer, keeping the index inside dat_buffs
		int DAT_file_idx =  (int)(m_over_z/mass_increment);
		if (DAT_file_idx > max_dat_file_idx)
			DAT_file_idx = max_dat_file_idx;
		if (DAT_file_idx < 0)
			DAT_file_idx = 0;

		if (! dat_buffs[DAT_file_idx].ind_was_initialized)
		{
			ostringstream os,os_batch;
			os << DAT_file_idx;
			os_batch << batch;
			string path = out_dir + "/" + name + "_" + os_batch.str() + "_" + os.str() + ".dat";
			dat_buffs[DAT_file_idx].init(path,dat_buff_size);
		}

		dat_buffs[DAT_file_idx].add_spec_to_DAT_file(m_over_z,
			0,file_idx,scan_number,ssf->retention_time,ssf->precursor_intensity,
			num_spec_peaks,(char *)dat_peak_buff);

		num_spectra_extracted++;

		c++;
		if (c == ten_percent_size)
		{
			c=0;
			// rounding of ten_percent_size must not push the report above 100%
			if (pc<10)
			{
				pc++;
				printf("Done %d%% ...\n",pc*10);
			}
		}
	}

	// write out whatever is still held in the buffers
	for (size_t d=0; d<dat_buffs.size(); d++)
	{
		if (dat_buffs[d].ind_was_initialized && dat_buffs[d].pos > dat_buffs[d].buff)
			dat_buffs[d].flush_buff();
	}
	
	create_list_file();

	cout << "Done.\nTotal #spectra extracted and converted to DAT: " << num_spectra_extracted << endl;
}


// Converts to DAT files only those spectra of the files in file_list that
// have an annotation. The annotations are read from annotations_file; a
// spectrum is converted when annotation_idxs[file_idx][scan_number] is not
// negative. Every entry of the file set is treated as an MZXML_single (the
// cast is not checked) and the charge is written as 0.
//
// At the end all buffers are flushed and the list file is written.
// Exits the program if the converter was not initialized, if the scratch
// buffers cannot be allocated, or if a spectrum reports a peak count that
// does not fit the scratch buffers.
void DAT_Converter::convert_MZXML_to_DAT_only_annotated(Config* config, char *file_list, 
								char *annotations_file, char * _out_dir, char * _name, int _batch)
{
	vector< vector<int> >    annotation_idxs;
	vector<mzXML_annotation> annotations;

	read_mzXML_annotations(file_list,annotations_file, annotation_idxs, annotations);

	cout << "Read: " << annotations.size() << endl;

	// capacity, in peaks, of the scratch buffers below
	const int max_num_peaks = 20000;

	// the scratch buffers are kept on the heap (allocated on the first call
	// and reused), they are too large to be placed on the stack safely
	static QCPeak *peak_buff=NULL;
	static float  *dat_peak_buff=NULL;

	if (! peak_buff)
	{
		// nothrow: a failed allocation returns NULL and reaches the check below
		peak_buff     = new (std::nothrow) QCPeak[max_num_peaks];
		dat_peak_buff = new (std::nothrow) float[2*max_num_peaks];
	}

	if (! peak_buff || ! dat_peak_buff)
	{
		cout << "Error: couldn't allocate memory for DAT conversion!" << endl;
		exit(1);
	}

	name = _name;
	out_dir = _out_dir;
	batch = _batch;

	if (! ind_was_initialized)
	{
		cout << "Error: must initialize DAT_Converter!" << endl;
		exit(1);
	}

	// NOTE(review): list is not used below; the call is kept in case it
	// validates the list file - confirm and remove if it has no effect
	vector<string> list;
	read_paths_into_list(file_list,list);

	BasicSpecReader bsr;
	FileManager fm;
	FileSet all_spec_fs;
	
	int num_spectra_extracted=0;
	fm.init_from_list_file(config,file_list,annotation_idxs);
	all_spec_fs.select_all_files(fm,true);
	const vector<SingleSpectrumFile *>& all_ssf = all_spec_fs.get_ssf_pointers();
	
	for (size_t i=0; i<all_ssf.size(); i++)
	{
		MZXML_single *ssf = (MZXML_single *)all_ssf[i];
		const mass_t m_over_z = ssf->m_over_z;
		const float retention_time = ssf->retention_time;
		const float precursor_intensity = ssf->precursor_intensity;
		const int scan_number = ssf->scan_number;
		const int file_idx = ssf->file_idx;

		// a file or scan the annotation table has no entry for is treated
		// like a scan without an annotation
		if (file_idx<0 || file_idx>=(int)annotation_idxs.size())
			continue;

		const vector<int>& file_annotation_idxs = annotation_idxs[file_idx];
		if (scan_number<0 || scan_number>=(int)file_annotation_idxs.size())
			continue;

		if (file_annotation_idxs[scan_number]<0)
			continue;

		const int num_spec_peaks = bsr.read_basic_spec(config,fm,ssf,peak_buff);
		if (num_spec_peaks<0 || num_spec_peaks>max_num_peaks)
		{
			cout << "Error: bad number of peaks read for spectrum: " << num_spec_peaks << endl;
			exit(1);
		}
		
		// copy peaks as (mass, intensity) float pairs
		int j,pos=0;
		for (j=0; j<num_spec_peaks; j++)
		{
			dat_peak_buff[pos++]=(float)peak_buff[j].mass;
			dat_peak_buff[pos++]=(float)peak_buff[j].intensity;
		}

		// select the DAT buffer, keeping the index inside dat_buffs
		int DAT_file_idx =  (int)(m_over_z/mass_increment);
		if (DAT_file_idx > max_dat_file_idx)
			DAT_file_idx = max_dat_file_idx;
		if (DAT_file_idx < 0)
			DAT_file_idx = 0;

		if (! dat_buffs[DAT_file_idx].ind_was_initialized)
		{
			// NOTE(review): unlike the other conversion functions this path
			// has no batch number and no "_" separators - confirm it is intended
			ostringstream os;
			os << DAT_file_idx;
			string path = out_dir + "/" + name + os.str() + ".dat";
			dat_buffs[DAT_file_idx].init(path,dat_buff_size);
		}

		dat_buffs[DAT_file_idx].add_spec_to_DAT_file(m_over_z, 
			0,file_idx,scan_number,retention_time,precursor_intensity,
			num_spec_peaks,(char *)dat_peak_buff);

		num_spectra_extracted++;
	}

	// write out whatever is still held in the buffers
	for (size_t d=0; d<dat_buffs.size(); d++)
	{
		if (dat_buffs[d].ind_was_initialized && dat_buffs[d].pos > dat_buffs[d].buff)
			dat_buffs[d].flush_buff();
	}
	cout << "Total spectra extracted and converted to DAT: " << num_spectra_extracted << endl;

	create_list_file();

}




// Converts the mzXML files listed in file_list to DAT files, parsing each
// file directly with parse_single_MZXML_file. The i-th file of the list is
// written with file idx file_start_idx+i.
//
// At the end all buffers are flushed and the list file is written.
// Exits the program if the converter was not initialized.
void DAT_Converter::convert_MZXML_to_DAT_on_the_fly(Config* config, char *file_list, 
							char * _out_dir, char * _name, int _batch, 
							mass_t min_m_over_z, mass_t max_m_over_z, int file_start_idx)
{
	name = _name;
	out_dir = _out_dir;
	batch = _batch;

	if (! ind_was_initialized)
	{
		cout << "Error: must initialize DAT_Converter!" << endl;
		exit(1);
	}

	vector<string> list;
	read_paths_into_list(file_list,list);

	cout << endl << endl <<"Extracting spectra and writing dat files for " << list.size() << " Files. " << endl << endl;

	int num_spectra_extracted=0;
	const int num_files = (int)list.size();

	int i;
	for (i=0; i<num_files; i++)
	{
		num_spectra_extracted+=parse_single_MZXML_file(config,list[i],file_start_idx+i,
			min_m_over_z,max_m_over_z);
	}

	// write out whatever is still held in the buffers
	for (size_t d=0; d<dat_buffs.size(); d++)
	{
		if (dat_buffs[d].ind_was_initialized && dat_buffs[d].pos > dat_buffs[d].buff)
			dat_buffs[d].flush_buff();
	}
	
	cout << "Total spectra extracted and converted to DAT: " << num_spectra_extracted << endl;

	create_list_file();
}


// Adds the spectra of a single file to the DAT buffers and returns the
// number of spectra that were added.
//
//   file          path of the spectrum file
//   min_m_over_z  spectra with m/z at or below this value are skipped
//   max_m_over_z  spectra with m/z above this value are skipped
//   file_idx      index of the file, handed to the file manager
//
// Spectra that report fewer than 5 peaks are skipped as well. The buffers
// are not flushed here. The DAT file paths are built from the out_dir, name
// and batch members, which this function does not set.
// Exits the program if the converter was not initialized, if the scratch
// buffers cannot be allocated, or if a spectrum reports a peak count that
// does not fit the scratch buffers.
int DAT_Converter::convert_single_non_MZXML_file_to_DAT(Config* config, string file, 
							mass_t min_m_over_z, mass_t max_m_over_z, int file_idx)
{
	// capacity, in peaks, of the scratch buffers below
	const int max_num_peaks = 20000;

	// scratch buffers, allocated on the first call and kept for later calls
	static QCPeak *peak_buff=NULL;    
	static float  *dat_peak_buff=NULL;

	if (! peak_buff)
	{
		// nothrow: a failed allocation returns NULL and reaches the check below
		peak_buff     = new (std::nothrow) QCPeak[max_num_peaks];
		dat_peak_buff = new (std::nothrow) float[2*max_num_peaks];
	}

	if (! peak_buff || ! dat_peak_buff)
	{
		cout << "Error: couldn't allocate memory for DAT conversion!" << endl;
		exit(1);
	}

	if (! ind_was_initialized)
	{
		cout << "Error: must initialize DAT_Converter!" << endl;
		exit(1);
	}

	vector<string> list;
	list.push_back(file);

	BasicSpecReader bsr;
	FileManager fm;
	FileSet all_spec_fs;

	fm.init_from_list(config,list,true,file_idx);
	all_spec_fs.select_all_files(fm);

	const vector<SingleSpectrumFile *>& all_ssf = all_spec_fs.get_ssf_pointers();

	cout << file_idx << " " << file << " ... " << endl;

	int num_spectra_extracted=0;
	for (size_t i=0; i<all_ssf.size(); i++)
	{
		SingleSpectrumFile *ssf = all_ssf[i];

		// no small spectra and none outside the requested m/z range
		if (ssf->num_peaks<5 || ssf->m_over_z<=min_m_over_z || ssf->m_over_z > max_m_over_z)
			continue;

		const int num_spec_peaks = bsr.read_basic_spec(config,fm,ssf,peak_buff,true);
		if (num_spec_peaks<0 || num_spec_peaks>max_num_peaks)
		{
			cout << "Error: bad number of peaks read for spectrum: " << num_spec_peaks << endl;
			exit(1);
		}

		const mass_t m_over_z = ssf->m_over_z;

		// idx and scan number written with the spectrum; kept apart from the
		// file_idx parameter
		int scan_number=-1, spec_file_idx=-1;
		float precursor_intensity=0;
		
		if (ssf->type == DTA)
		{
			// a negative scan number is rejected by add_spec_to_DAT_file, so
			// 0 is written as in convert_spectra_to_DAT.
			// NOTE(review): the idx of the file given by the caller is used
			// here instead of a constant 0 - confirm this is what readers of
			// the DAT file expect
			spec_file_idx = file_idx;
			scan_number=0;
		}
		else if (ssf->type == MGF)
		{
			MGF_single *mgf_single = (MGF_single *)ssf;
			spec_file_idx = mgf_single->file_idx;
			scan_number   = mgf_single->idx_in_file;	
		}
		else if (ssf->type == MZXML)
		{
			MZXML_single *mzxml_single = (MZXML_single *)ssf;
			spec_file_idx = mzxml_single->file_idx;
			scan_number = mzxml_single->scan_number;
			precursor_intensity = mzxml_single->precursor_intensity;
		}
		else if (ssf->type == DAT)
		{
			DAT_single *dat_single = (DAT_single *)ssf;
			spec_file_idx = dat_single->mzxml_file_idx;
			scan_number = dat_single->scan_number;
		}

		// copy peaks as (mass, intensity) float pairs
		int j,pos=0;
		for (j=0; j<num_spec_peaks; j++)
		{
			dat_peak_buff[pos++]=(float)peak_buff[j].mass;
			dat_peak_buff[pos++]=(float)peak_buff[j].intensity;
		}

		// select the DAT buffer, keeping the index inside dat_buffs
		int DAT_file_idx =  (int)(m_over_z/mass_increment);
		if (DAT_file_idx > max_dat_file_idx)
			DAT_file_idx = max_dat_file_idx;
		if (DAT_file_idx < 0)
			DAT_file_idx = 0;

		if (! dat_buffs[DAT_file_idx].ind_was_initialized)
		{
			ostringstream os,os_batch;
			os << DAT_file_idx;
			os_batch << batch;
			string path = out_dir + "/" + name + "_" + os_batch.str() + "_" + os.str() + ".dat";
			dat_buffs[DAT_file_idx].init(path,dat_buff_size);
		}

		// charges outside 0..1000 are written as 0
		int charge = ssf->charge;
		if (charge<0 || charge>1000)
			charge=0;

		dat_buffs[DAT_file_idx].add_spec_to_DAT_file(
			m_over_z,
			charge,
			spec_file_idx,
			scan_number,ssf->retention_time,
			precursor_intensity,
			num_spec_peaks,(char *)dat_peak_buff);

		num_spectra_extracted++;
	}
	cout << num_spectra_extracted << " spectra..." << endl;

	return num_spectra_extracted;
}



// Converts the files listed in file_list to DAT files. Files whose extension
// type is MZXML are parsed with parse_single_MZXML_file, all other files go
// through convert_single_non_MZXML_file_to_DAT. The i-th file of the list is
// given the file idx file_start_idx+i.
//
// At the end all buffers are flushed and the list file is written.
// Exits the program if the converter was not initialized.
void DAT_Converter::convert_files_to_DAT_on_the_fly(Config* config, char *file_list, 
							char * _out_dir, char * _name, int _batch, 
							mass_t min_m_over_z, mass_t max_m_over_z, int file_start_idx)
{
	name = _name;
	out_dir = _out_dir;
	batch = _batch;

	if (! ind_was_initialized)
	{
		cout << "Error: must initialize DAT_Converter!" << endl;
		exit(1);
	}

	vector<string> list;
	read_paths_into_list(file_list,list);

	cout << endl << endl <<"Extracting spectra and writing dat files for " << list.size() << " Files. " << endl << endl;

	int num_spectra_extracted=0;
	const int num_files = (int)list.size();

	int i;
	for (i=0; i<num_files; i++)
	{
		const int file_type = get_file_extension_type(list[i]);

		if (file_type == MZXML)
		{
			num_spectra_extracted += parse_single_MZXML_file(config,list[i],file_start_idx+i,
				min_m_over_z,max_m_over_z);
		}
		else
		{
			num_spectra_extracted += convert_single_non_MZXML_file_to_DAT(config,list[i],
										min_m_over_z,max_m_over_z,file_start_idx+i);
		}
	}

	// write out whatever is still held in the buffers
	for (size_t d=0; d<dat_buffs.size(); d++)
	{
		if (dat_buffs[d].ind_was_initialized && dat_buffs[d].pos > dat_buffs[d].buff)
			dat_buffs[d].flush_buff();
	}
	
	cout << "Total spectra extracted and converted to DAT: " << num_spectra_extracted << endl;

	create_list_file();
}





// Parses one mzXML file and adds its MS/MS spectra to the DAT buffers.
//
//   config        handed to join_and_filter_peak_list
//   mzxml_name    path of the mzXML file
//   file_idx      file idx that is written with every spectrum of this file
//   min_m_over_z, max_m_over_z
//                 a scan is written only if its precursor m/z lies in
//                 [min_m_over_z, max_m_over_z]
//
// A scan is written only if its msLevel is above 1 and its peaksCount is
// above 2. The charge is written as 0. Returns the number of spectra added.
//
// The file is read in pieces through a static buffer of XML_BUFFER_SIZE
// bytes. New data is read whenever fewer than XML_BUFFER_HALF_SIZE bytes are
// left after the current position; once the end of the file was reached the
// rest of the buffer is parsed as it is.
//
// Exits the program if memory cannot be allocated, if the file cannot be
// opened, or if a scan lacks the retention time or peak count (or, for
// msLevel above 1, the precursor intensity or precursor m/z). Parsing of the
// file stops early when the peaks element of an accepted scan is not found.
int DAT_Converter::parse_single_MZXML_file(Config *config, 
										   string& mzxml_name, 
										   int file_idx,
										   mass_t min_m_over_z, 
										   mass_t max_m_over_z)
{
	// all buffers are static: they are allocated once and reused by later calls
	static char* Buffer = NULL;
	static char* DecodedPeakBuffer = NULL;
	static float* Peaks = NULL;
	static float* FilteredPeaks = NULL;
	static int PeakBufferSize = 0;

	int ByteOrderLittle = 1;   // kept from scan to scan until a scan states its byte order
	int BufferEnd = 0;         // number of valid bytes in Buffer
	int FilePos = 0;           // number of bytes read from the file so far
	bool reached_eof = false;  // set once a read returned less than was asked for

	if (! Buffer)
	{
		Buffer = (char*)calloc(XML_BUFFER_SIZE + 1, sizeof(char));
		if (! Buffer)
		{
			cout << "Error: couldn't allocate memory for parsing MZXML files!" << endl;
			exit(1);
		}
	}

	FILE* MZXMLFile = fopen(mzxml_name.c_str(), "rb");
	if (!MZXMLFile)
	{
		cout << "Error: Can't open MZXML file " <<  mzxml_name << endl;
		exit(1);
	}

	printf("File idx %d , '%s'...\n",file_idx, mzxml_name.c_str());

	int spec_counter =0;
	char *scan_start_ptr = NULL;   // the search for the next scan starts after this position
	while (1)
	{
		// Read more data, to fill up the buffer. After the end of the file
		// nothing more is read and the remaining buffer content is parsed
		if ( ! reached_eof &&
			 ( ! scan_start_ptr || 
			   ( (Buffer + BufferEnd - scan_start_ptr) < XML_BUFFER_HALF_SIZE) ) )
		{
			if (scan_start_ptr)
			{
				// make room by dropping the first half of the buffer
				if (BufferEnd - XML_BUFFER_HALF_SIZE>0)
				{
					memmove(Buffer, Buffer + XML_BUFFER_HALF_SIZE, BufferEnd - XML_BUFFER_HALF_SIZE);
					BufferEnd -= XML_BUFFER_HALF_SIZE;
					scan_start_ptr -= XML_BUFFER_HALF_SIZE;
				}
			}
			else
				scan_start_ptr = Buffer;

			const int BytesToRead = XML_BUFFER_SIZE - BufferEnd;
			const int BytesRead = (int)fread(Buffer + BufferEnd, sizeof(char), BytesToRead, MZXMLFile);

			if (BytesRead > 0)
			{
				BufferEnd += BytesRead;
				FilePos += BytesRead;
			}
			Buffer[BufferEnd] = '\0';

			// a short read means the end of the file (or a read error); the
			// scans that are still in the buffer must be parsed before stopping
			if (BytesRead < BytesToRead)
				reached_eof = true;
		}

		// Look for a new <scan tag opening:
		// this scan cannot be done with strstr since there might be NULL termination
		const char *last_pos = Buffer + BufferEnd - 5;
		char *pos = scan_start_ptr;

		while (++pos<last_pos)
		{
			if (*pos == '<' && memcmp(pos+1,"scan",4) == 0)
				break;
		}
		char* ScanStr =  (pos<last_pos) ? pos : NULL;

		if (!ScanStr )
		{
			// nothing left to parse and nothing left to read
			if (reached_eof)
				break;

			scan_start_ptr = Buffer + BufferEnd-5;
			continue;
		}

		// a scan that starts close to the end of the buffer may be cut off;
		// go back to just before it, so the next round reads more data and
		// finds the same scan again
		if (! reached_eof && (Buffer + BufferEnd - ScanStr) < XML_BUFFER_HALF_SIZE - 1)
		{
			scan_start_ptr = ScanStr - 1;
			continue;
		}

		const int BufferPos = (int)(ScanStr - Buffer);

		int ScanNumber = -1;
		char* ScanNumberStr = strstr(ScanStr, "num=");
		if (!ScanNumberStr)
		{
			printf("** Warning: mzXML parser encountered a scan with no scan number!  File %s Pos %d\n", 
				   mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
		}
		else
		{
			ScanNumber = ParseIntFromXML(ScanNumberStr);
		}

		char *retentionTimeStr = strstr(ScanStr,"retentionTime=\"PT");
		if (! retentionTimeStr)
		{
			printf("Error: mzXML parser encountered a scan with no retnetion time: File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
			cout <<"SCANSTR:"<<endl << ScanStr << endl;
			exit(1);
		}
		const float retentionTime = ParseMassFromXML(retentionTimeStr);

		char *PeakCountStr = strstr(ScanStr, "peaksCount=\"");
		if (!PeakCountStr)
		{
			cout << "Error parsing peaks from mzxml! " << endl;
			exit(1);
		}
		const int PeakCount = ParseIntFromXML(PeakCountStr);
		
		int MSLevel = -1;
		char* MSLevelStr = strstr(ScanStr, "msLevel=");
		if (!MSLevelStr)
		{
			printf("** Warning: mzXML parser encountered a scan with no scan level!  File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
		}
		else
		{
			MSLevel = ParseIntFromXML(MSLevelStr);
		}

		// the precursor intensity is required only for msLevel above 1
		float precursorIntensity = 0;
		char *precursorIntensityStr = strstr(ScanStr,"precursorIntensity=");
		if (! precursorIntensityStr)
		{
			if (MSLevel>1)
			{
				printf("Error: mzXML parser encountered a scan with no precursor intenisty: File %s Pos %d\n", 
					mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
				exit(1);
			}
		}
		else
		{
			precursorIntensity = ParseMassFromXML(precursorIntensityStr);
		}

		// the precursor m/z is the content of the precursorMz element
		mass_t PrecursorMZ = 0;
		char *PrecursorStr = strstr(ScanStr, "<precursorMz");
		if (PrecursorStr)
			PrecursorStr = strstr(PrecursorStr, ">");
		if (PrecursorStr)
			PrecursorMZ = ParseMassFromXML(PrecursorStr);

		if (!PrecursorStr && MSLevel > 1)
		{
			printf("Error: mzXML parser encountered a scan with no m/z: File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
			exit(1);
		}
		
		//  check that this is a good spectrum to output
		const bool good_spectrum = (MSLevel>1 && PeakCount>2 && 
									PrecursorMZ>= min_m_over_z && PrecursorMZ <= max_m_over_z);
		if (! good_spectrum)
		{
			scan_start_ptr = ScanStr +50;
			continue;
		}

		// find the peaks element and its byte order
		char* PeakStr = strstr(PrecursorStr, "<peaks");
		if (PeakStr)
		{
			char* ByteOrderStr = strstr(PeakStr, "byteOrder=\"");
			if (ByteOrderStr)
			{
				ByteOrderStr += 11;
				if (!strncmp(ByteOrderStr, "network", 7))
				{
					ByteOrderLittle = 0;
				}
				if (!strncmp(ByteOrderStr, "big", 3))
				{
					ByteOrderLittle = 0;
				}
				if (!strncmp(ByteOrderStr, "little", 6))
				{
					ByteOrderLittle = 1;
				}
			}
			PeakStr = strstr(PeakStr, ">");
		}
		if (!PeakStr)
		{
			// stop parsing this file; leaving the loop closes the file and
			// reports the spectra that were already added to the buffers
			cout << "Error parsing peaks from mzxml! " << endl;
			break;
		}

		PeakStr++;
		char* PeakBuffer = PeakStr;

		// grow the decoding buffers if this scan has more peaks than they hold
		if (PeakBufferSize < PeakCount)
		{
			if (DecodedPeakBuffer)
			{
				free(DecodedPeakBuffer);
				DecodedPeakBuffer = NULL;
				free(Peaks);
				Peaks = NULL;
				free(FilteredPeaks);
				FilteredPeaks=NULL;
			}
			PeakBufferSize = (int)(PeakCount*1.5);
			DecodedPeakBuffer = (char*)calloc(PeakBufferSize * 8 + 8, 1);
			Peaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
			FilteredPeaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));

			if (! DecodedPeakBuffer || ! Peaks || ! FilteredPeaks)
			{
				cout << "Error: couldn't allocate memory for parsing MZXML files!" << endl;
				exit(1);
			}
		}
		
		// terminate the encoded peak text so the decoder stops at its end
		const int Trail = (PeakCount % 3);
		char *peak_text_end = PeakBuffer + PeakCount * 32/3;
		if (Trail)
			peak_text_end += Trail + 1;

		// the terminator must stay inside the data that was read
		if (peak_text_end > Buffer + BufferEnd)
		{
			printf("** Warning: mzXML parser skipped a scan whose peaks are not fully in the read buffer!  File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
			scan_start_ptr = ScanStr +50;
			continue;
		}
		*peak_text_end = '\0';

		b64_decode_mio( DecodedPeakBuffer, PeakBuffer);

		// the bytes of each float are reversed when the byte order of the
		// data differs from the byte order this build was compiled for
#ifdef BYTEORDER_LITTLE_ENDIAN
		const bool swap_bytes = (ByteOrderLittle == 0);
#else
		const bool swap_bytes = (ByteOrderLittle != 0);
#endif

		int FloatIndex;
		for (FloatIndex = 0; FloatIndex < (2 * PeakCount); FloatIndex++)
		{
			char *float_bytes = DecodedPeakBuffer + FloatIndex*4;
			if (swap_bytes)
			{
				char ByteSwap = float_bytes[0];
				float_bytes[0] = float_bytes[3];
				float_bytes[3] = ByteSwap;
				ByteSwap = float_bytes[1];
				float_bytes[1] = float_bytes[2];
				float_bytes[2] = ByteSwap;
			}
			memcpy(Peaks + FloatIndex, float_bytes, 4);
		}

		// select the DAT buffer, keeping the index inside dat_buffs
		int DAT_file_idx =  (int)(PrecursorMZ/mass_increment);
		if (DAT_file_idx > max_dat_file_idx)
			DAT_file_idx = max_dat_file_idx;
		if (DAT_file_idx < 0)
			DAT_file_idx = 0;

		if (! dat_buffs[DAT_file_idx].ind_was_initialized)
		{
			ostringstream os, os_batch;
			os << DAT_file_idx;
			os_batch << batch;
			string path = out_dir + "/" + name + "_" + os_batch.str() + "_" + os.str() + ".dat";
			dat_buffs[DAT_file_idx].init(path,dat_buff_size);
		}

		// join and filter peaks
		const int new_num_peaks = join_and_filter_peak_list(config,PrecursorMZ,Peaks,PeakCount,FilteredPeaks);

		dat_buffs[DAT_file_idx].add_spec_to_DAT_file(PrecursorMZ, 0, file_idx, ScanNumber, retentionTime,  
							 precursorIntensity, new_num_peaks, (char *)FilteredPeaks);

		spec_counter++;

		// continue the search inside the peak text of this scan
		scan_start_ptr = PeakStr + 8 * PeakCount;
	}

	fclose(MZXMLFile);
	cout << spec_counter << " spectra..." << endl << endl;

	return spec_counter;
}




