////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This program is free software; you can redistribute it and/or modify 
// it under the terms of the GNU General Public License (version 2) as 
// published by the Free Software Foundation.
// 
// This program is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// General Public License for more details.
// 
// You should have received a copy of the GNU General Public License 
// along with this program; if not, write to the 
// 
// Free Software Foundation, Inc., 
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////


#ifdef WIN32
#pragma warning(disable:4786)
#endif //WIN32

#include <assert.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <string>
#include <utility>
#include "SnpData.h"

#define PAR_FILE_BUFFER_SIZE 256

using namespace std;
using namespace affxsnp;


/*! Command-line mode: every parameter is supplied as a command-line option.
 *  process_args fills *o and returns EXIT_SUCCESS or EXIT_FAILURE;
 *  usage prints the option summary to standard output. */
int process_args(int argc, char *argv[], struct opt *o);
void usage(char *argv[]);

/*! Parameter-file mode: every setting is read from a single parameter file. */
int multi_marker_mode(const char* szParFile); // determine the run mode: 0 = single marker, 1 = multi-marker
bool ReadParFile(  // diploid single marker version of ReadParFile
	const char* szParFile,
	std::vector<std::pair<std::string,std::string> >& vped,
	std::vector<std::pair<std::string,std::string> >& vinfo,
	std::vector<std::string>& vname,
	opt2& o
);
bool ReadParFile(const char* szParFile,	Hopt& o); // multi-marker version of ReadParFile
void BatchProcess( // single marker mode: one (ref, test) panel pair per entry
	std::vector<std::pair<std::string,std::string> > vped,
	std::vector<std::pair<std::string,std::string> > vinfo,
	std::vector<std::string> vname,
	opt2& o,
	const int& nBins
);
void BatchProcess(Hopt& o, const int& nBins); // multiple marker mode
// Writes the per-set r^2 lists and the per-set and pooled binned distributions.
void ProcessResults(
	std::vector<std::vector<affxsnp::CSnpPair> > vresult,
	std::vector<std::string> vname,
	std::string resultFileBase,
	std::string ecdfFileBase,
	const int nBins
);
// Writes one table row per bin; each bin count is divided by count.
void PrintECDF(
	const std::vector<std::pair<double,double> >& ecdf,
	std::fstream& fs,
	const int& count
);

// the following are helper routines used only in multi-marker mode
double TwoMarkerRsq (const HaplotypeData& target, const HaplotypeData& marker1, const HaplotypeData& marker2);
double ThreeMarkerRsq (const HaplotypeData& target, const HaplotypeData& marker1, const HaplotypeData& marker2,
					   const HaplotypeData& marker3);
void ProcessPanels(CSnpPanel& PanelHaplo, const Hopt& o);
double CalculateRS(const HaplotypeData& target,const HaplotypeData& MewMarker);


/*!
 * Program entry point.
 *
 * With exactly one argument, that argument is taken as a parameter file: the
 * run mode (diploid single marker or multi-marker) is read from the file and
 * the matching BatchProcess() overload is run with 100 bins.
 *
 * Otherwise the options are parsed from the command line and one pair of
 * panels is analysed in diploid single marker mode. The pairwise r^2 list is
 * written to "sty_chr10_maxr2.txt" and its binned distribution to
 * "sty_chr10_maxr2_ecdf.txt"; both file names are fixed.
 *
 * \return 0 on success; non-zero when the parameter file or the command line
 *         is rejected, or when an output file cannot be opened.
 */
int main(int argc, char *argv[]) {

	if(argc == 2) // all settings come from a single parameter file
	{
		std::cout << "Reading par file from " << argv[1] << std::endl;
		// 0 selects diploid single marker mode and a positive value selects
		// multi-marker mode; a negative value is treated as an unusable file.
		const int mode = multi_marker_mode(argv[1]);
		if (mode < 0)
		{
			std::cout << "Invalid par file!!" << std::endl;
			return 1;
		}

		if (mode == 0)
		{
			// diploid single marker mode
			std::cout << "Program now runs in single marker mode." << std::endl;
			opt2 o2;
			std::vector<std::pair<std::string,std::string> > vped, vinfo;
			std::vector<std::string> vname;
			std::cout << "Reading par file ... " << std::endl;
			if(!ReadParFile(argv[1], vped, vinfo, vname, o2))
			{
				std::cout << "Invalid par file!!" << std::endl;
				return 1;
			}
			BatchProcess(vped,vinfo,vname,o2,100);
			return 0;
		}

		// multiple marker mode
		std::cout << "Program now runs in multiple marker mode." << std::endl;
		struct Hopt o;
		if(!ReadParFile(argv[1],o))
		{
			std::cout << "Invalid par file!!" << std::endl;
			return 1;
		}
		BatchProcess(o,100);
		return 0;
	}

	/* In case of command-line arguments
	   Only the diploid single marker mode uses command line arguments */
	std::cout << "Program now runs in single marker mode." << std::endl;
	struct opt o;
	if(process_args(argc,argv,&o)==EXIT_FAILURE) {
		std::cerr << argv[0] << ": problem processing command-line args\n";
		return EXIT_FAILURE;
	}
	if(o.help) {
		usage(argv);
		return EXIT_SUCCESS;
	}

	// First panel: read, then drop the SNPs below the MAF threshold
	if(o.verbose)
		std::cout << "Reading first SNP panel from " << o.pedFile1.c_str() << "\n";
	affxsnp::CSnpPanel ref;
	ref.Read(o.pedFile1,o.infoFile1);
	std::vector<int> refHighMAF;
	std::vector<int> refLowMAF;
	int nFiltered_ref = ref.highMAF(o.maf1,refHighMAF,refLowMAF);
	if(o.verbose) {
		std::cout << "  Ignoring " << nFiltered_ref << " SNPs with MAF < " << o.maf1 << "\n";
		std::cout << "  " << refHighMAF.size() << " SNPs remain\n";
	}
	ref.dropSNPs(refLowMAF);

	// Second panel: same treatment with its own threshold
	if(o.verbose)
		std::cout << "Reading second SNP panel from " << o.pedFile2.c_str() << "\n";
	affxsnp::CSnpPanel test;
	test.Read(o.pedFile2,o.infoFile2);
	std::vector<int> testHighMAF;
	std::vector<int> testLowMAF;
	int nFiltered_test = test.highMAF(o.maf2,testHighMAF,testLowMAF);
	if(o.verbose) {
		std::cout << "  Ignoring " << nFiltered_test << " SNPs with MAF < " << o.maf2 << "\n";
		std::cout << "  " << testHighMAF.size() << " SNPs remain\n";
	}
	test.dropSNPs(testLowMAF);

	// Phase both panels
	ref.PhaseTrios();
	test.PhaseTrios();

	std::cout << "Computing r2\n";
	std::vector<affxsnp::CSnpPair> result;
	affxsnp::AllPairwiseLDBetweenPanels(&ref,&test,o.window,o.skipSelf,o.freePass,result);

	// One line per SNP pair: id1, pos1, id2, pos2, r^2
	const char* const pairFile = "sty_chr10_maxr2.txt";
	std::fstream fs(pairFile,std::ios::out);
	if(!fs) {
		std::cerr << argv[0] << ": cannot open " << pairFile << " for writing\n";
		return EXIT_FAILURE;
	}
	unsigned int i;
	fs << std::setiosflags(std::ios::fixed);
	for(i=0; i < result.size(); i++)
	{
		fs << std::setprecision(0);
		fs << result[i].GetSnpID1() << "\t" << result[i].GetPos1() << "\t" << result[i].GetSnpID2() << "\t" << result[i].GetPos2();
		fs << std::setprecision(4);
		fs << "\t" << result[i].GetR2() << "\n";
	}
	fs.close();

	// Histogram of r^2 over 100 bins; .first is the lower edge of a bin and
	// .second the number of pairs that fell into it.
	const unsigned int nBins = 100;
	const double step = 0.01;
	std::vector<std::pair<double,double> > ecdf(nBins);
	for(i = 0; i < nBins; i ++)
	{
		ecdf[i].first = i*step;
		ecdf[i].second = 0;
	}
	for(i=0; i < result.size() ; i++)
	{
		const double r2 = result[i].GetR2();
		// A value goes into the highest bin whose lower edge it strictly
		// exceeds; a value that exceeds no edge (r2 <= 0) lands in no bin.
		for(int j = static_cast<int>(nBins) - 1; j >= 0; j --)
		{
			if(r2 > ecdf[j].first)
			{
				ecdf[j].second += 1;
				break;
			}
		}
	}

	const char* const ecdfFile = "sty_chr10_maxr2_ecdf.txt";
	std::fstream fs2(ecdfFile,std::ios::out);
	if(!fs2) {
		std::cerr << argv[0] << ": cannot open " << ecdfFile << " for writing\n";
		return EXIT_FAILURE;
	}
	// With no pairs at all every fraction is reported as 0 instead of 0/0.
	const double total = static_cast<double>(result.size());
	double cum = 0;
	fs2 << std::setiosflags(std::ios::fixed);
	for(i=0; i < nBins; i++)
	{
		const double fraction = (total > 0) ? ecdf[i].second / total : 0.0;
		cum += fraction;
		fs2 << std::setprecision(2);
		fs2 << ecdf[i].first;
		fs2 << std::setprecision(4);
		fs2 << "\t" << fraction << "\t" << cum << "\n";
	}
	fs2.close();

	return 0;
}

void ProcessResults(
	std::vector<std::vector<affxsnp::CSnpPair> > vresult,
	std::vector<std::string> vname,
	std::string resultFileBase,
	std::string ecdfFileBase,
	const int nBins
)
{
	std::vector<std::pair<double,double> > ecdf(nBins), subecdf(nBins);
	double step = 1/double(nBins);
	long count = 0;
	unsigned int i;
	for(i = 0; i < (unsigned int) nBins; i ++)
	{
		ecdf[i].first = i*step;
		ecdf[i].second = 0;

		subecdf[i].first = i*step;
		subecdf[i].second = 0;
	}

	unsigned int j;
	for(j = 0; j < vresult.size(); j ++)
	{
		std::string resultFile = resultFileBase + "." + vname[j] + ".lst.txt";
		std::fstream resultFH(resultFile.c_str(),ios::out);
		std::string ecdfFile = ecdfFileBase + "." + vname[j] + ".ecdf.txt";
		std::fstream ecdfFH(ecdfFile.c_str(),ios::out);

		for(int l = 0; l < nBins; l ++)
			subecdf[l].second = 0;
		for(i=0; i < vresult[j].size(); i++) 
		{
			resultFH << std::setiosflags(ios::fixed);
			resultFH << std::setprecision(0);
			resultFH << vresult[j][i].GetSnpID1() << "\t" << vresult[j][i].GetPos1() << "\t" << vresult[j][i].GetSnpID2() << "\t" << vresult[j][i].GetPos2();
			resultFH << std::setprecision(4);
			resultFH << "\t" << vresult[j][i].GetR2() << "\n";

			double r2 = vresult[j][i].GetR2();
			for(int k = nBins - 1; k >= 0; k --)
			{
				if(r2 > ecdf[k].first)
				{
					ecdf[k].second += 1;
					subecdf[k].second += 1;
					break;
				}
			}
			count++;
		}

		PrintECDF(subecdf, ecdfFH, i);

		resultFH.close();
		ecdfFH.close();
	}

	std::string ecdfFile = ecdfFileBase + ".total.ecdf.txt";
	std::fstream ecdfFH(ecdfFile.c_str(),ios::out);
	PrintECDF(ecdf, ecdfFH, count);
	ecdfFH.close();

	return;
}

/*!
 * Writes a binned r^2 distribution as a tab-separated table.
 *
 * The first line is the header "r^2<TAB>PDF<TAB>CDF<TAB>1-CDF". Each following
 * line describes one bin: its lower edge (2 decimals), the fraction of items
 * in the bin, the running sum of the fractions and one minus that sum
 * (4 decimals each).
 *
 * \param ecdf   bins; .first is the lower edge, .second the number of items
 * \param fs     stream the table is written to
 * \param count  total number of items the bin counts are divided by; when it
 *               is 0 every fraction is written as 0 instead of 0/0 (NaN)
 */
void PrintECDF(const std::vector<std::pair<double,double> >& ecdf, std::fstream& fs, const int& count)
{
	fs << "r^2\tPDF\tCDF\t1-CDF\n";
	fs << std::setiosflags(std::ios::fixed);

	double cum = 0;
	for(std::vector<std::pair<double,double> >::size_type i = 0; i < ecdf.size(); i++)
	{
		const double d = (count != 0) ? ecdf[i].second / count : 0.0;
		cum += d;

		fs << std::setprecision(2);
		fs << ecdf[i].first;
		fs << std::setprecision(4);
		fs << "\t" << d << "\t" << cum << "\t" << 1 - cum << "\n";
	}
}


void BatchProcess( // diploid single marker mode
	std::vector<std::pair<std::string,std::string> > vped,
	std::vector<std::pair<std::string,std::string> > vinfo,
	std::vector<std::string> vname,
	opt2& o,
	const int& nBins)
{
	std::vector<std::vector<affxsnp::CSnpPair> > vresult(vped.size());
	unsigned int k;
	for(k = 0; k < vped.size(), k < vinfo.size(); k ++)
	{
		// the reference SNP panel
		if(o.verbose) {
			cout << endl;
			cout << "Processing " << vname[k] << endl;
			cout << "  Reading ref SNP panel from" << endl;
			cout << "    Ped:  " << vped[k].first.c_str() << endl;
			cout << "    Info: " << vinfo[k].first.c_str() << endl;
		}
		affxsnp::CSnpPanel ref;
		ref.Read(vped[k].first,vinfo[k].first);
		std::vector<int> refHighMAF;
		std::vector<int> refLowMAF;
		int nFiltered_ref = ref.highMAF(o.maf1,refHighMAF,refLowMAF);
		if(o.verbose) {
			cout << "  Ignoring " << nFiltered_ref << " SNPs with MAF < " << o.maf1 << "\n";
			cout << "  " << refHighMAF.size() << " SNPs remain\n";
		}

		// the genotyped SNP panel
		if(o.verbose) {
			cout << "  Reading test SNP panel from" << endl;
			cout << "    Ped:  " << vped[k].second.c_str() << endl;
			cout << "    Info: " << vinfo[k].second.c_str() << endl;
		}
		affxsnp::CSnpPanel test;
		test.Read(vped[k].second,vinfo[k].second);
		std::vector<int> testHighMAF;
		std::vector<int> testLowMAF;
		int nFiltered_test = test.highMAF(o.maf2,testHighMAF,testLowMAF);
		if(o.verbose) {
			cout << "  Ignoring " << nFiltered_test << " SNPs with MAF < " << o.maf2 << "\n";
			cout << "  " << testHighMAF.size() << " SNPs remain\n";
		}

		if(o.verbose)
			cout << "  Computing r2" << endl;
		affxsnp::vPairDouble ecdf;
		//affxsnp::ProcessLDBetweenPanels(ref,test,o.window,refHighMAF,ecdf,&vresult[k],o.skipSelf,o.freePass,o.AllPairRsq);
		affxsnp::ProcessLDBetweenPanels(ref,test,o,refHighMAF,ecdf,&vresult[k],vname[k]);
	}

	ProcessResults(vresult, vname, o.resultFileBase, o.ecdfFileBase, nBins);

	return;
}

/*!
 * Reads a diploid single marker parameter file.
 *
 * The file must contain, one per line and in this order:
 *   window=<double>   maf1=<double>   maf2=<double>
 *   verbose=<int>     skipself=<int>  freepass=<int>  AllPairRsq=<int>
 *   ResultFileBase=<token>   ECDFFileBase=<token>   number=<N>
 * followed by N lines holding one name each, N lines "<ref ped> <test ped>"
 * and N lines "<ref info> <test info>". Integer flags are true when non-zero.
 *
 * \param szParFile  path of the parameter file
 * \param vped       receives the (reference, test) ped file names (appended)
 * \param vinfo      receives the (reference, test) info file names (appended)
 * \param vname      receives the panel pair names (appended)
 * \param o          receives the scalar options
 * \return true when every line was present and well formed; false when the
 *         file cannot be opened or a line is missing or malformed. The output
 *         arguments may be partially filled when false is returned.
 */
bool ReadParFile( // diploid single marker version
	const char* szParFile,
	std::vector<std::pair<std::string,std::string> >& vped,
	std::vector<std::pair<std::string,std::string> >& vinfo,
	std::vector<std::string>& vname,
	opt2& o) 
{
	// Closes the file on every exit path, including each early "return false".
	struct FileGuard {
		FILE* handle;
		explicit FileGuard(FILE* f) : handle(f) {}
		~FileGuard() { if(handle != NULL) fclose(handle); }
	};

	if(szParFile == NULL) return false;
	FileGuard guard(fopen(szParFile, "r"));
	FILE* fp = guard.handle;
	if(fp == NULL) return false;

	// A line never exceeds PAR_FILE_BUFFER_SIZE-1 characters, so a "%s" token
	// scanned out of szTemp always fits into szRef / szTest.
	char szRef[PAR_FILE_BUFFER_SIZE], szTest[PAR_FILE_BUFFER_SIZE], szTemp[PAR_FILE_BUFFER_SIZE];
	int int_to_bool;

	// Each fgets() result is checked: after a failed read szTemp still holds
	// the previous line, which must not be parsed a second time.
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "window=%lf", &o.window) != 1) return false;
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "maf1=%lf", &o.maf1) != 1) return false;
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "maf2=%lf", &o.maf2) != 1) return false;

	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "verbose=%d", &int_to_bool) != 1) return false;
	o.verbose = (int_to_bool != 0);
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "skipself=%d", &int_to_bool) != 1) return false;
	o.skipSelf = (int_to_bool != 0);
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "freepass=%d", &int_to_bool) != 1) return false;
	o.freePass = (int_to_bool != 0);
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "AllPairRsq=%d", &int_to_bool) != 1) return false;
	o.AllPairRsq = (int_to_bool != 0);

	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "ResultFileBase=%s", szRef) != 1) return false;
	o.resultFileBase = szRef;
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "ECDFFileBase=%s", szRef) != 1) return false;
	o.ecdfFileBase = szRef;

	int number = 0;
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "number=%d", &number) != 1) return false;

	int i;
	for(i = 0; i < number; i++) // panel pair names
	{
		if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "%s", szRef) != 1) return false;
		vname.push_back(std::string(szRef));
	}
	for(i = 0; i < number; i++) // ped files: reference, test
	{
		if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "%s %s", szRef, szTest) != 2) return false;
		vped.push_back(std::make_pair(std::string(szRef),std::string(szTest)));
	}
	for(i = 0; i < number; i++) // info files: reference, test
	{
		if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "%s %s", szRef, szTest) != 2) return false;
		vinfo.push_back(std::make_pair(std::string(szRef),std::string(szTest)));
	}

	// The three lists are indexed in parallel by the caller.
	if(vinfo.size() != vped.size() || vinfo.size() != vname.size()) return false;

	return true;
}

/*!
 * Reads a multi-marker parameter file.
 *
 * The file must contain, one per line and in this order:
 *   window=<double>     maf=<double>      verbose=<int>
 *   OneMarker=<int>     TwoMarker=<int>   ThreeMarker=<int>
 *   TwoPanelCoverage=<int>
 *   format=<token>      ResultFileBase=<token>
 * followed by one line with the ped (haplotype) file name and one line with
 * the info file name. Integer flags are true when non-zero.
 *
 * \param szParFile  path of the parameter file
 * \param o          receives the options
 * \return true when every line was present and well formed; false when the
 *         file cannot be opened or a line is missing or malformed. o may be
 *         partially filled when false is returned.
 */
bool ReadParFile(const char* szParFile,Hopt& o){ // multi-marker version
	// Closes the file on every exit path, including each early "return false".
	struct FileGuard {
		FILE* handle;
		explicit FileGuard(FILE* f) : handle(f) {}
		~FileGuard() { if(handle != NULL) fclose(handle); }
	};

	if(szParFile == NULL) return false;
	FileGuard guard(fopen(szParFile, "r"));
	FILE* fp = guard.handle;
	if(fp == NULL) return false;

	// A line never exceeds PAR_FILE_BUFFER_SIZE-1 characters, so a "%s" token
	// scanned out of szTemp always fits into szRef.
	char szRef[PAR_FILE_BUFFER_SIZE], szTemp[PAR_FILE_BUFFER_SIZE];
	int int_to_bool;

	// Each fgets() result is checked: after a failed read szTemp still holds
	// the previous line, which must not be parsed a second time.
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "window=%lf", &o.window) != 1) return false;
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "maf=%lf", &o.maf) != 1) return false;

	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "verbose=%d", &int_to_bool) != 1) return false;
	o.verbose = (int_to_bool != 0);
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "OneMarker=%d", &int_to_bool) != 1) return false;
	o.OneMarker = (int_to_bool != 0);
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "TwoMarker=%d", &int_to_bool) != 1) return false;
	o.TwoMarker = (int_to_bool != 0);
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "ThreeMarker=%d", &int_to_bool) != 1) return false;
	o.ThreeMarker = (int_to_bool != 0);
	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "TwoPanelCoverage=%d", &int_to_bool) != 1) return false;
	o.TwoPanelCoverage = (int_to_bool != 0);

	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "format=%s", szRef) != 1) return false;
	o.format = szRef;

	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "ResultFileBase=%s", szRef) != 1) return false;
	o.resultFileBase = szRef;

	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "%s", szRef) != 1) return false;
	o.pedFile = szRef;

	if(fgets(szTemp, PAR_FILE_BUFFER_SIZE, fp) == NULL || sscanf(szTemp, "%s", szRef) != 1) return false;
	o.infoFile = szRef;

	return true;
}

/*!
 * Determines the run mode requested by a parameter file.
 *
 * The decision is taken from the second line of the file: when its fourth
 * character is '1' (as in "maf1=...") the file describes a diploid single
 * marker run, otherwise a multi-marker run.
 *
 * \param szParFile  path of the parameter file
 * \return 0 for single marker mode, 1 for multiple marker mode, or -1 when
 *         the file cannot be opened or has fewer than two lines
 */
int multi_marker_mode(const char* szParFile){
	if (szParFile == NULL) {
		return -1;
	}
	FILE* fp = fopen(szParFile, "r");
	if (fp == NULL) {
		return -1;
	}

	char szTemp[PAR_FILE_BUFFER_SIZE];
	const bool haveSecondLine =
		fgets(szTemp, PAR_FILE_BUFFER_SIZE,fp) != NULL && //"window" line
		fgets(szTemp, PAR_FILE_BUFFER_SIZE,fp) != NULL;   //2nd line
	fclose(fp); // the file is only probed here; it is re-read by ReadParFile
	if (!haveSecondLine) {
		return -1;
	}

	// Index 3 is only inspected when the line really has four characters.
	const bool hasFourChars = szTemp[0] != '\0' && szTemp[1] != '\0' && szTemp[2] != '\0';
	if (hasFourChars && szTemp[3]=='1'){
		return 0; //single marker mode
	}
	return 1; //multiple marker mode
}

/* Print usage info to STDOUT.
 * argv[0] is printed as the program name. The list covers every option that
 * process_args() accepts, plus the single-argument parameter-file form that
 * main() handles. */
void usage(char *argv[]) {
  std::cout << "\n";
  std::cout << argv[0] << "\n  Analyzes the extent to which a panel of SNP markers covers a reference set.\n";
  std::cout << "USAGE:\n";
  std::cout << "  " << argv[0] << " <parfile>\n";
  std::cout << "  " << argv[0] << " [options]\n";
  std::cout << "OPTIONS:\n";
  std::cout << "  -h, -help           Print usage information\n";
  std::cout << "  -pedFile1 <file>    Specify .ped (linkage format) file for panel 1.\n";
  std::cout << "  -info1 <file>       Specify .info (linkage format) file for panel 1.\n";
  std::cout << "  -pedFile2 <file>    Specify .ped (linkage format) file for panel 2.\n";
  std::cout << "  -info2 <file>       Specify .info (linkage format) file for panel 2.\n";
  std::cout << "  -window <win>       Analyze SNPs within a distance <win> of each other (default is 100000).\n";
  std::cout << "  -maf1 <maf1>        Filter out SNPs with maf < maf1 in panel 1 (default is 0).\n";
  std::cout << "  -maf2 <maf2>        Filter out SNPs with maf < maf2 in panel 2 (default is 0).\n";
  std::cout << "  -skipSelf           Enable the skipSelf option of the pairwise LD computation.\n";
  std::cout << "  -freePass           Enable the freePass option of the pairwise LD computation.\n";
  std::cout << "  -v                  Verbose: report progress while the panels are read.\n";
  std::cout << "\n";
}

/* Returns the value that follows option argv[i] and advances i onto it.
 * When the option is the last argument, a message naming the missing
 * <what> is written to stderr and NULL is returned. */
static const char* process_args_value(int argc, char *argv[], int &i, const char *what) {
  const char *flag = argv[i];
  if (i + 1 >= argc) {
    std::cerr << "must provide a " << what << " with " << flag << " option\n";
    return NULL;
  }
  ++i;
  return argv[i];
}

/* Processes command-line options.
 *
 * Fills *o with the defaults and then with every recognised option. Arguments
 * that do not start with '-' are ignored. When -h/-help is given the function
 * returns EXIT_SUCCESS without insisting on the panel files, so that the
 * caller can print the usage text.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE (after a message on stderr) for an
 * unknown option, an option without its value, or a missing panel file. */
int process_args(int argc, char *argv[], struct opt *o) {
  /* set defaults */
  o->help = 0;
  o->pedFile1 = "";
  o->pedFile2 = "";
  o->infoFile1 = "";
  o->infoFile2 = "";
  o->window = 100000;
  o->maf1 = 0;
  o->maf2 = 0;
  o->verbose = false;
  o->skipSelf = false;
  o->freePass = false;

  /* read args */
  for (int i=1; i<argc; i++) {
    char *thisarg = argv[i];
    if (thisarg[0] != '-') {
      continue;
    }
    const char *name = thisarg + 1;
    const char *value = NULL;

    if (!strcmp(name,"h") || !strcmp(name,"help")) {
      o->help = 1;
    } else if (!strcmp(name,"pedFile1")) {
      if ((value = process_args_value(argc,argv,i,"filename")) == NULL) return(EXIT_FAILURE);
      o->pedFile1 = value;
    } else if (!strcmp(name,"pedFile2")) {
      if ((value = process_args_value(argc,argv,i,"filename")) == NULL) return(EXIT_FAILURE);
      o->pedFile2 = value;
    } else if (!strcmp(name,"info1")) {
      if ((value = process_args_value(argc,argv,i,"filename")) == NULL) return(EXIT_FAILURE);
      o->infoFile1 = value;
    } else if (!strcmp(name,"info2")) {
      if ((value = process_args_value(argc,argv,i,"filename")) == NULL) return(EXIT_FAILURE);
      o->infoFile2 = value;
    } else if (!strcmp(name,"window")) {
      if ((value = process_args_value(argc,argv,i,"value")) == NULL) return(EXIT_FAILURE);
      o->window = atof(value);
    } else if (!strcmp(name,"maf1")) {
      if ((value = process_args_value(argc,argv,i,"value")) == NULL) return(EXIT_FAILURE);
      o->maf1 = atof(value);
    } else if (!strcmp(name,"maf2")) {
      if ((value = process_args_value(argc,argv,i,"value")) == NULL) return(EXIT_FAILURE);
      o->maf2 = atof(value);
    } else if (!strcmp(name,"skipSelf")) {
      o->skipSelf = true;
    } else if (!strcmp(name,"freePass")) {
      o->freePass = true;
    } else if (!strcmp(name,"v")) {
      o->verbose = true;
    } else {
      std::cerr << "ERROR: " << argv[0] << ": unrecognized option: " << thisarg << "\n";
      return(EXIT_FAILURE);
    }
  }

  /* A help request is honoured even when the panel files are not given. */
  if (o->help) {
    return(EXIT_SUCCESS);
  }

  if(o->pedFile1 == "") {
    std::cerr << "Must specify .ped file with -pedFile1 option\n";
    return(EXIT_FAILURE);
  }
  if(o->pedFile2 == "") {
    std::cerr << "Must specify .ped file with -pedFile2 option\n";
    return(EXIT_FAILURE);
  }
  if(o->infoFile1 == "") {
    std::cerr << "Must specify .info file with -info1 option\n";
    return(EXIT_FAILURE);
  }
  if(o->infoFile2 == "") {
    std::cerr << "Must specify .info file with -info2 option\n";
    return(EXIT_FAILURE);
  }

  return(EXIT_SUCCESS);
}

/*!
 * Multiple marker mode driver.
 *
 * Reads the haplotype panel named in o (reader chosen by o.format: "Broad" or
 * "Oxford"), keeps only the SNPs reported as high-MAF by highMAF(), sorts the
 * survivors with CHaploLess and hands the panel to ProcessPanels().
 * Throws a C string when o.format is neither "Broad" nor "Oxford".
 * nBins is not used in this mode.
 */
void BatchProcess(Hopt& o, const int& nBins) //multiple marker mode
{
	if(o.verbose) {
		cout << endl
		     << "  Reading ref SNP panel from" << endl
		     << "    Haps:  " << o.pedFile << endl
		     << "    Info: " << o.infoFile << endl;
	}

	affxsnp::CSnpPanel PanelHaplo;
	const bool broadFormat = (o.format=="Broad");
	const bool oxfordFormat = !broadFormat && (o.format=="Oxford");
	if (!broadFormat && !oxfordFormat) {
		throw "Error in reading format from par file";
	}
	if (broadFormat) {
		PanelHaplo.BroadRead(o.pedFile,o.infoFile);
	} else {
		PanelHaplo.OxfordRead(o.pedFile,o.infoFile);
	}

	std::vector<int> HighMAF;
	std::vector<int> LowMAF;
	const int nFiltered = PanelHaplo.highMAF(o.maf,HighMAF,LowMAF);
	if(o.verbose) {
		cout << "  Ignoring " << nFiltered << " SNPs with MAF < " << o.maf << "\n"
		     << "  " << HighMAF.size() << " SNPs remain\n";
	}

	// Gather the retained SNPs, order them, then overwrite the panel with them.
	std::vector<HaplotypeData> kept;
	kept.reserve(HighMAF.size());
	for(std::vector<int>::const_iterator it = HighMAF.begin(); it != HighMAF.end(); ++it)
	{
		kept.push_back(PanelHaplo.vhaplo[*it]);
	}
	std::sort(kept.begin(), kept.end(), CHaploLess());
	PanelHaplo.vhaplo.resize(kept.size());
	std::copy(kept.begin(), kept.end(), PanelHaplo.vhaplo.begin());

	if(o.verbose) {
		cout << "  Computing r2" << endl;
	}

	ProcessPanels(PanelHaplo,o);
}

/*!
 * For every target SNP of the panel, finds the predictor SNP (or pair, or
 * triple, depending on o.OneMarker / o.TwoMarker / o.ThreeMarker) inside the
 * window that gives the largest r^2, and writes one tab-separated line per
 * target to the file named o.resultFileBase.
 *
 * The window of target i covers the SNPs whose position differs from that of
 * SNP i by at most o.window; the code relies on vhaplo being ordered by
 * position. With o.TwoPanelCoverage only SNPs flagged by GetTarget() are used
 * as targets and only SNPs flagged by GetPredictor() as predictors.
 */
void ProcessPanels(CSnpPanel& PanelHaplo, const Hopt& o)
{
	std::fstream fs(o.resultFileBase.c_str(),std::ios::out);
	if (!fs)
	{
		// Nothing could be reported, so the scan is not started at all.
		std::cerr << "Cannot open " << o.resultFileBase.c_str() << " for writing" << std::endl;
		return;
	}
	fs << "TargetSNP" << "\t" << "Position" << "\t";
	fs << "PredictorOne" << "\t" << "Position" << "\t";
	if (o.ThreeMarker){
		fs << "PredictorTwo" << "\t" << "Position" << "\t";
		fs << "PredictorThree" << "\t" << "Position" << "\t" << "r^2"<<"\n";
	}
	else{
		fs << "PredictorTwo" << "\t" << "Position" << "\t" << "r^2"<<"\n";
	}

	const int nSnps = static_cast<int>(PanelHaplo.vhaplo.size());
	for (int i=0;i<nSnps;i++)
	{
		if ((o.TwoPanelCoverage)&&(!PanelHaplo.vhaplo[i].GetTarget())){
			continue;
		}
		double temp_rsq=0; double rsq=0;
		// flag records how many predictors the best r^2 found so far uses
		int left=i; int right=i; int flag=0;
		std::string predictor1_ID, predictor2_ID, predictor3_ID;
		long predictor1_pos=0, predictor2_pos=0, predictor3_pos=0;

		//determine the boundary of the window; left and right are both INCLUSIVE
		while (1) //determine the left bound
		{
			if (PanelHaplo.vhaplo[i].GetPos()-PanelHaplo.vhaplo[left].GetPos()>o.window) {left++; break;}
			if (left==0) {break;}
			left--;
		}
		while (1) //determine the right bound
		{
			if (right==nSnps) {right--; break;}
			if (PanelHaplo.vhaplo[right].GetPos()-PanelHaplo.vhaplo[i].GetPos()>o.window) {right--; break;}
			right++;
		}

		// scan the window and record the largest single marker r^2; OneMarker case
		if (o.OneMarker)
		{
			// right is inclusive, so the scan has to reach it. With
			// "idx1<right" the right-most SNP of the window was never tried,
			// and a target that is itself right-most was never recognised as
			// directly genotyped.
			for (int idx1=left; idx1<=right; idx1++)
			{
				if ((o.TwoPanelCoverage)&&(!PanelHaplo.vhaplo[idx1].GetPredictor())){
					continue;
				}

				if (idx1==i) { // this target SNP is directly genotyped
					rsq=1;
					predictor1_ID = PanelHaplo.vhaplo[idx1].GetSnpID();
					predictor1_pos = PanelHaplo.vhaplo[idx1].GetPos();
					flag=1;
					break;
				}

				temp_rsq = CalculateRS(PanelHaplo.vhaplo[i],PanelHaplo.vhaplo[idx1]);
				if (temp_rsq>rsq) {
					rsq=temp_rsq;
					predictor1_ID = PanelHaplo.vhaplo[idx1].GetSnpID();
					predictor1_pos = PanelHaplo.vhaplo[idx1].GetPos();
					flag=1;
				}
			}
		}

		// scan the window and record the largest multiple marker r^2; TwoMarker case
		if ((o.TwoMarker) &&(rsq<1) )
		{
			for (int idx1=left; idx1<right; idx1++)
			{
				if ((o.TwoPanelCoverage)&&(!PanelHaplo.vhaplo[idx1].GetPredictor())){
					continue;
				}
				for (int idx2=idx1+1; idx2<=right; idx2++)
				{
					if (idx2==i) {continue;}
					if ((o.TwoPanelCoverage)&&(!PanelHaplo.vhaplo[idx2].GetPredictor())){
						continue;
					}
					temp_rsq = TwoMarkerRsq(PanelHaplo.vhaplo[i],PanelHaplo.vhaplo[idx1],PanelHaplo.vhaplo[idx2]);
					if (temp_rsq>rsq) {
						rsq=temp_rsq;
						predictor1_ID = PanelHaplo.vhaplo[idx1].GetSnpID();
						predictor2_ID = PanelHaplo.vhaplo[idx2].GetSnpID();
						predictor1_pos = PanelHaplo.vhaplo[idx1].GetPos();
						predictor2_pos = PanelHaplo.vhaplo[idx2].GetPos();
						flag=2;
					}
				}
			}
		}

		// scan the window and record the largest multiple marker r^2; ThreeMarker case
		if ((o.ThreeMarker)&&(rsq<1))
		{
			for (int idx1=left; idx1<right; idx1++)
			{
				if ((o.TwoPanelCoverage)&&(!PanelHaplo.vhaplo[idx1].GetPredictor())){
					continue;
				}
				for (int idx2=idx1+1; idx2<=right; idx2++)
				{
					if ((o.TwoPanelCoverage)&&(!PanelHaplo.vhaplo[idx2].GetPredictor())){
						continue;
					}
					for (int idx3=idx2+1; idx3<=right; idx3++)
					{
						if ((o.TwoPanelCoverage)&&(!PanelHaplo.vhaplo[idx3].GetPredictor())){
							continue;
						}
						temp_rsq = ThreeMarkerRsq(PanelHaplo.vhaplo[i],PanelHaplo.vhaplo[idx1],PanelHaplo.vhaplo[idx2],PanelHaplo.vhaplo[idx3]);
						if (temp_rsq>rsq) {
							rsq=temp_rsq;
							predictor1_ID = PanelHaplo.vhaplo[idx1].GetSnpID();
							predictor2_ID = PanelHaplo.vhaplo[idx2].GetSnpID();
							predictor3_ID = PanelHaplo.vhaplo[idx3].GetSnpID();
							predictor1_pos = PanelHaplo.vhaplo[idx1].GetPos();
							predictor2_pos = PanelHaplo.vhaplo[idx2].GetPos();
							predictor3_pos = PanelHaplo.vhaplo[idx3].GetPos();
							flag=3;
						}
					}
				}
			}
		}

		//output results to file; unused predictor columns get a placeholder
		fs << PanelHaplo.vhaplo[i].GetSnpID() << "\t" << PanelHaplo.vhaplo[i].GetPos() << "\t";
		if (flag==3)
		{
			fs << predictor1_ID << "\t" << predictor1_pos << "\t";
			fs << predictor2_ID << "\t" << predictor2_pos << "\t";
			fs << predictor3_ID << "\t" << predictor3_pos << "\t";
		}
		else if (flag==2)
		{
			fs << predictor1_ID << "\t" << predictor1_pos << "\t";
			fs << predictor2_ID << "\t" << predictor2_pos << "\t";
			if (o.ThreeMarker){
				fs << "No need of 3nd predictor" << "\t" << "NA" << "\t";
			}
		}
		else if (flag==1)
		{
			fs << predictor1_ID << "\t" << predictor1_pos << "\t";
			fs << "No need of 2nd predictor" << "\t" << "NA" << "\t";
			if (o.ThreeMarker){
				fs << "No need of 3nd predictor" << "\t" << "NA" << "\t";
			}
		}
		else
		{
			fs << "No enough neighbor" << "\t" << "NA" << "\t";
			fs << "No enough neighbor" << "\t" << "NA" << "\t";
			if (o.ThreeMarker){
				fs << "No enough neighbor" << "\t" << "NA" << "\t";
			}
		}
		fs << rsq << "\n";
	}
	fs.close();
	return;
}

double TwoMarkerRsq (const HaplotypeData& target, const HaplotypeData& marker1, const HaplotypeData& marker2)
{
	//std::vector<char> Marker1Allele;
	//std::vector<char> Marker2Allele;
	//Marker1Allele.push_back(marker1.GetAllele1()); Marker1Allele.push_back(marker1.GetAllele2());
	//Marker2Allele.push_back(marker2.GetAllele1()); Marker2Allele.push_back(marker2.GetAllele2());

	char Marker1Allele[2];
	Marker1Allele[0] = marker1.GetAllele1(); Marker1Allele[1] = marker1.GetAllele2();
	char Marker2Allele[2];
	Marker2Allele[0] = marker2.GetAllele1(); Marker2Allele[1] = marker2.GetAllele2();

	double rsq=0;
	for (int i=0; i<2; i++)
	{
		for (int j=0; j<2; j++)
		{
			// (1) construct a new SNP
			HaplotypeData NewMarker;
			NewMarker.ReSize(target.GetNsample());
			int Nsample=target.GetNsample();
			for (unsigned int s=0; s<Nsample; s++)
			{
				char a11=marker1.GetHaplo()[s].GetHap1();
				char a12=marker1.GetHaplo()[s].GetHap2();
				char a21=marker2.GetHaplo()[s].GetHap1();
				char a22=marker2.GetHaplo()[s].GetHap2();
				char NewAllele1='2'; char NewAllele2='2'; 
				if ((a11=='0') || (a12=='0'))
				{
					NewMarker.SetHaplo(s,'0','0'); continue;
				}
				if ((a21=='0') || (a22=='0'))
				{
					NewMarker.SetHaplo(s,'0','0'); continue;
				}
				if ((a11==Marker1Allele[i]) && (a21==Marker2Allele[j]))
				{
					NewAllele1='1';
				}
				if ((a12==Marker1Allele[i]) && (a22==Marker2Allele[j]))
				{
					NewAllele2='1';
				}
				NewMarker.SetHaplo(s,NewAllele1,NewAllele2);
			}

			// (2) calculate the r^2 between NewMarker and target
			NewMarker.SetAlleles();
			double temp =CalculateRS(target,NewMarker);
			if (temp>rsq) {rsq=temp;}	
		}
	}
	return rsq;
}

double ThreeMarkerRsq (const HaplotypeData& target, const HaplotypeData& marker1, const HaplotypeData& marker2, const HaplotypeData& marker3)
{
	//std::vector<char> Marker1Allele;
	//std::vector<char> Marker2Allele;
	//Marker1Allele.push_back(marker1.GetAllele1()); Marker1Allele.push_back(marker1.GetAllele2());
	//Marker2Allele.push_back(marker2.GetAllele1()); Marker2Allele.push_back(marker2.GetAllele2());

	char Marker1Allele[2];
	Marker1Allele[0] = marker1.GetAllele1(); Marker1Allele[1] = marker1.GetAllele2();
	char Marker2Allele[2];
	Marker2Allele[0] = marker2.GetAllele1(); Marker2Allele[1] = marker2.GetAllele2();
	char Marker3Allele[2];
	Marker3Allele[0] = marker3.GetAllele1(); Marker3Allele[1] = marker3.GetAllele2();

	double rsq=0;
	for (int i=0; i<2; i++) // marker#1
	{
		for (int j=0; j<2; j++) // marker#2
		{
			for (int k=0; k<2; k++) // marker#3
			{
				// (1) construct a new SNP
				HaplotypeData NewMarker;
				NewMarker.ReSize(target.GetNsample());
				int Nsample=target.GetNsample();
				for (unsigned int s=0; s<Nsample; s++)
				{
					char a11=marker1.GetHaplo()[s].GetHap1();
					char a12=marker1.GetHaplo()[s].GetHap2();
					char a21=marker2.GetHaplo()[s].GetHap1();
					char a22=marker2.GetHaplo()[s].GetHap2();
					char a31=marker3.GetHaplo()[s].GetHap1();
					char a32=marker3.GetHaplo()[s].GetHap2();

					char NewAllele1='2'; char NewAllele2='2'; 
					// if any of the three predictors SNP contain missing data
					// the new marker will have missing data
					if ((a11=='0') || (a12=='0'))
					{
						NewMarker.SetHaplo(s,'0','0'); continue;
					}
					if ((a21=='0') || (a22=='0'))
					{
						NewMarker.SetHaplo(s,'0','0'); continue;
					}
					if ((a31=='0') || (a32=='0'))
					{
						NewMarker.SetHaplo(s,'0','0'); continue;
					}

					if ((a11==Marker1Allele[i]) && (a21==Marker2Allele[j]) && (a31==Marker3Allele[k]) )
					{
						NewAllele1='1';
					}
					if ((a12==Marker1Allele[i]) && (a22==Marker2Allele[j]) && (a32==Marker3Allele[k]) )
					{
						NewAllele2='1';
					}
					NewMarker.SetHaplo(s,NewAllele1,NewAllele2);
				}

				// (2) calculate the r^2 between NewMarker and target
				NewMarker.SetAlleles();
				double temp =CalculateRS(target,NewMarker);
				if (temp>rsq) {rsq=temp;}	
			}
		}
	}
	return rsq;
}

double CalculateRS(const HaplotypeData& marker1,const HaplotypeData& marker2)
{
	/* Read data to determine how many of each haplotype we have */
	char m11 = marker1.GetAllele1();
	char m12 = marker1.GetAllele2();
	char m21 = marker2.GetAllele1();
	char m22 = marker2.GetAllele2();

	std::vector<Haplotype> haplo1 = marker1.GetHaplo();
	std::vector<Haplotype> haplo2 = marker2.GetHaplo();
	int nAB=0;
	int nAb=0;
	int naB=0;
	int nab=0;
	//int nDH=0; // DH = DoubleHet
	int nN = 0;
	int Nsample=marker1.GetNsample();
	for (unsigned int s=0; s<Nsample; s++)
	{
		char a11=marker1.GetHaplo()[s].GetHap1();
		char a12=marker1.GetHaplo()[s].GetHap2();
		char a21=marker2.GetHaplo()[s].GetHap1();
		char a22=marker2.GetHaplo()[s].GetHap2();
		if((a11 == '0') || (a12 == '0') || (a21 == '0') || (a22 == '0')) {
			// There is at least one N
			// HaploView just ignores it, though it seems one can try extract more info.  However for consistency with Haploview we'll do the same thing...
			nN += 2;
			continue;
		} 
		if ((a11 == m11) && (a21 == m21)) {nAB++;}
		if ((a11 == m11) && (a21 == m22)) {nAb++;}
		if ((a11 == m12) && (a21 == m21)) {naB++;}
		if ((a11 == m12) && (a21 == m22)) {nab++;}

		if ((a12 == m11) && (a22 == m21)) {nAB++;}
		if ((a12 == m11) && (a22 == m22)) {nAb++;}
		if ((a12 == m12) && (a22 == m21)) {naB++;}
		if ((a12 == m12) && (a22 == m22)) {nab++;}	
	}
	double nChrom = nAB + nAb + naB + nab;

	/* If we have missing data (i.e. un-phased double hets) then use EM algorithm. */
	double pAB;
	double pAb;
	double paB;
	double pab;

	pAB = ((double)nAB) / nChrom;
	pAb = ((double)nAb) / nChrom;
	paB = ((double)naB) / nChrom;
	pab = ((double)nab) / nChrom;

	double pA = pAB + pAb;
	double pB = pAB + paB;
	double pa = paB + pab;
	double pb = pAb + pab;

	double D = pAB*pab - pAb*paB;
	double r2 = D*D /(pA*pa*pB*pb);

	return r2;
}


