////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This program is free software; you can redistribute it and/or modify 
// it under the terms of the GNU General Public License (version 2) as 
// published by the Free Software Foundation.
// 
// This program is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// General Public License for more details.
// 
// You should have received a copy of the GNU General Public License 
// along with this program; if not, write to the 
// 
// Free Software Foundation, Inc., 
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef AFFX_SNPDATA__INCLUDED_
#define AFFX_SNPDATA__INCLUDED_

/*! \file SnpData.h This file provides capabilities for reading and analyzing SNP data.
 */

//////////////////////////////////////////////////////////////////////

#ifdef WIN32
#pragma warning(disable:4786)
#pragma warning(disable:4503)
#pragma warning(disable:291)
#endif

#include <algorithm>
#include <cmath>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <string>
#include <vector>

//////////////////////////////////////////////////////////////////////

namespace affxsnp
{

//////////////////////////////////////////////////////////////////////

// NOTE: preprocessor macros are not scoped by the enclosing namespace; every
// name defined below is visible to any translation unit including this header.

/*! Iteration cap for the R2 EM procedure (reading based on the name; the consumer is not in this header). */
#define R2_EM_MAX_ITERATIONS 1000

/*! Likelihood tolerance for the R2 EM procedure - presumably the convergence threshold; confirm against the implementation. */
#define R2_EM_LIKELIHOOD_CONVERSION_TOLERANCE	1e-8

/*! Default bin size for ECDF output (consumer not visible here). */
#define ECDF_DEFAULT_BIN_SIZE 100

/*! Default sequence name - presumably used when no sequence name is supplied; confirm against the readers. */
#define DEFAULT_SEQ_NAME "NA"

/*! Amount by which to increment snp vector when incrementing - for efficiency reasons */
#define SNP_VECTOR_INCREMENT_BLOCKSIZE 5000

/*! Fudge factor to be added to denominator when determining initial probabilities for R2 calculations */
#define R2_EM_INITIALIZATION_FUDGE_FACTOR 0.1

/*! Machine epsilon for double. */
#define EPSILON std::numeric_limits<double>::epsilon()

/*! "Safe" logarithm: adds EPSILON to the argument so that slog(0) is finite.
 *  Uses std::log from <cmath> explicitly, so the macro does not depend on an
 *  unqualified log happening to be visible at the point of expansion.
 */
#define slog(x) std::log((x) + EPSILON)

//////////////////////////////////////////////////////////////////////

/*! Nucleotide codes. The numeric values double as indices into szNUCLEOTIDE. */
typedef enum _NUCLEOTIDE_TYPE
{
	NUCLEOTIDE_N = 0,
	NUCLEOTIDE_A = 1,
	NUCLEOTIDE_C = 2,
	NUCLEOTIDE_G = 3,
	NUCLEOTIDE_T = 4
} NUCLEOTIDE_TYPE;

/*! One-letter symbol for every NUCLEOTIDE_TYPE value, in enumerator order.
 *  Plain character array of exactly five entries - it is NOT NUL-terminated.
 */
const char szNUCLEOTIDE[5] = {
	'N',	// NUCLEOTIDE_N
	'A',	// NUCLEOTIDE_A
	'C',	// NUCLEOTIDE_C
	'G',	// NUCLEOTIDE_G
	'T'	// NUCLEOTIDE_T
};

/*! Gender codes for a sample. */
typedef enum _GENDER_TYPE
{
	GENDER_MALE    = 0,
	GENDER_FEMALE  = 1,
	GENDER_UNKNOWN = 2
} GENDER_TYPE;

/*! Affectation status codes for a sample. */
typedef enum _AFFECTATION_TYPE
{
	AFFECTATION_AFFECTED   = 0,
	AFFECTATION_UNAFFECTED = 1,
	AFFECTATION_UNKNOWN    = 2
} AFFECTATION_TYPE;

/*! Transition record for one family: a Tb/Ub allele pair and a phased flag
 *  for the father and for the mother.
 *  NOTE(review): Tb/Ub presumably mean transmitted / untransmitted base - confirm.
 */
typedef struct _TRANSITION
{
	char fatherTb;
	char fatherUb;
	bool fatherPhased;
	char motherTb;
	char motherUb;
	bool motherPhased;

	/*! Starts with every allele set to NUCLEOTIDE_N and neither parent phased. */
	_TRANSITION()
		: fatherTb(NUCLEOTIDE_N),
		  fatherUb(NUCLEOTIDE_N),
		  fatherPhased(false),
		  motherTb(NUCLEOTIDE_N),
		  motherUb(NUCLEOTIDE_N),
		  motherPhased(false)
	{
	}
} TRANSITION;


/*! File information for one SNP panel: three strings, kept in the order seq, ped, info. */
typedef struct _PANEL_FILES
{
	std::string seq;	// sequence entry (presumably the sequence name - confirm)
	std::string ped;	// ped-file entry
	std::string info;	// info-file entry
} PANEL_FILES;


/*! Option record carrying two ped/info file pairs plus filter settings.
 *  NOTE(review): the field notes are read off the names only; the code that
 *  fills and consumes this structure is not part of this header.
 */
struct opt
{
	int help;		// presumably non-zero when usage output is requested
	std::string pedFile1;	// ped file, first panel
	std::string pedFile2;	// ped file, second panel
	std::string infoFile1;	// info file, first panel
	std::string infoFile2;	// info file, second panel
	double window;		// window size
	double maf1;		// MAF setting, first panel
	double maf2;		// MAF setting, second panel
	bool verbose;		// verbose output switch
	bool skipSelf;		// presumably: skip pairs mapping to the same position
	bool freePass;		// presumably: treat same-position pairs as r2 == 1
};

/*! Option record passed by value to ProcessLDBetweenPanels.
 *  NOTE(review): the field notes are read off the names only; confirm against
 *  the implementation that consumes them.
 */
struct opt2
{
	std::string resultFileBase;	// base name for result output
	std::string ecdfFileBase;	// base name for ECDF output
	double window;			// window size
	double maf1;			// MAF setting, first panel
	double maf2;			// MAF setting, second panel
	bool verbose;			// verbose output switch
	bool skipSelf;			// presumably: skip pairs mapping to the same position
	bool freePass;			// presumably: treat same-position pairs as r2 == 1
	bool AllPairRsq;		// presumably: report every pairwise r2
	double callRate;		// overall call-rate setting
	double callRateHom;		// call-rate setting for homozygous calls
	double callRateHet;		// call-rate setting for heterozygous calls
	double errorRateHom;		// error-rate setting for homozygous calls
	double errorRateHet;		// error-rate setting for heterozygous calls
};

/*! Parameters of a multi-marker run.
 *  NOTE(review): the per-field notes are read off the names only.
 */
struct Hopt
{
	std::string pedFile;		// ped file
	std::string infoFile;		// info file
	std::string resultFileBase;	// base name for result output
	std::string format;		// input format selector (values not visible here)
	double window;			// window size
	double maf;			// MAF setting
	bool verbose;			// verbose output switch
	bool OneMarker;			// one-marker mode switch
	bool TwoMarker;			// two-marker mode switch
	bool ThreeMarker;		// three-marker mode switch
	bool TwoPanelCoverage;		// two-panel coverage switch
};


//////////////////////////////////////////////////////////////////////
/*! Storage for one SNP genotype: an allele per chromosome plus a phase flag. */
class CGenotype
{
public:
	/*! Default constructor; the genotype will be set to NN. */
	CGenotype();

	/*! Builds a genotype whose alleles are known but whose phase is not.
	 * @param  h1 Allele on the first chromosome.
	 * @param  h2 Allele on the second chromosome.
	 */
	CGenotype(char h1, char h2);

	/*! Builds a genotype with known alleles and phase status.
	 * @param  h1 Allele on the first chromosome.
	 * @param  h2 Allele on the second chromosome.
	 * @param  p  Phase status.
	 */
	CGenotype(char h1, char h2, bool p);

	/*! @return Allele on the first chromosome. */
	char GetHap1() const
	{
		return hap1;
	}

	/*! @return Allele on the second chromosome. */
	char GetHap2() const
	{
		return hap2;
	}

	/*! @return True when the genotype is phased. */
	bool isPhased() const
	{
		return phased;
	}

	/*! Stores the allele of the first chromosome.
	 * @param  h Allele on the first chromosome.
	 */
	void SetHap1(char h);

	/*! Stores the allele of the second chromosome.
	 * @param  h Allele on the second chromosome.
	 */
	void SetHap2(char h);

	/*! Stores the phase status.
	 * @param  p New phase status.
	 */
	void SetPhased(const bool p)
	{
		phased = p;
	}

protected:
	/*! Allele on the first chromosome. */
	char hap1;

	/*! Allele on the second chromosome. */
	char hap2;

	/*! True when the phase is known. */
	bool phased;

	// CSnpPanel works on the raw fields directly.
	friend class CSnpPanel;
};

	/*! Pair of alleles (one per chromosome) for a single sample at one SNP. */
	class Haplotype
	{
		//friend class CSnpPanel;

	protected:
		/*! Allele present on first chromosome. */
		char hap1;

		/*! Allele present on second chromosome. */
		char hap2;

	public:
		/*! Default constructor; both alleles are set to 0, which is the value of
		 *  NUCLEOTIDE_N, so a default-constructed object reads as "NN" instead of
		 *  holding indeterminate values (this also covers elements created by
		 *  std::vector<Haplotype>::resize).
		 */
		Haplotype() : hap1(0), hap2(0) {}

		/*! Constructor from the two alleles.
		 * @param  h1 The allele on the first chromosome
		 * @param  h2 The allele on the second chromosome
		 */
		Haplotype(char h1, char h2);

		/*! Gets the allele on first chromosome
		 * @return The allele on first chromosome
		 */
		char GetHap1() const { return hap1; }

		/*! Gets the allele on second chromosome
		 * @return The allele on second chromosome
		 */
		char GetHap2() const { return hap2; }

		/*! Sets the allele on first chromosome.
		 * @param  h The allele on the first chromosome.
		 */
		void SetHap1(char h) { hap1 = h; }

		/*! Sets the allele on second chromosome.
		 * @param  h The allele on the second chromosome.
		 */
		void SetHap2(char h) { hap2 = h; }
	};

////////////////////////////////////////////////////////////////////

/*! Storage for one SNP: identity (ID, sequence, position), its two alleles,
 *  minor allele frequency, and one CGenotype per sample.
 */
class CSnpData
{
	friend class CSnpPanel;
	friend class CSnpLess;
	friend class CSnpWithin;

protected:
	/*! SNP identifier. */
	std::string snpID;

	/*! Sequence on which SNP is located. */
	std::string seq;

	/*! Position at which SNP is located. */
	double pos;

	/*! Alleles and Minor Allele Frequency */
	char allele1;
	char allele2;
	double maf;

	/*! SNP genotypes, one entry per sample. */
	std::vector<CGenotype> geno;

public:
	/*! Constructor */
	CSnpData();

	/*! Constructor
	 * @param  i The snpID.
	 * @param  s The name of the sequence on which the SNP lies.
	 * @param  p The position at which the SNP is located.
	 */
	CSnpData(std::string i, std::string s, double p);

	/*! Constructor
	 * @param  i The snpID.
	 * @param  s The name of the sequence on which the SNP lies.
	 * @param  p The position at which the SNP is located.
	 * @param  n The number of samples typed.
	 */
	CSnpData(std::string i, std::string s, double p, int n);

	/*! Identity comparison: two SNPs are equal when snpID, sequence and
	 *  position match exactly; genotypes, alleles and MAF are NOT compared.
	 *  Declared const so it can be applied to const objects (for example
	 *  through a const_iterator).
	 */
	bool operator == (const CSnpData& rhs) const
	{
		return (snpID == rhs.snpID && seq == rhs.seq && pos == rhs.pos);
	}

	/*! Assignment operator; copies every member of rhs into this object. */
	CSnpData& operator = (const CSnpData& rhs)
	{
		snpID = rhs.snpID;
		seq = rhs.seq;
		pos = rhs.pos;
		geno = rhs.geno;
		allele1 = rhs.allele1;
		allele2 = rhs.allele2;
		maf = rhs.maf;
		return *this;
	}

	/*! Prints SNP info to STDOUT
	 */
	void print(void) const;

	/*! Gets the snpID.
	 * @return The snpID.
	 */
	std::string GetSnpID() const { return snpID; }

	/*! Gets the sequence on which the SNP lies.
	 * @return The sequence on which the SNP lies.
	 */
	std::string GetSeq() const { return seq; }

	/*! Gets the position at which the SNP lies.
	 * @return The position of the SNP.
	 */
	double GetPos() const { return pos; }

	/*! Gets the SNP call rate.
	 * @return SNP call rate.
	 */
	double GetCallRate() const;

	/*! Gets the first allele for the SNP.  Will be N if not enough info to determine.
	 * @return The first allele for the SNP.
	 */
	char GetAllele1() const { return allele1; }

	/*! Gets the second allele for the SNP.  Will be N if not enough info to determine.
	 * @return The second allele for the SNP.
	 */
	char GetAllele2() const { return allele2; }

	/*! Gets the minor allele frequency for the SNP.
	 * @return The minor allele frequency.
	 */
	double GetMAF() const { return maf; }

	/*! Gets the SNP genotypes.
	 * @return A COPY of the genotype vector (not an iterator or reference).
	 *  Every call copies all genotypes, so take the result once rather than
	 *  calling begin()/end() on two separate calls.
	 */
	std::vector<CGenotype> GetGeno() const { return geno; }

	/*! Gets number of samples stored for the SNP
	 * @return Number of samples stored.
	 */
	int GetNsample() const { return (int) geno.size(); }

	/*! Sets the snpID.
	 * @param  i The snpID.
	 */
	void SetSnpID(std::string i) { snpID=i; }

	/*! Sets the name of the sequence.
	 * @param  s The name of the sequence on which the SNP lies.
	 */
	void SetSeq(std::string s) { seq=s; }

	/*! Sets the position of the SNP
	 * @param  p The position at which the SNP is located.
	 */
	void SetPos(double p) { pos=p; }

	/*! Resizes the genotype vector.
	 * @param  p The new number of genotype entries.
	 */
	void ReSize (unsigned int p) {geno.resize(p);}

	/*! Sets the genotype for a sample.
	 * @param  i The index of the sample to which the genotype applies.
	 * @param  h1 The haplotype for chr1.
	 * @param  h2 The haplotype for chr2.
	 * @param  p The phasing status of the genotype.
	 */
	void SetGeno(unsigned int i, char h1, char h2, bool p);

	/*! Analyzes genotypes to determine alleles, does some QC on the way to ensure the data is bi-allelic.
	 */
	void SetAlleles();

	/*! Applies a call rate p to the calls by possibly changing each non-NN genotype to NN with probability p.
	 * @param  p  The probability of a call becoming NN.
	 */
	void ApplyCallRate(const double& p);

	/*! Applies a call rate p to the het calls by possibly changing each het genotype to NN with probability p.
	 * @param  p  The probability of a het call becoming NN.
	 */
	void ApplyCallRateHet(const double& p);

	/*! Applies a call rate p to the hom calls by possibly changing each hom genotype to NN with probability p.
	 * @param  p  The probability of a hom call becoming NN.
	 */
	void ApplyCallRateHom(const double& p);

	/*! Applies error rates to the calls by possibly introducing an error to each non-NN genotype.
	 *  For a homozygous genotype if an error is introduced it will be to make the call het.
	 *  For a het genotype if an error is introduced it will be to make the call hom.  A random choice will be made up-front to decide which hom is selected when a het error is introduced, after which all het errors will be in favour of the same homozygote (to better reflect reality).
	 * @param  p_hom_err  The probability of an error for a homozygote.
	 * @param  p_het_err  The probability of an error for a heterozygote.
	 * @param  allele_bias  The probability of a het error being called as the preferred homozygote.  There is an equal chance of the preferred homozygote being each allele.
	 */
	void ApplyErrorRate(double p_hom_err, double p_het_err, double allele_bias);

	/*! Overload without an explicit allele bias (the value used is chosen by the implementation, not visible here). */
	void ApplyErrorRate(double p_hom_err, double p_het_err);

	/*! Overload taking a single rate p (how it maps onto hom/het errors is defined by the implementation, not visible here). */
	void ApplyErrorRate(double p);

	/*! Applies an error rate p to het calls by possibly introducing an error to each het genotype with probability p.
	 *  If an error is introduced it will be to make the call hom.  A random choice will be made up-front to decide which hom is selected when an error is introduced, after which all errors will be in favour of the same homozygote (to better reflect reality).
	 * @param  p  The probability of an error.
	 * @param  q  Defaults to 0.5; presumably the allele bias described for ApplyErrorRate - confirm against the implementation.
	 */
	void ApplyErrorRateHet(const double& p, const double& q = 0.5);

	/*! Applies an error rate p to hom calls by possibly introducing an error to each hom genotype with probability p.
	 *  If an error is introduced it will be to make the call het.
	 * @param  p  The probability of an error.
	 */
	void ApplyErrorRateHom(const double& p);
};

/*! Storage for one SNP in haplotype form: identity (ID, position), target /
 *  predictor flags, alleles, minor allele frequency, and one Haplotype per subject.
 */
class HaplotypeData
{
	friend class CSnpPanel;
	//friend class CSnpLess;
	//friend class CSnpWithin;

protected:
	/*! SNP identifier. */
	std::string snpID;

	/*! whether the SNP is a target that needs to be covered */
	int target;

	/*! whether the SNP is a typed predictor */
	int predictor;

	/*! Position at which SNP is located. */
	long pos;

	/*! Alleles and Minor Allele Frequency */
	char allele1;
	char allele2;
	double maf;

	/*! SNP haplotypes, one entry per sample. */
	std::vector<Haplotype> haplo;

public:
	/*! Default constructor. Every scalar member is zeroed so that a
	 *  default-constructed object never exposes indeterminate values through
	 *  the getters, operator== or operator=. For the alleles, 0 is the value
	 *  of NUCLEOTIDE_N.
	 */
	HaplotypeData()
		: snpID(),
		  target(0),
		  predictor(0),
		  pos(0),
		  allele1(0),
		  allele2(0),
		  maf(0.0),
		  haplo()
	{
	}

	/*! Constructor
	 * @param  i The snpID.
	 * @param  p The position at which the SNP is located (note: a double here, while the member is a long).
	 */
	HaplotypeData(std::string i, double p);

	/*! Identity comparison: two SNPs are equal when snpID and position
	 *  match; haplotypes, alleles, flags and MAF are NOT compared.
	 *  Declared const so it can be applied to const objects.
	 */
	bool operator == (const HaplotypeData& rhs) const
	{
		return (snpID == rhs.snpID && pos == rhs.pos);
	}

	/*! Assignment operator; copies every member of rhs into this object. */
	HaplotypeData& operator = (const HaplotypeData& rhs)
	{
		snpID = rhs.snpID;
		pos = rhs.pos;
		haplo = rhs.haplo;
		allele1 = rhs.allele1;
		allele2 = rhs.allele2;
		target = rhs.target;
		predictor = rhs.predictor;
		maf = rhs.maf;
		return *this;
	}

	/*! Prints SNP info to STDOUT
	 */
	void print(void) const;

	/*! Gets the snpID.
	 * @return The snpID.
	 */
	std::string GetSnpID() const { return snpID; }

	/*! Gets the target flag.
	 * @return The stored target value.
	 */
	int GetTarget () const { return target; }

	/*! Sets the target flag.
	 * @param  t The new target value.
	 */
	void SetTarget (int t) { target = t; }

	/*! Gets the predictor flag.
	 * @return The stored predictor value.
	 */
	int GetPredictor () const { return predictor; }

	/*! Sets the predictor flag.
	 * @param  t The new predictor value.
	 */
	void SetPredictor (int t) { predictor = t; }

	/*! Gets the position at which the SNP lies.
	 * @return The position of the SNP.
	 */
	long GetPos() const { return pos; }

	/*! Gets the first allele for the SNP.  Will be N if not enough info to determine.
	 * @return The first allele for the SNP.
	 */
	char GetAllele1() const { return allele1; }

	/*! Gets the second allele for the SNP.  Will be N if not enough info to determine.
	 * @return The second allele for the SNP.
	 */
	char GetAllele2() const { return allele2; }

	/*! Gets the minor allele frequency for the SNP.
	 * @return The minor allele frequency.
	 */
	double GetMAF() const { return maf; }

	/*! Gets the SNP haplotypes.
	 * @return Const reference to the haplotype vector; valid only while this object is alive.
	 */
	const std::vector<Haplotype>& GetHaplo() const { return haplo; }

	/*! Gets number of samples stored for the SNP
	 * @return Number of samples stored.
	 */
	int GetNsample() const { return (int)haplo.size(); }

	/*! Sets the snpID.
	 * @param  i The snpID.
	 */
	void SetSnpID(std::string i) { snpID=i; }

	/*! Sets the position of the SNP
	 * @param  p The position at which the SNP is located.
	 */
	void SetPos(long p) { pos=p; }

	/*! Calculate the SNP maf (declaration currently disabled).
	 * @return SNP maf.
	 */
	//void SetMaf();

	/*! Resizes the haplotype vector.
	 * @param  p The new number of haplotype entries.
	 */
	void ReSize (unsigned int p) {haplo.resize(p);}

	/*! Sets the haplotype for a sample.
	 * @param  i The index of the sample to which the haplotype applies.
	 * @param  h1 The haplotype for chr1.
	 * @param  h2 The haplotype for chr2.
	 */
	void SetHaplo(unsigned int i, char h1, char h2);

	/*! Analyzes genotypes to determine alleles, does some QC on the way to ensure the data is bi-allelic.
	 */
	void SetAlleles();

	/*! Applies a call rate p to the calls by possibly changing each non-NN genotype to NN with probability p.
	 * @param  p  The probability of a call becoming NN.
	 */
	void ApplyCallRate(const double& p);

	/*! Applies a call rate p to the het calls by possibly changing each het genotype to NN with probability p.
	 * @param  p  The probability of a het call becoming NN.
	 */
	void ApplyCallRateHet(const double& p);

	/*! Applies a call rate p to the hom calls by possibly changing each hom genotype to NN with probability p.
	 * @param  p  The probability of a hom call becoming NN.
	 */
	void ApplyCallRateHom(const double& p);
};


/*! Result record for a pair of SNPs: ID, sequence and position of each
 *  member of the pair, plus the r2 value attached to the pair.
 */
class CSnpPair
{
protected:
	std::string snpID1,snpID2;
	std::string seq1,seq2;
	double pos1,pos2,r2;

public:
	/*! Default constructor: all strings empty, both positions and r2 set to -1. */
	CSnpPair()
		: snpID1(), snpID2(),
		  seq1(), seq2(),
		  pos1(-1), pos2(-1), r2(-1)
	{
	}

	/*! Assignment operator; copies every field of rhs. */
	CSnpPair& operator = (const CSnpPair& rhs)
	{
		snpID1 = rhs.snpID1;
		seq1   = rhs.seq1;
		pos1   = rhs.pos1;

		snpID2 = rhs.snpID2;
		seq2   = rhs.seq2;
		pos2   = rhs.pos2;

		r2 = rhs.r2;
		return *this;
	}

	/*! @return ID of the first SNP. */
	std::string GetSnpID1() const
	{
		return snpID1;
	}

	/*! @return ID of the second SNP. */
	std::string GetSnpID2() const
	{
		return snpID2;
	}

	/*! @return Sequence name of the first SNP. */
	std::string GetSeq1() const
	{
		return seq1;
	}

	/*! @return Sequence name of the second SNP. */
	std::string GetSeq2() const
	{
		return seq2;
	}

	/*! @return Position of the first SNP. */
	double GetPos1() const
	{
		return pos1;
	}

	/*! @return Position of the second SNP. */
	double GetPos2() const
	{
		return pos2;
	}

	/*! @return The stored r2 value. */
	double GetR2() const
	{
		return r2;
	}

	/*! Stores the ID of the first SNP.
	 * @param x New value for snpID1.
	 */
	void SetSnpID1(std::string x)
	{
		snpID1 = x;
	}

	/*! Stores the ID of the second SNP.
	 * @param x New value for snpID2.
	 */
	void SetSnpID2(std::string x)
	{
		snpID2 = x;
	}

	/*! Stores the sequence name of the first SNP.
	 * @param x New value for seq1.
	 */
	void SetSeq1(std::string x)
	{
		seq1 = x;
	}

	/*! Stores the sequence name of the second SNP.
	 * @param x New value for seq2.
	 */
	void SetSeq2(std::string x)
	{
		seq2 = x;
	}

	/*! Stores the position of the first SNP.
	 * @param x New value for pos1.
	 */
	void SetPos1(double x)
	{
		pos1 = x;
	}

	/*! Stores the position of the second SNP.
	 * @param x New value for pos2.
	 */
	void SetPos2(double x)
	{
		pos2 = x;
	}

	/*! Stores the r2 value.
	 * @param x New value for r2.
	 */
	void SetR2(double x)
	{
		r2 = x;
	}
};

/*! Identity of a SNP (sequence, position, ID), together with an ordering
 *  functor so that it can serve as a map key.
 */
class CSnpInfo
{
protected:
	std::string seq;
	double pos;
	std::string snpID;

public:
	/*! Constructor
	 * @param  i The snpID.
	 * @param  s The name of the sequence on which the SNP lies.
	 * @param  p The position at which the SNP is located.
	 */
	CSnpInfo(std::string i, std::string s, double p);

	/*! Ordering functor: sequence name first, then position (positions that
	 *  differ by no more than EPSILON are treated as tied), then snpID.
	 */
	struct ltSnpInfo {
		bool operator()(const CSnpInfo& s1, const CSnpInfo& s2) const {
			// 1. sequence name decides whenever the two differ
			if(s1.seq < s2.seq) return true;
			if(s1.seq > s2.seq) return false;

			// 2. position, with an EPSILON-wide tie band
			if(s1.pos < (s2.pos-EPSILON)) return true;
			if(s1.pos > (s2.pos+EPSILON)) return false;

			// 3. snpID breaks the remaining ties
			return (s1.snpID < s2.snpID);
		}
	};
};

////////////////////////////////////////////////////////////////////
/*! Convenience typedefs for iterating over a std::vector<CSnpData>. */
/*! Read-only iterator over a SNP vector. */
typedef std::vector<CSnpData>::const_iterator vSnpIt;
/*! Mutable ("NC" presumably stands for non-const) iterator over a SNP vector. */
typedef std::vector<CSnpData>::iterator vSnpItNC;
/*! Pair of read-only SNP iterators; CSnpPanel::GetSNPsNearTo documents its use as a [lower bound, one-past-upper bound) range. */
typedef std::pair<vSnpIt,vSnpIt> PairedSnpIt;

////////////////////////////////////////////////////////////////////
/*! Ordering functor for CSnpData. SNPs are ordered by
 *	1. sequence name (ordering by chromosome),
 *	2. position (ordering by location),
 *	3. snpID (tie-break, for consistency).
 *
 *  The comparison is strict: two SNPs with identical sequence, position and
 *  ID compare as "not less" in both directions. Standard algorithms such as
 *  std::sort, std::lower_bound and std::upper_bound require this strict weak
 *  ordering; a non-strict (<=) tie-break is undefined behaviour with them.
 */
class CSnpLess : public std::binary_function<CSnpData,CSnpData,bool>
{
public:
	/*! @return True when x orders strictly before y. */
	bool operator () (const CSnpData& x, const CSnpData& y) const
	{
		// Compare the sequence names once and reuse the result.
		const int seqOrder = x.seq.compare(y.seq);
		if(seqOrder != 0) return (seqOrder < 0);

		if(x.pos != y.pos) return (x.pos < y.pos);

		// Strict '<' keeps comp(a, a) == false.
		return (x.snpID < y.snpID);
	}
};

/*! Ordering functor for HaplotypeData. SNPs are ordered by
 *	1. position (ordering by location),
 *	2. snpID (tie-break, for consistency).
 *
 *  The comparison is strict: two entries with identical position and ID
 *  compare as "not less" in both directions, as std::sort and the
 *  binary-search algorithms require (strict weak ordering).
 */
class CHaploLess : public std::binary_function<HaplotypeData,HaplotypeData,bool>
{
public:
	/*! @return True when x orders strictly before y. */
	bool operator () (const HaplotypeData& x, const HaplotypeData& y) const
	{
		if(x.GetPos() != y.GetPos()) return (x.GetPos() < y.GetPos());

		// Strict '<' keeps comp(a, a) == false.
		return (x.GetSnpID() < y.GetSnpID());
	}
};

////////////////////////////////////////////////////////////////////
/*! Predicate telling whether one SNP lies within a fixed distance of another.
 *  The distance is the class-wide static 'width', set through SetWindowWidth.
 */
class CSnpWithin : public std::binary_function<CSnpData,CSnpData,bool>
{
public:
	/*! @return True when x and y are on the same sequence and x.pos lies
	 *  strictly inside (y.pos - width, y.pos + width).
	 */
	bool operator () (const CSnpData& x, const CSnpData& y) const
	{
		// SNPs on different chromosomes never match
		const bool sameSequence = (x.seq.compare(y.seq) == 0);
		if(!sameSequence)
		{
			return false;
		}

		const bool belowUpper = (x.pos < y.pos + width);
		const bool aboveLower = (x.pos > y.pos - width);
		return (belowUpper && aboveLower);
	}

	/*! Sets the window width. The value is static, so it is shared by every
	 *  CSnpWithin instance and therefore applies to all panels.
	 */
	static void SetWindowWidth(const double& windowidth)
	{
		width = windowidth;
	}

private:
	static double width;
};

////////////////////////////////////////////////////////////////////

/*! Storage for one sample: its IDs (sample, family, father, mother), gender,
 *  affectation status, and storage index.
 */
class CSampleData
{
	friend class CSnpPanel;

protected:
	/*! Sample identifier. */
	std::string sampleID;

	/*! Identifier of the family the sample belongs to. */
	std::string familyID;

	/*! Identifier of the sample's father. */
	std::string fatherID;

	/*! Identifier of the sample's mother. */
	std::string motherID;

	/*! Gender of the sample. */
	char gender;

	/*! Affectedness status of the sample. */
	char affected;

	/*! Storage index of the sample. */
	int storage_index;

public:
	/*! Constructor
	 * @param  s The sampleID.
	 * @param  p The familyID.
	 * @param  f The fatherID.
	 * @param  m The motherID.
	 * @param  g The gender.
	 * @param  a The affectation status.
	 * @param  i The storage index.
	 */
	CSampleData(std::string s, std::string p, std::string f, std::string m, char g, char a, int i);

	/*! @return The sampleID. */
	std::string GetSampleID() const
	{
		return sampleID;
	}

	/*! @return The familyID. */
	std::string GetFamilyID() const
	{
		return familyID;
	}

	/*! @return The fatherID. */
	std::string GetFatherID() const
	{
		return fatherID;
	}

	/*! @return The motherID. */
	std::string GetMotherID() const
	{
		return motherID;
	}

	/*! @return The gender code. Stored as a char, handed back converted to double. */
	double GetGender() const
	{
		return static_cast<double>(gender);
	}

	/*! @return The affectation code. Stored as a char, handed back converted to double. */
	double GetAffectation() const
	{
		return static_cast<double>(affected);
	}

	/*! @return The storage index. */
	int GetStorageIndex() const
	{
		return storage_index;
	}
};

////////////////////////////////////////////////////////////////////
/*! Convenience typedefs for sample lookup and family handling. */
/*! Pair of strings; it is the key type of the sample map (which two IDs it holds is not visible here - presumably family ID and sample ID, confirm against the readers). */
typedef std::pair<std::string,std::string> PairedString;
/*! Read-only iterator into a map from PairedString to CSampleData. */
typedef std::map <PairedString, CSampleData>::const_iterator mapSampleIt;

/*!	Parent holds map iterators (not raw pointers) for father and mother; which of first/second is which is defined by the implementation. */
typedef std::pair<mapSampleIt, mapSampleIt> Parent;
/*!	Family holds map iterators (not raw pointers) for all family members */
typedef std::vector<mapSampleIt> Family;
/*!	ParentIndex holds the storage indices of father and mother; which of first/second is which is defined by the implementation. */
typedef std::pair<int,int> ParentIndex;

////////////////////////////////////////////////////////////////////

/*! This class provides storage for a panel of SNPs across a set of samples.*/
class CSnpPanel
{
//protected:
public:
	/*! A map, indexed by sampleID, of all samples in the panel. */
	std::map <std::pair<std::string,std::string>, CSampleData> sample;

	/*! A vector of all SNPs in the panel. Sorted with respect to (sequence name, SNP position, snpID) */
	std::vector <CSnpData> vsnp;

	/*! A vector of all SNPs in the panel. Sorted with respect to (sequence name, SNP position, snpID) */
	std::vector <HaplotypeData> vhaplo;

public:
	/* ! the default constructor */
	CSnpPanel();

	/* ! the default constructor */
	//CSnpPanel() {}

	/*! Initializes the object with SNP data from a .ped and .info file pair.
	 * @param  pedFile A full path to a .ped (linkage format) file of sample and SNP data.
	 * @param  infoFile A full path to a .info file with auxilliary SNP data.
	 */
	void Read(std::string panelFile);
	void Read(std::string panelFile, bool verbose);
	void Read(std::string pedFile, std::string infoFile);
	void Read(std::string pedFile, std::string infoFile, std::string seqName);
	void ReadPedFile(std::string pedFile);
	void ReadInfoFile(std::string infoFile, std::string seqName);

	// file reading subroutines for multiple marker mode
	void BroadRead(std::string pedFile, std::string infoFile);
	void ReadBroadPedFile(std::string pedFile);
	void ReadBroadInfoFile(std::string infoFile);

	void OxfordRead(std::string pedFile, std::string infoFile);
	void ReadOxfordPedFile(std::string pedFile);
	void ReadOxfordInfoFile(std::string infoFile);

	/*! Add to the object with additional SNP data from a .ped and .info file pair.
	 * @param  pedFile A full path to a .ped (linkage format) file of sample and SNP data.
	 * @param  infoFile A full path to a .info file with auxilliary SNP data.
	 * @return An error code for possible errors in reading and adding samples and SNPs, 0 if no error
	 */
	long Add(std::string pedFile, std::string infoFile, std::string seqName);
	long AddSNPs(std::string pedFile, std::string infoFile, int& nMoreSnps, std::string seqName);
	long AddPedInfo(std::string pedFile, const int& start);

	/*! Process the SNP panel to identify trios and phase parents from child */
	void PhaseTrios();
	void SortTest();
	
	/*! Filters out SNPs
	 * @param dropIndex Reference to vector of ints which are indices of SNPs to drop.
	 * @return Number of SNPs dropped.
	 */
	int dropSNPs(std::vector<int> &dropIndex);

	/*! Filters out SNPs with low MAF from the panel
	 * @param mafThreshold The MAF below which to reject SNPs.
	 * @param highMaf Reference to vector of ints which will hold indices of SNPs with MAF greater than or equal to than the threshold.
	 * @param lowMaf Reference to vector of ints which will hold indices of SNPs with MAF less than the threshold.
	 * @return The number of SNPs filtered.
	 */
	int highMAF(double mafThreshold, std::vector<int> &highMAF, std::vector<int> &lowMAF);

	/*! Process the SNP to identify trios and phase parents from child */
	void PhaseTrios(vSnpItNC& it);
	void PhaseTrios(CSnpData& snp);

	/*! Phasing a family
	*	@param child1,child2,father1,father2,mother1,mother2 are types for child,father and mother respectively
	*	@param res, a vector of 4, holds the Tb,Ub for both father and mother
	*	@return Which parent is phased, 0 for none, 1 for father, 2 for mother, 3 for both father and mother
	*/
	void PhasingFamily(const char child1,const char child2,const char father1,const char father2,const char mother1,const char mother2,TRANSITION& res);
	
	/*! Helping functions to traverse the sample vector to get individual samples*/
	Family GetFamily(const std::string strChildID) const;
	Parent GetParent(const mapSampleIt child) const;
	ParentIndex GetParentIndex(const mapSampleIt child) const;
	ParentIndex GetParentIndex(const Parent parent) const;
	CSampleData GetSampleByIndex(int i);

	/*! Gets all neighboring SNPs of a given SNP within a given distance.
	 * @param ref Reference to a SNP around which to find other SNPs.
	 * @param windowwidth The distance to search in each direction for SNPs.
	 * @param pit Reference to a pair of iterators which will be set to the lower and upper bound of the found SNPs.
	 *	The lower bound will point to the first SNP, the upper bound will point to one after the last SNP.
	 * @return Returns true if at least one SNP is found, otherwise returns false. 
	 */
	bool GetSNPsNearTo(const CSnpData& ref, const double& windowwidth, PairedSnpIt& pit);
	bool GetSNPsNearTo(const CSnpData& ref, const double& leftWindowSize, const double& rightWindowSize, PairedSnpIt& pit);
	bool GetSNPsNearTo(const CSnpData& ref, const double& leftWindowSize, const double& rightWindowSize, PairedSnpIt& pit, bool verbose);

	/*! Gets the last SNP before a particular sequence and position.
	 * @param s The sequence to search.
	 * @param p The position in the sequence.
	 * @return An iterator set to the requested SNP, or NULL if none found.
	 */
	std::vector<CSnpData>::const_iterator GetLastSnpBefore(std::vector<CSnpData>::const_iterator it);

	/*! Gets the first SNP after a particular sequence and position.
	 * @param s The sequence to search.
	 * @param p The position in the sequence.
	 * @return An iterator set to the requested SNP, or NULL if none found.
	 */
	std::vector<CSnpData>::const_iterator GetFirstSnpAfter(std::vector<CSnpData>::const_iterator it);

	/*! Gets the SNP at index in the panel.
	 * @return The SNP at the index.
	 * Caller is responsible to check bounds for the index
	 */
	CSnpData GetSnp(const int index) const {return vsnp[index];}
	HaplotypeData GetHaplotype(const int index) const {return vhaplo[index];}

	/*! Gets the first SNP in the panel.
	 * @return An iterator set to the requested SNP, or NULL if none found.
	 */
	std::vector<CSnpData>::const_iterator GetFirstSnp() const {return vsnp.begin(); }

	/*! Gets the number of SNPs in the panel.
	 * @return Number of SNPs in the panel.
	 */
	unsigned int GetNumberSnps() const {
		if (vsnp.size()>0){
			return vsnp.size(); 
		}
		else{
			return vhaplo.size();
		}
	}

	/*! Gets the first sample in the panel.
	 * @return An iterator set to the first sample in the panel.
	 */
	std::map <std::pair<std::string,std::string>, CSampleData> ::const_iterator GetFirstSample() const {return sample.begin(); }

	/*! Gets the number of samples in the panel.
	 * @return Number of samples in the panel.
	 */
	unsigned int GetNumberSamples() const {return sample.size(); }

	/*! Identify unrelated samples.
	 * @param Ref to vector of (index,sampleID) pairs representing indices of samples for which neither parent is included in the panel.
	 */
	void GetUnrelatedSampleIndex(std::vector <std::pair <unsigned int, std::string> >& u);
};



////////////////////////////////////////////////////////////////////
/*! Convenience typedefs for numeric result containers. */
/*! Vector of (double,double) pairs; it is the type of the 'ecdf' parameter of ProcessLDBetweenPanels. */
typedef std::vector<std::pair<double,double> > vPairDouble;
/*! Vector of doubles. */
typedef std::vector<double> vDouble;

////////////////////////////////////////////////////////////////////

/*! Computes pairwise r^2 between all SNPs within distance windowwidth in two SNP panels.
*	@param refPanel The reference SNP panel
*	@param testPanel The test SNP panel
*	@param windowwidth The window size to search SNPs from test panel
*	@param skipSelf If true then we skip computation of r2 between markers mapping to the same position.
*	@param freePass If true then assume r2 is 1 for markers mapping to the same position.
*	@param result Ref to vector of CSnpPair objects which will hold results
*	@param storeResult Presumably: whether pairs are appended to result - confirm against the implementation.
*	@param printResult Presumably: whether pairs are printed - confirm against the implementation.
*/
void AllPairwiseLDBetweenPanels(CSnpPanel *refPanel,
                                CSnpPanel *testPanel,
                                const double& windowwidth,
                                const bool skipSelf,
                                const bool freePass,
                                std::vector<CSnpPair>& result,
                                const bool storeResult,
                                const bool printResult);

/*! Overload without a result vector and without the store/print switches. */
void AllPairwiseLDBetweenPanels(CSnpPanel *refPanel,
                                CSnpPanel *testPanel,
                                const double& windowwidth,
                                const bool skipSelf,
                                const bool freePass);

/*! Overload with a result vector but without the store/print switches. */
void AllPairwiseLDBetweenPanels(CSnpPanel *refPanel,
                                CSnpPanel *testPanel,
                                const double& windowwidth,
                                const bool skipSelf,
                                const bool freePass,
                                std::vector<CSnpPair>& result);


/*! Process LD between two panels, calculate R^2 SNP by SNP
*	@param refPanel The reference panel the R^2s are calculated upon
*	@param testPanel The test panel the R^2s are calculated from
*	@param windowwidth The window size to search SNPs from test panel
*	@param vResults The result holder for all R^2s // temp, 
*	@param vRsCdf The input and output of reversed CDF of Rs, keep the number of counts for the time being // temp
*	@return The error code if there is any
*/

/*
int ProcessLDBetweenPanels(
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	const double& windowwidth,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf
);
int ProcessLDBetweenPanels(
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	const double& windowwidth,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf,
	std::vector<CSnpPair>* result,
	bool skipSelf,
	bool freePass,
	bool AllPairRsq
);
int ProcessLDBetweenPanels( //this function record the largest r^2 for each ref SNP, and output CDF
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	const double& windowwidth,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf,
	std::vector<CSnpPair>* result,
	const double callRate,
	const double callRateHom,
	const double callRateHet,
	const double errorRateHom,
	const double errorRateHet,
	const bool skipSelf,
	const bool freePass
);
int ProcessLDBetweenPanels( //this function record all pairwise r^2 
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	const double& windowwidth,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf,
	std::vector<CSnpPair>* result,
	const double callRate,
	const double callRateHom,
	const double callRateHet,
	const double errorRateHom,
	const double errorRateHet,
	const bool skipSelf,
	const bool freePass,
	bool AllPairRsq
);
*/

/*! The current definition of ProcessLDBetweenPanels (older variants are kept, commented out, above).
*	@param refPanel The reference panel
*	@param testPanel The test panel
*	@param o Run options, passed by value (window, MAF, call-rate and error-rate settings, ...)
*	@param refSnpIndex Indices of SNPs in the reference panel (presumably the ones to process - confirm)
*	@param ecdf Vector of (double,double) pairs (presumably receives the empirical CDF - confirm)
*	@param result Pointer to a vector of CSnpPair results (whether it may be null is not visible here)
*	@param vname A name string (its use is defined by the implementation)
*	@return An int (presumably an error code - confirm)
*/
int ProcessLDBetweenPanels(CSnpPanel& refPanel,
                           CSnpPanel& testPanel,
                           opt2 o,
                           const std::vector<int> &refSnpIndex,
                           vPairDouble& ecdf,
                           std::vector<CSnpPair>* result,
                           std::string vname);




/*! Computes r2 between two markers using all samples.
 * @param  m1 The first marker.
 * @param  m2 The second marker.
 * @return The value of r2 between the two SNPs.
 */
double CalculateRS(const CSnpData& m1,
                   const CSnpData& m2);

/*! Computes r2 between two markers on a subset of samples.
 * @param  m1 The first marker.
 * @param  m2 The second marker.
 * @param  sampleIndex Vector of (index,sampleID) pairs specifying samples to be used in calculation.
 * @return The value of r2 between the two SNPs.
 */
double CalculateRS(const CSnpData& m1,
                   const CSnpData& m2,
                   std::vector <std::pair <unsigned int, std::string> >& sampleIndex);

/*! Same as the subset form above, with an additional verbosity switch.
 * @param  verbose Verbosity switch.
 */
double CalculateRS(const CSnpData& m1,
                   const CSnpData& m2,
                   std::vector <std::pair <unsigned int, std::string> >& sampleIndex,
                   bool verbose);

//////////////////////////////////////////////////////////////////////
} // namespace
//////////////////////////////////////////////////////////////////////

#endif // !defined(AFFX_SNPDATA__INCLUDED_)

