## ----------------------------------------------------------------------------- # Clear workspace and load ASAFE rm(list=ls()) library(ASAFE) # adm_ancestries_test is a matrix with # Rows: Markers # Columns: Marker ID, individuals' chromosomes' ancestries # (e.g. ADM1, ADM1, ADM2, ADM2, and etc.) # adm_genotypes_test is a matrix with # Rows: Markers # Columns: Marker ID, individuals' genotypes (a1/a2) # (e.g. ADM1, ADM2, ADM3, and etc.) # Making the rsID column row names row.names(adm_ancestries_test) <- adm_ancestries_test[,1] row.names(adm_genotypes_test) <- adm_genotypes_test[,1] adm_ancestries_test <- adm_ancestries_test[,-1] adm_genotypes_test <- adm_genotypes_test[,-1] # alleles_list is a list of lists. # Outer list elements correspond to SNPs. # Inner list elements correspond to 250 people's alleles # with no delimiter separating alleles. alleles_list <- apply(X = adm_genotypes_test, MARGIN = 1, FUN = strsplit, split = "/") # Creates a matrix: # Alleles for chromosomes (ADM1, ADM1, ..., ADM250, ADM250) x (SNPs) alleles_unlisted <- sapply(alleles_list, unlist) # Change elements of the matrix to numeric alleles <- apply(X = alleles_unlisted, MARGIN = 2, as.numeric) # Apply the EM algorithm to each SNP to obtain # ancestry-specific allele frequency estimates for all SNPs in # matrices alleles and adm_ancestries_test. # # Columns correspond to markers. # Rows correspond to ancestries 0, 1, and then 2. # Entries in rows 2 through 4 # give P(Allele 1 | Ancestry a), a = 0, 1, or 2 for a marker. adm_estimates_test <- sapply(X = 1:ncol(alleles), FUN = algorithm_1snp_wrapper, alleles = alleles, ancestries = adm_ancestries_test) adm_estimates_test