## ----pipeline, eval=FALSE----------------------------------------------------- # rgSet <- readidat(datapath) # beta=mpreprocess(rgSet) ## ----example1, eval=FALSE----------------------------------------------------- # library(ENmix) # #read in data # require(minfiData) # #read in IDAT files # path <- file.path(find.package("minfiData"),"extdata") # rgSet <- readidat(path = path,recursive = TRUE) # #data pre-processing # beta=mpreprocess(rgSet,nCores=6) # #quality control, data pre-processing and imputation # beta=mpreprocess(rgSet,nCores=6,qc=TRUE,fqcfilter=TRUE, # rmcr=TRUE,impute=TRUE) # # #CpG information (from Illumina manifest file) is self-contained in rgDataSet or methDataSet # cginfo=getCGinfo(rgSet) # #It has the same infomation if extracted from methDataSet # meth=getmeth(rgSet) # cginfo1=rowData(meth) # #Probe information for internal controls # ictrl=getCGinfo(rgSet,type="ctrl") ## ----example2, eval=FALSE----------------------------------------------------- # library(ENmix) # #read in data # path <- file.path(find.package("minfiData"),"extdata") # rgSet <- readidat(path = path,recursive = TRUE) # #QC info # qc<-QCinfo(rgSet) # #background correction and dye bias correction # #if qc info object is provided, the low quality or outlier samples # # and probes will be excluded before background correction # mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", # QCinfo=qc, nCores=6) # #between-array normalization # mdat<-norm.quantile(mdat, method="quantile1") # #probe-type bias adjustment # beta<-rcp(mdat,qcscore=qc) # #set low quality and outlier data points as NA (missing value) # #remove samples and probes with too many (eg. >5%) missing data # #perform imputation to replace missing values # beta <- qcfilter(beta,qcscore=qc,rmcr=TRUE,impute=TRUE) # #remove suffix and combine duplicated CpGs with rm.cgsuffix(), for EPIC v2 # #beta2=rm.cgsuffix(beta) ## ----example3, eval=FALSE----------------------------------------------------- # library(ENmix) # #read in data # path <- file.path(find.package("minfiData"),"extdata") # rgSet <- readidat(path = path,recursive = TRUE) # # #attach some phenotype info for later use # sheet <- read.metharray.sheet(file.path(find.package("minfiData"), # "extdata"),pattern = "csv$") # rownames(sheet)=paste(sheet$Slide,sheet$Array,sep="_") # colData(rgSet)=as(sheet[colnames(rgSet),],"DataFrame") # # #generate internal control plots to inspect quality of experiments # plotCtrl(rgSet) # # #generate quality control (QC) information and plots, # #identify outlier samples in data distribution # #if set detPtype="negative", recommend to set depPthre to <= 10E-6 # #if set detPtype="oob", depPthre of 0.01 or 0.05 may be sufficient # #see Heiss et al. PMID: 30678737 for details # qc<-QCinfo(rgSet,detPtype="negative",detPthre=0.000001) # # ###data preprocessing # #background correction and dye bias correction # mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", # QCinfo=qc, nCores=6) # #between-array normalization # mdat<-norm.quantile(mdat, method="quantile1") # #attach phenotype data for later use # colData(mdat)=as(sheet[colnames(mdat),],"DataFrame") # #probe-type bias adjustment # beta<-rcp(mdat,qcscore=qc) # # #Search for multimodal CpGs, so called gap probes # #beta matrix before qcfilter() step should be used for this purpose # nmode<-nmode(beta, minN = 3, modedist=0.2, nCores = 6) # # #filter low quality and outlier data points for each probe # #rows and columns with too many missing values will be removed if specify # #Imputation will be performed to fill missing data if specify. # beta <- qcfilter(beta,qcscore=qc,rmcr=TRUE, rthre=0.05, # cthre=0.05,impute=TRUE) # # #If for epigenetic mutation analysis, outliers should be kept # beta <- qcfilter(beta,qcscore=qc,rmoutlier=FALSE,rmcr=TRUE, rthre=0.05, # cthre=0.05,impute=FALSE) # # #remove suffix and combine duplicated CpGs with rm.cgsuffix(), for EPIC v2 # #beta2=rm.cgsuffix(beta) # # ##Principal component regression analysis plot to check data variance structure # #phenotypes to be studied in the plot # cov<-data.frame(group=colData(mdat)$Sample_Group, # slide=factor(colData(mdat)$Slide)) # #missing data is not allowed in the analysis! # pcrplot(beta, cov, npc=6) # # #Non-negative control surrogate variables, which can be used in # # downstream association analysis to control for batch effects. # csva<-ctrlsva(rgSet) # # #estimate cell type proportions # #rgDataset or methDataSet can also be used for the estimation # celltype=estimateCellProp(userdata=beta, # refdata="FlowSorted.Blood.450k", # nonnegative = TRUE,normalize=TRUE) # # #predic sex based on rgDataSet or methDataset # sex=predSex(rgSet) # sex=predSex(mdat) # # #Methylation age calculation # mage=methyAge(beta) # # #ICC analysis can be performed to study measurement reliability if # # have duplicates, see manual # #dupicc() # # #After association analysis, the P values can be used for DMR analysis # #simulate a small example file in BED format # dat=simubed() # #using ipdmr method, low seed value (0.01 or 0.05) should be used in real study. # ipdmr(data=dat,seed=0.1) # #using comb-p method # combp(data=dat,seed=0.1) ## ----readdata, eval=FALSE----------------------------------------------------- # library(ENmix) # rgSet <- readidat(path = "directory path for idat files", # recursive = TRUE) # # #Download manifestfile for HumanMethylation450 beadchip # system("wget https://webdata.illumina.com/downloads/productfiles/humanmethylation450/humanmethylation450_15017482_v1-2.csv") # mf="HumanMethylation450_15017482_v1-2.csv" # rgSet <- readidat(path = "path to idat files",manifestfile=mf,recursive = TRUE) ## ----readdata2, eval=FALSE---------------------------------------------------- # M<-matrix_for_methylated_intensity # U<-matrix_for_unmethylated_intensity # pheno<-as.data.frame(cbind(colnames(M), colnames(M))) # names(pheno)<-c("Basename","filenames") # rownames(pheno)<-pheno$Basename # pheno<-AnnotatedDataFrame(data=pheno) # anno<-c("IlluminaHumanMethylation450k", "ilmn12.hg19") # names(anno)<-c("array", "annotation") # mdat<-MethylSet(Meth = M, Unmeth = U, annotation=anno, # phenoData=pheno) ## ----ctrlplot, eval=FALSE----------------------------------------------------- # plotCtrl(rgSet) ## ----ctrlplot2, eval=FALSE---------------------------------------------------- # # path <- file.path(find.package("minfiData"),"extdata") # rgSet <- readidat(path = path,recursive = TRUE) # sheet <- read.metharray.sheet(file.path(find.package("minfiData"), # "extdata"), pattern = "csv$") # rownames(sheet)=paste(sheet$Slide,sheet$Array,sep="_") # colData(rgSet)=as(sheet[colnames(rgSet),],"DataFrame") # #control plots # IDorder=rownames(colData(rgSet))[order(colData(rgSet)$Slide, # colData(rgSet)$Array)] # plotCtrl(rgSet,IDorder) ## ----distplot, eval=FALSE----------------------------------------------------- # # mraw <- getmeth(rgSet) # #total intensity plot is userful for data quality inspection # #and identification of outlier samples # multifreqpoly(assays(mraw)$Meth+assays(mraw)$Unmeth, # xlab="Total intensity") # #Compare frequency polygon plot and density plot # beta<-getB(mraw) # anno=rowData(mraw) # beta1=beta[anno$Infinium_Design_Type=="I",] # beta2=beta[anno$Infinium_Design_Type=="II",] # library(geneplotter) # jpeg("dist.jpg",height=900,width=600) # par(mfrow=c(3,2)) # multidensity(beta,main="Multidensity") # multifreqpoly(beta,main="Multifreqpoly",xlab="Beta value") # multidensity(beta1,main="Multidensity: Infinium I") # multifreqpoly(beta1,main="Multifreqpoly: Infinium I", # xlab="Beta value") # multidensity(beta2,main="Multidensity: Infinium II") # multifreqpoly(beta2,main="Multifreqpoly: Infinium II", # xlab="Beta value") # dev.off() ## ----filter, eval=FALSE------------------------------------------------------- # qc<-QCinfo(rgSet) # #QCinfo returns "detP","nbead","bisul","badsample","badCpG","outlier_sample" # #Samples with low quality data # qc$badsample # #CpGs with low quality data # qc$badCpG # #outlier samples # qc$outlier_sample # # #Low quality samples and probes should be excluded before data preprocesssing # #by specifying `QCinfo` in `preprocessENmix()` # mdat<-preprocessENmix(rgSet, QCinfo=qc, nCores=6) ## ----rmoutlier, eval=FALSE---------------------------------------------------- # #filter outlier values only # b1=qcfilter(beta) # #filter low quality and outlier values # b2=qcfilter(beta,qcscore=qc) # #filter low quality and outlier values, remove rows # #and columns with too many missing values # b3=qcfilter(beta,qcscore=qc,rmcr=TRUE) # #filter low quality and outlier values, remove rows and # #columns with too many (rthre=0.05,cthre=0.05, 5% in default) missing values, # # and then do imputation # b3=qcfilter(beta,qcscore=qc,rmcr=TRUE,rthre=0.05, # cthre=0.05,impute=TRUE) ## ----preprocessENmix, eval=FALSE---------------------------------------------- # qc=QCinfo(rgSet) # mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", # QCinfo=qc, exCpG=NULL, nCores=6) ## ----normalize.quantile, eval=FALSE------------------------------------------- # mdat<-norm.quantile(mdat, method="quantile1") ## ----rcp, eval=FALSE---------------------------------------------------------- # beta<-rcp(mdat) ## ----ctrlsva, eval=FALSE------------------------------------------------------ # sva<-ctrlsva(rgSet) ## ----pcrplot, eval=FALSE------------------------------------------------------ # require(minfiData) # mdat <- preprocessRaw(RGsetEx) # beta=getBeta(mdat, "Illumina") # group=pData(mdat)$Sample_Group # slide=factor(pData(mdat)$Slide) # cov=data.frame(group,slide) # pcrplot(beta,cov,npc=6) ## ----nmode, eval=FALSE-------------------------------------------------------- # nmode<- nmode(beta, minN = 3, modedist=0.2, nCores = 5) ## ----celltype, eval=FALSE----------------------------------------------------- # require(minfidata) # path <- file.path(find.package("minfiData"),"extdata") # #based on rgDataset # rgSet <- readidat(path = path,recursive = TRUE) # celltype=estimateCellProp(userdata=rgSet, # refdata="FlowSorted.Blood.450k", # nonnegative = TRUE,normalize=TRUE) ## ----mage, eval=FALSE--------------------------------------------------------- # require(minfidata) # path <- file.path(find.package("minfiData"),"extdata") # #based on rgDataset # rgSet <- readidat(path = path,recursive = TRUE) # meth=getmeth(rgSet) # beta=getB(meth) # mage=methyAge(beta) ## ----dmr, eval=FALSE---------------------------------------------------------- # dat=simubed() # names(dat) # ipdmr(data=dat,seed=0.1) # combp(data=dat,seed=0.1) ## ----icc, eval=FALSE---------------------------------------------------------- # require(minfiData) # path <- file.path(find.package("minfiData"),"extdata") # rgSet <- readidat(path = path,recursive = TRUE) # mdat=getmeth(rgSet) # beta=getB(mdat,"Illumina") # #assuming list in id1 are corresponding duplicates of id2 # dupidx=data.frame(id1=c("5723646052_R02C02","5723646052_R04C01", # "5723646052_R05C02"), # id2=c("5723646053_R04C02","5723646053_R05C02", # "5723646053_R06C02")) # iccresu<-dupicc(dat=beta,dupid=dupidx) ## ----oxbs, eval=FALSE--------------------------------------------------------- # load(system.file("oxBS.MLE.RData",package="ENmix")) # resu<-oxBS.MLE(beta.BS,beta.oxBS,N.BS,N.oxBS) # dim(resu[["5mC"]]) # dim(resu[["5hmC"]]) ## ----ENmixAndminfi, eval=FALSE------------------------------------------------ # library(ENmix) # #minfi functions to read in data # sheet <- read.metharray.sheet(file.path(find.package("minfiData"), # "extdata"), pattern = "csv$") # rgSet <- read.metharray.exp(targets = sheet, extended = TRUE) # #ENmix function for control plot # plotCtrl(rgSet) # #minfi functions to extract methylation and annotation data # mraw <- preprocessRaw(rgSet) # beta<-getB(mraw, "Illumina") # anno=getAnnotation(rgSet) # #ENmix function for fast and accurate distribution plot # multifreqpoly(beta,main="Data distribution") # multifreqpoly(beta[anno$Type=="I",],main="Data distribution, type I") # multifreqpoly(beta[anno$Type=="II",],main="Data distribution, type II") # #ENmix background correction # mset<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", nCores=6) # #minfi functions for further preprocessing and analysis # gmSet <- preprocessQuantile(mset) # bumps <- bumphunter(gmSet, design = model.matrix(~ gmSet$status), B = 0, # type = "Beta", cutoff = 0.25) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()