## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(tidy = FALSE, cache = FALSE, dev = "png", message = FALSE, error = FALSE, warning = TRUE) ## ----introduction_workflow---------------------------------------------------- library(GenomicRanges) library(epigraHMM) library(SummarizedExperiment) # Creating input for epigraHMM bamFiles <- system.file(package="chromstaRData","extdata","euratrans", c("lv-H3K27me3-BN-male-bio2-tech1.bam", "lv-H3K27me3-BN-male-bio2-tech2.bam")) colData <- data.frame(condition = c('BN','BN'),replicate = c(1,2)) # Creating epigraHMM object object <- epigraHMMDataSetFromBam(bamFiles = bamFiles, colData = colData, genome = 'rn4', windowSize = 1000, gapTrack = TRUE) # Creating normalizing offsets object <- normalizeCounts(object = object, control = controlEM()) # Initializing epigraHMM object <- initializer(object = object, control = controlEM()) # Running epigraHMM for consensus peak detection object <- epigraHMM(object = object, control = controlEM(), type = 'consensus') # Calling peaks with FDR control of 0.05 peaks <- callPeaks(object = object, control = controlEM(), method = 0.05) ## ----data_input_countmatrixinput---------------------------------------------- countData <- list('counts' = matrix(rpois(4e5,10),ncol = 4), 'controls' = matrix(rpois(4e5,5),ncol = 4)) colData <- data.frame(condition = c('A','A','B','B'), replicate = c(1,2,1,2)) rowRanges <- GRanges('chrA',IRanges(start = seq(1,by = 500, length.out = 1e5), width = 500)) object_matrix <- epigraHMMDataSetFromMatrix(countData = countData, colData = colData, rowRanges = rowRanges) object_matrix ## ----data_input_baminput------------------------------------------------------ # Creating input for epigraHMM bamFiles <- system.file(package="chromstaRData","extdata","euratrans", c("lv-H3K4me3-SHR-male-bio2-tech1.bam", "lv-H3K4me3-SHR-male-bio3-tech1.bam")) colData <- data.frame(condition = c('SHR','SHR'), replicate = c(1,2)) # Creating epigraHMM object object_bam <- epigraHMMDataSetFromBam(bamFiles = bamFiles, colData = colData, genome = 'rn4', windowSize = 250, gapTrack = TRUE) object_bam ## ----data_norm_offset--------------------------------------------------------- # Normalizing counts object_normExample <- object_bam object_normExample <- normalizeCounts(object_normExample,control = controlEM()) normCts <- as.matrix(assay(object_normExample)/ exp(assay(object_normExample,'offsets'))) # Summary of raw counts summary(assay(object_normExample)) colSums(assay(object_normExample))/1e5 # Summary of normalized counts summary(normCts) colSums(normCts)/1e5 ## ----gcnorm,fig.show='hide',results='hide'------------------------------------ library(gcapc) library(BSgenome.Rnorvegicus.UCSC.rn4) # Toy example of utilization of gcapc for GC-content normalization with model offsets # See ?gcapc::refineSites for best practices # Below, subset object_bam simply to illustrate the use of `gcapc::refineSites` # with epigraHMM object_gcExample <- object_bam[2e4:5e4,] gcnorm <- gcapc::refineSites(counts = assay(object_gcExample), sites = rowRanges(object_gcExample), gcrange = c(0,1), genome = 'rn4') # gcapc::refineSites outputs counts/2^gce gcnorm_offsets <- log2((assay(object_gcExample) + 1) / (gcnorm + 1)) # Adding offsets to epigraHMM object object_gcExample <- addOffsets(object = object_gcExample, offsets = gcnorm_offsets) ## ----data_input_controls------------------------------------------------------ # Creating input for epigraHMM bamFiles <- list('counts' = system.file(package="chromstaRData", "extdata","euratrans", "lv-H4K20me1-BN-male-bio1-tech1.bam"), 'controls' = system.file(package="chromstaRData", "extdata","euratrans", "lv-input-BN-male-bio1-tech1.bam")) colData <- data.frame(condition = 'BN',replicate = 1) # Creating epigraHMM object object_bam <- epigraHMMDataSetFromBam(bamFiles = bamFiles, colData = colData, genome = 'rn4', windowSize = 1000, gapTrack = TRUE) object_bam ## ----data_peak_consensus,message = FALSE-------------------------------------- # Creating input for epigraHMM bamFiles <- system.file(package="chromstaRData","extdata","euratrans", c("lv-H3K27me3-BN-male-bio2-tech1.bam", "lv-H3K27me3-BN-male-bio2-tech2.bam")) colData <- data.frame(condition = c('BN','BN'), replicate = c(1,2)) # Creating epigraHMM object object_consensus <- epigraHMMDataSetFromBam(bamFiles = bamFiles, colData = colData, genome = 'rn4', windowSize = 1000, gapTrack = TRUE) # Normalizing counts object_consensus <- normalizeCounts(object_consensus, control = controlEM()) # Initializing epigraHMM object_consensus <- initializer(object_consensus,controlEM()) # Differential peak calling object_consensus <- epigraHMM(object = object_consensus, control = controlEM(), type = 'consensus', dist = 'zinb') ## ----data_peak_differential,message = FALSE----------------------------------- # Creating input for epigraHMM bamFiles <- system.file(package="chromstaRData","extdata","euratrans", c("lv-H3K27me3-BN-male-bio2-tech1.bam", "lv-H3K27me3-BN-male-bio2-tech2.bam", "lv-H3K27me3-SHR-male-bio2-tech1.bam", "lv-H3K27me3-SHR-male-bio2-tech2.bam")) colData <- data.frame(condition = c('BN','BN','SHR','SHR'), replicate = c(1,2,1,2)) # Creating epigraHMM object object_differential <- epigraHMMDataSetFromBam(bamFiles = bamFiles, colData = colData, genome = 'rn4', windowSize = 1000, gapTrack = TRUE) # Normalizing counts object_differential <- normalizeCounts(object_differential, control = controlEM()) # Initializing epigraHMM object_differential <- initializer(object_differential,controlEM()) # Differential peak calling object_differential <- epigraHMM(object = object_differential, control = controlEM(), type = 'differential') ## ----peaks_consensus---------------------------------------------------------- peaks_consensus <- callPeaks(object = object_consensus) peaks_consensus ## ----peaks_differential------------------------------------------------------- peaks_differential <- callPeaks(object = object_differential) peaks_differential ## ----viz_anno----------------------------------------------------------------- library(GenomicRanges) library(rtracklayer) library(GenomeInfoDb) session <- browserSession() genome(session) <- 'rn4' refSeq <- getTable(ucscTableQuery(session, table = "refGene")) annotation <- makeGRangesFromDataFrame(refSeq, starts.in.df.are.0based = TRUE, seqnames.field = 'chrom', start.field = 'txStart', end.field = 'txEnd', strand.field = 'strand', keep.extra.columns = TRUE) ## ----viz_consensus------------------------------------------------------------ plotCounts(object = object_consensus, ranges = GRanges('chr12',IRanges(19500000,20000000)), peaks = peaks_consensus, annotation = annotation) ## ----viz_differential--------------------------------------------------------- plotCounts(object = object_differential, ranges = GRanges('chr12',IRanges(19500000,20100000)), peaks = peaks_differential, annotation = annotation) ## ----viz_heatmap1------------------------------------------------------------- # Selecting target differential peak pattern_peak <- peaks_differential[peaks_differential$name == 'peak4'] pattern_peak plotPatterns(object = object_differential, ranges = pattern_peak, peaks = peaks_differential) ## ----viz_heatmap2------------------------------------------------------------- # Selecting target differential peak pattern_peak <- peaks_differential[peaks_differential$name == 'peak3'] pattern_peak plotPatterns(object = object_differential, ranges = pattern_peak, peaks = peaks_differential) ## ----misc_patterns------------------------------------------------------------ # Getting object information info(object_differential) ## ----misc_probs--------------------------------------------------------------- # Getting posterior probabilties callPatterns(object = object_differential,peaks = peaks_differential,type = 'all') ## ----misc_max----------------------------------------------------------------- # Getting most likely differential enrichment callPatterns(object = object_differential,peaks = peaks_differential,type = 'max') ## ----misc_pruning,message = TRUE---------------------------------------------- data('helas3') control <- controlEM(pruningThreshold = 0.05,quietPruning = FALSE) object <- normalizeCounts(object = helas3,control) object <- initializer(object = object,control) object <- epigraHMM(object = object,control = control,type = 'differential') ## ----sessioninfo-------------------------------------------------------------- sessionInfo()