## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- BiocStyle::markdown() options(width=100, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE-------------------------------------------- suppressPackageStartupMessages({ library(csaw) library(edgeR) library(GenomicRanges) library(ChIPseeker) library(genefilter) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) library(clusterProfiler) }) ## ----configure-test------------------------------------------------------------------------------- stopifnot( getRversion() >= '3.2' && getRversion() < '3.3', BiocInstaller::biocVersion() == "3.2" ) ## ----null-p, cache=TRUE--------------------------------------------------------------------------- ## 100,000 t-tests under the null, n = 6 n <- 6; m <- matrix(rnorm(n * 100000), ncol=n) P <- genefilter::rowttests(m, factor(rep(1:2, each=3)))$p.value quantile(P, c(.001, .01, .05)) hist(P, breaks=20) ## ---- eval=FALSE---------------------------------------------------------------------------------- # vignette("ChIPseeker") ## ----csaw-setup----------------------------------------------------------------------------------- files <- local({ acc <- c(es_1="SRR074398", es_2="SRR074399", tn_1="SRR074417", tn_2="SRR074418") data.frame(Treatment=sub("_.*", "", names(acc)), Replicate=sub(".*_", "", names(acc)), sra=sprintf("%s.sra", acc), fastq=sprintf("%s.fastq.gz", acc), bam=sprintf("%s.fastq.gz.subread.BAM", acc), row.names=acc, stringsAsFactors=FALSE) }) ## ----csaw-setwd, eval=FALSE----------------------------------------------------------------------- # setwd("~/UseBioconductor-data/ChIPSeq/NFYA") ## ----csaw-reduction, eval=FALSE------------------------------------------------------------------- # library(csaw) # library(GenomicRanges) # frag.len <- 110 # system.time({ # data <- windowCounts(files$bam, width=10, ext=frag.len) # }) # 156 seconds # acc <- sub(".fastq.*", "", data$bam.files) # colData(data) <- cbind(files[acc,], colData(data)) ## ----load-csaw------------------------------------------------------------------------------------ data <- readRDS("csaw-data.Rds") ## ----csaw-filter---------------------------------------------------------------------------------- library(edgeR) # for aveLogCPM() keep <- aveLogCPM(assay(data)) >= -1 data <- data[keep,] ## ----csaw-normalize, eval=FALSE------------------------------------------------------------------- # system.time({ # binned <- windowCounts(files$bam, bin=TRUE, width=10000) # }) #139 second # normfacs <- normalize(binned) ## ----csaw-normacs-load---------------------------------------------------------------------------- normfacs <- readRDS("csaw-normfacs.Rds") ## ----csaw-experimental-design--------------------------------------------------------------------- design <- model.matrix(~Treatment, colData(data)) ## ----csaw-de-------------------------------------------------------------------------------------- y <- asDGEList(data, norm.factors=normfacs) y <- estimateDisp(y, design) fit <- glmQLFit(y, design, robust=TRUE) results <- glmQLFTest(fit, contrast=c(0, 1)) head(results$table) ## ----csaw-merge-windows--------------------------------------------------------------------------- merged <- mergeWindows(rowRanges(data), tol=1000L) ## ----csaw-combine-merged-tests-------------------------------------------------------------------- tabcom <- combineTests(merged$id, results$table) head(tabcom) ## ----csaw-grangeslist----------------------------------------------------------------------------- gr <- rowRanges(data) mcols(gr) <- as(results$table, "DataFrame") grl <- split(gr, merged$id) mcols(grl) <- as(tabcom, "DataFrame") ## ----sessionInfo---------------------------------------------------------------------------------- sessionInfo()