## ---- echo=FALSE, results="hide"----------------------------------------- library(knitr) opts_chunk$set(error=FALSE) ## ----style, echo=FALSE, results='asis'----------------------------------- BiocStyle::markdown() ## ----setup, echo=FALSE--------------------------------------------------- options(digits=3) suppressPackageStartupMessages({ library(csaw) library(edgeR) library(org.Mm.eg.db) library(TxDb.Mmusculus.UCSC.mm10.knownGene) library(Gviz) }) ## ------------------------------------------------------------------------ bam.files <- file.path("/home/ubuntu/data/aaronlun", c("es_1.bam", "es_2.bam", "tn_1.bam", "tn_2.bam")) ## ------------------------------------------------------------------------ system.file("doc", "sra2bam.sh", package="csaw") ## ------------------------------------------------------------------------ frag.len <- 110 window.width <- 10 spacing <- 50 ## ------------------------------------------------------------------------ param <- readParam(minq=50, restrict=paste0('chr', c(1:19, 'X', 'Y'))) data <- windowCounts(bam.files, ext=frag.len, width=window.width, spacing=spacing, param=param) data ## ------------------------------------------------------------------------ max.delay <- 500 dedup.on <- readParam(dedup=TRUE, minq=50) x <- correlateReads(bam.files, max.delay, param=dedup.on) plot(0:max.delay, x, type="l", ylab="CCF", xlab="Delay (bp)") ## ------------------------------------------------------------------------ param ## ------------------------------------------------------------------------ binned <- windowCounts(bam.files, bin=TRUE, width=10000, param=param) ## ------------------------------------------------------------------------ normfacs <- normalize(binned) normfacs ## ------------------------------------------------------------------------ adj.counts <- cpm(asDGEList(binned), log=TRUE) cur.x <- adj.counts[,1] cur.y <- adj.counts[,2] smoothScatter(x=(cur.x+cur.y)/2+6*log2(10), y=cur.x-cur.y, xlab="A", ylab="M", main="1 vs 2") all.dist <- diff(log2(normfacs[c(2, 1)])) abline(h=all.dist, col="red") ## ------------------------------------------------------------------------ keep <- aveLogCPM(asDGEList(data)) > -1 normalize(data[keep,]) ## ------------------------------------------------------------------------ abundance <- aveLogCPM(asDGEList(data)) ## ------------------------------------------------------------------------ keep <- abundance > -1 original <- data data <- data[keep,] nrow(data) ## ------------------------------------------------------------------------ binned.2 <- windowCounts(bam.files, bin=TRUE, width=2000, param=param) filter.stat.bg <- filterWindows(original, background=binned.2, type="global") background.keep <- filter.stat.bg$filter > log2(3) summary(background.keep) ## ------------------------------------------------------------------------ surrounds <- 2000 neighbour <- suppressWarnings(resize(rowRanges(original), surrounds, fix="center")) wider <- regionCounts(bam.files, regions=neighbour, ext=frag.len, param=param) ## ------------------------------------------------------------------------ filter.stat.loc <- filterWindows(original, wider, type="local") local.keep <- filter.stat.loc$filter > log2(3) summary(local.keep) ## ------------------------------------------------------------------------ grouping <- factor(c('es', 'es', 'tn', 'tn')) design <- model.matrix(~0 + grouping) colnames(design) <- levels(grouping) ## ------------------------------------------------------------------------ y <- asDGEList(data, norm.factors=normfacs) y <- estimateDisp(y, design) ## ------------------------------------------------------------------------ fit <- glmQLFit(y, design, robust=TRUE) ## ------------------------------------------------------------------------ contrast <- makeContrasts(es - tn, levels=design) results <- glmQLFTest(fit, contrast=contrast) ## ------------------------------------------------------------------------ norep.fit <- glmFit(y, design, dispersion=0.05) norep.results <- glmLRT(norep.fit, contrast=contrast) ## ------------------------------------------------------------------------ bin.adjc <- cpm(asDGEList(binned.2), log=TRUE) plotMDS(bin.adjc, labels=grouping) ## ------------------------------------------------------------------------ plotBCV(y) ## ------------------------------------------------------------------------ plotQLDisp(fit) ## ------------------------------------------------------------------------ clustered <- mergeWindows(rowRanges(data), tol=1000) clustered$region ## ------------------------------------------------------------------------ tabcom <- combineTests(clustered$id, results$table) head(tabcom) ## ------------------------------------------------------------------------ clustered2 <- mergeWindows(rowRanges(data), tol=100, max.width=5000) ## ------------------------------------------------------------------------ tab.best <- getBestTest(clustered$id, results$table) head(tab.best) ## ------------------------------------------------------------------------ gene.bodies <- genes(TxDb.Mmusculus.UCSC.mm10.knownGene) prom <- promoters(gene.bodies, upstream=3000, downstream=1000) head(prom) ## ------------------------------------------------------------------------ olap <- findOverlaps(prom, rowRanges(data)) olap ## ------------------------------------------------------------------------ tabbroad <- combineOverlaps(olap, results$table) head(tabbroad[!is.na(tabbroad$PValue),]) ## ------------------------------------------------------------------------ anno <- detailRanges(clustered$region, orgdb=org.Mm.eg.db, txdb=TxDb.Mmusculus.UCSC.mm10.knownGene) head(anno$overlap) ## ------------------------------------------------------------------------ head(anno$left) ## ------------------------------------------------------------------------ all.results <- data.frame(as.data.frame(clustered$region)[,1:3], tabcom, anno) all.results <- all.results[order(all.results$PValue),] write.table(all.results, file="saved.tsv", row.names=FALSE, quote=FALSE, sep="\t") ## ------------------------------------------------------------------------ cur.region <- GRanges("chr18", IRanges(77806807, 77807165)) collected <- list() for (i in 1:length(bam.files)) { reads <- extractReads(bam.files[i], cur.region) pcov <- as(coverage(reads[strand(reads)=="+"])/data$totals[i]*1e6, "GRanges") ncov <- as(coverage(reads[strand(reads)=="-"])/data$totals[i]*1e6, "GRanges") ptrack <- DataTrack(pcov, type="histogram", lwd=0, fill=rgb(0,0,1,.4), ylim=c(0,1), name=bam.files[i], col.axis="black", col.title="black") ntrack <- DataTrack(ncov, type="histogram", lwd=0, fill=rgb(1,0,0,.4), ylim=c(0,1)) collected[[i]] <- OverlayTrack(trackList=list(ptrack,ntrack)) } gax <- GenomeAxisTrack(col="black") plotTracks(c(gax, collected), from=start(cur.region), to=end(cur.region)) ## ------------------------------------------------------------------------ anno2 <- detailRanges(clustered$region, orgdb=org.Mm.eg.db, txdb=TxDb.Mmusculus.UCSC.mm10.knownGene, dist=3000, promoter=c(0, 2000)) ## ------------------------------------------------------------------------ all.regions <- clustered$region elementMetadata(all.regions) <- tabcom ## ----eval=FALSE---------------------------------------------------------- # csawUsersGuide()