## ----setup, echo=FALSE, results="hide"---------------------------------------- knitr::opts_chunk$set(tidy=FALSE, cache=FALSE, dev="png", message=FALSE, error=FALSE, warning=FALSE) ## ----eval=FALSE--------------------------------------------------------------- # # first build tx2gene to gene- or TSS-level # # (for isoform level skip `tx2gene`) # library(ensembldb) # library(plyranges) # # gene level: # tx2gene <- transcripts(edb) %>% # select(tx_id, group_id=gene_id) # # TSS level: # tx2gene <- makeTx2Tss(edb, maxgap=50) %>% # select(tx_id, gene_id, group_id, tss) # # import counts: # y <- importAllelicCounts( # coldata, a1="alt", a2="ref", # format="wide", tx2gene=tx2gene # ) # # testing with Swish: # y <- labelKeep(y) # y <- y[mcols(y)$keep,] # # see below for other tests and details # y <- swish(y, x="allele", pair="sample", fast=1) # mcols(y)$qvalue # <-- gives FDR-bounded sets ## ----echo=FALSE--------------------------------------------------------------- knitr::include_graphics("images/SEESAW.png") ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(ensembldb)) library(EnsDb.Hsapiens.v86) library(fishpond) edb <- EnsDb.Hsapiens.v86 t2g <- makeTx2Tss(edb) # GRanges object mcols(t2g)[,c("tx_id","group_id")] ## ----eval=FALSE--------------------------------------------------------------- # n <- ncol(y)/2 # rep1a1 <- assay(y, "infRep1")[,y$allele == "a1"] # rep1a2 <- assay(y, "infRep1")[,y$allele == "a2"] # mcols(y)$someInfo <- rowSums(abs(rep1a1 - rep1a2) < 1) < n # y <- y[ mcols(y)$someInfo, ] ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(SummarizedExperiment)) ## ----------------------------------------------------------------------------- set.seed(1) y <- makeSimSwishData(allelic=TRUE) colData(y) levels(y$allele) # a1/a2 allelic fold changes ## ----echo=FALSE--------------------------------------------------------------- # hidden chunk to add ranges to the `se` tss <- t2g[!duplicated(t2g$group_id)] tss$symbol <- mapIds(edb, tss$gene_id, "SYMBOL", "GENEID") names(tss) <- paste0(tss$symbol, "-", tss$tss) mcols(tss) <- mcols(tss)[,c("tx_id","gene_id","tss","group_id","symbol")] # slow... #tx_id <- CharacterList(split(t2g$tx_id,t2g$group_id)) #tss$tx_id <- tx_id[names(tss)] tab <- table(tss$gene_id) tss$ntss <- as.numeric(tab[tss$gene_id]) tss <- tss[tss$ntss > 1 & tss$ntss < 5 & seqnames(tss) == "1"] tss <- tss[order(tss$gene_id),] tss <- tss[43:1042] # swap 2nd and 3rd isoform of first gene tss[2:3] <- tss[3:2] rowRanges(y) <- tss ## ----fig.dim=c(7,3.5)--------------------------------------------------------- y <- computeInfRV(y) # for posterior mean, variance gene <- rowRanges(y)$gene_id[1] idx <- mcols(y)$gene_id == gene plotAllelicHeatmap(y, idx=idx) ## ----------------------------------------------------------------------------- y <- labelKeep(y) y <- swish(y, x="allele", pair="sample", fast=1) ## ----fig.dim=c(8,4)----------------------------------------------------------- dat <- data.frame(minusLogQ=-log10(mcols(y)$qvalue[idx]), row.names=rownames(y)[idx]) plotAllelicHeatmap(y, idx=idx, annotation_row=dat) ## ----fig.dim=c(5,5)----------------------------------------------------------- par(mfrow=c(2,1), mar=c(1,4.1,2,2)) plotInfReps(y, idx=1, x="allele", cov="sample", xaxis=FALSE, xlab="") plotInfReps(y, idx=2, x="allele", cov="sample", xaxis=FALSE, xlab="") ## ----fig.dim=c(8,7)----------------------------------------------------------- gene <- rowRanges(y)$gene_id[1] plotAllelicGene(y, gene, edb) ## ----fig.dim=c(8,7)----------------------------------------------------------- plotAllelicGene(y, symbol="ADSS", db=edb) ## ----fig.dim=c(8,7)----------------------------------------------------------- plotAllelicGene(y, gene, edb, transcriptAnnotation="transcript", labels=list(a2="maternal",a1="paternal")) ## ----fig.dim=c(5,4)----------------------------------------------------------- y$allele_new <- y$allele # note a2 is non-effect, a1 is effect: levels(y$allele) # replace a2 then a1: levels(y$allele_new) <- c("maternal","paternal") plotInfReps(y, idx=1, x="allele_new", legend=TRUE, legendPos="bottom") ## ----------------------------------------------------------------------------- set.seed(1) y <- makeSimSwishData(diffAI=TRUE, n=12) colData(y) table(y$condition, y$allele) ## ----------------------------------------------------------------------------- y <- labelKeep(y) y <- swish(y, x="allele", pair="sample", cov="condition", interaction=TRUE) ## ----------------------------------------------------------------------------- mcols(y)[1:2,c("stat","qvalue")] ## ----------------------------------------------------------------------------- hist(mcols(y)[-c(1:6),"pvalue"]) ## ----------------------------------------------------------------------------- plotInfReps(y, idx=1, x="allele", cov="condition", xaxis=FALSE, legend=TRUE, legendPos="bottomright") ## ----fig.dim=c(8,4)----------------------------------------------------------- idx <- c(1:6) row_dat <- data.frame(minusLogQ=-log10(mcols(y)$qvalue[idx]), row.names=rownames(y)[idx]) col_dat <- data.frame(condition=y$condition[1:12], row.names=paste0("s",1:12)) plotAllelicHeatmap(y, idx=idx, annotation_row=row_dat, annotation_col=col_dat, cluster_rows=FALSE) ## ----------------------------------------------------------------------------- set.seed(1) y <- makeSimSwishData(dynamicAI=TRUE) colData(y) ## ----echo=FALSE--------------------------------------------------------------- rowRanges(y) <- tss ## ----------------------------------------------------------------------------- y <- labelKeep(y) y <- swish(y, x="allele", pair="sample", cov="time", cor="pearson") ## ----------------------------------------------------------------------------- mcols(y)[1:2,c("stat","qvalue")] ## ----------------------------------------------------------------------------- y <- computeInfRV(y) ## ----fig.dim=c(7,7)----------------------------------------------------------- par(mfrow=c(2,1), mar=c(2.5,4,2,2)) plotInfReps(y, idx=1, x="time", cov="allele", shiftX=.01, xaxis=FALSE, xlab="", main="") par(mar=c(4.5,4,0,2)) plotInfReps(y, idx=2, x="time", cov="allele", shiftX=.01, main="") ## ----fig.dim=c(7,5)----------------------------------------------------------- plotInfReps(y, idx=1, x="time", cov="allele", shiftX=.01) dat <- data.frame( time = y$time[1:10], a2 = assay(y, "mean")[1,y$allele=="a2"], a1 = assay(y, "mean")[1,y$allele=="a1"]) lines(lowess(dat[,c(1,2)]), col="dodgerblue") lines(lowess(dat[,c(1,3)]), col="goldenrod4") ## ----fig.dim=c(8,4)----------------------------------------------------------- idx <- c(1:4) row_dat <- data.frame(minusLogQ=-log10(mcols(y)$qvalue[idx]), row.names=rownames(y)[idx]) col_dat <- data.frame(time=y$time[1:10], row.names=paste0("s",1:10)) plotAllelicHeatmap(y, idx=idx, annotation_row=row_dat, annotation_col=col_dat) ## ----------------------------------------------------------------------------- y$time_bins <- cut(y$time,breaks=c(0,.25,.75,1), include.lowest=TRUE, labels=FALSE) y$time_bins <- paste0("time-",y$time_bins) table(y$time_bins[ y$allele == "a2" ]) ## ----fig.dim=c(8,7)----------------------------------------------------------- gene <- rowRanges(y)$gene_id[1] plotAllelicGene(y, gene, edb, cov="time_bins", qvalue=FALSE, log2FC=FALSE) ## ----fig.dim=c(8,7)----------------------------------------------------------- plotAllelicGene(y, gene, edb, cov="time_bins", covFacetIsoform=TRUE, qvalue=FALSE, log2FC=FALSE) ## ----------------------------------------------------------------------------- set.seed(1) y1 <- makeSimSwishData(dynamicAI=TRUE) y2 <- makeSimSwishData(dynamicAI=TRUE) y2$sample <- factor(rep(paste0("sample",11:20),2)) y <- cbind(y1, y2) y$group <- factor(rep(c("A","B"),each=20)) table(y$time, y$group) # 2 allelic counts per cell ## ----------------------------------------------------------------------------- reps <- grep("infRep", assayNames(y), value=TRUE) for (k in reps) { assay(y,k)[1,21:40] <- assay(y,k)[1,c(31:40,21:30)] } ## ----fig.dim=c(8,7)----------------------------------------------------------- y <- computeInfRV(y) par(mfrow=c(2,2),mar=c(3,3,3,1)) for (i in 1:2) { plotInfReps(y[,y$group=="A"], idx=i, x="time", cov="allele", shiftX=.01) plotInfReps(y[,y$group=="B"], idx=i, x="time", cov="allele", shiftX=.01) } ## ----------------------------------------------------------------------------- pc <- 5 # a pseudocount idx1 <- which(y$allele == "a1") idx2 <- which(y$allele == "a2") # the sample must be aligned all.equal(y$sample[idx1], y$sample[idx2]) cov <- y$time[idx1] group <- y$group[idx1] # interaction of group and covariate (time) design <- model.matrix(~group + cov + group:cov) ## ----------------------------------------------------------------------------- reps <- grep("infRep", assayNames(y), value=TRUE) nreps <- length(reps) infReps <- assays(y)[reps] infRepsArray <- abind::abind(as.list(infReps), along=3) lfcArray <- log2(infRepsArray[,idx1,] + pc) - log2(infRepsArray[,idx2,] + pc) ## ----------------------------------------------------------------------------- computeStat <- function(k, design, name) { fit <- limma::lmFit(lfcArray[,,k], design) tstats <- fit$coef/fit$stdev.unscaled/fit$sigma tstats[,name] } ## ----------------------------------------------------------------------------- stat <- rowMeans(sapply(1:nreps, \(k) computeStat(k, design, "groupB:cov"))) ## ----------------------------------------------------------------------------- n <- nrow(design) nperms <- 100 nulls <- matrix(nrow=nrow(y), ncol=nperms) set.seed(1) for (p in seq_len(nperms)) { # permute group labels pgroup <- group[sample(n)] pdesign <- model.matrix(~pgroup + cov + pgroup:cov) nulls[,p] <- rowMeans(sapply(1:nreps, \(k) computeStat(k, pdesign, "pgroupB:cov"))) } ## ----------------------------------------------------------------------------- pvalue <- qvalue::empPvals(abs(stat), abs(nulls)) q_res <- qvalue::qvalue(pvalue) locfdr <- q_res$lfdr qvalue <- q_res$qvalues res <- data.frame(stat, pvalue, locfdr, qvalue) ## ----------------------------------------------------------------------------- head(res) hist(res$pvalue[-1]) ## ----------------------------------------------------------------------------- sessionInfo()