## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----eval=FALSE--------------------------------------------------------------- # head(snp.dat) # # CHROM POS ID REF ALT QUAL FILTER LT.rna.dp LN.rna.dp ON.rna.dp OT.rna.dp BT.wes.dp LT.wes.dp LN.wes.dp ON.wes.dp # 1 chr1 54757 rs202000650 T G . PASS NA NA NA NA NA NA NA NA # 2 chr1 564636 . C T . PASS NA NA NA NA NA NA NA NA # 3 chr1 564862 rs1988726 T C . PASS NA NA NA NA NA NA NA NA # 4 chr1 564868 rs1832728 T C . PASS NA NA NA NA NA NA NA NA # 5 chr1 565454 rs7349151 T C . PASS NA NA NA NA NA NA NA NA # 6 chr1 565464 rs6594030 T C . PASS NA NA NA NA NA NA NA NA # OT.wes.dp LT.wgs.dp LN.wgs.dp ON.wgs.dp OT.wgs.dp LT.rna.freq LN.rna.freq ON.rna.freq OT.rna.freq BT.wes.freq LT.wes.freq # 1 NA 25 24 27 19 NA NA NA NA NA NA # 2 NA 21 NA NA 14 NA NA NA NA NA NA # 3 NA 10 15 55 13 NA NA NA NA NA NA # 4 NA 10 12 60 14 NA NA NA NA NA NA # 5 NA 21 14 26 24 NA NA NA NA NA NA # 6 NA 25 16 33 29 NA NA NA NA NA NA # LN.wes.freq ON.wes.freq OT.wes.freq LT.wgs.freq LN.wgs.freq ON.wgs.freq OT.wgs.freq # 1 NA NA NA 0.2400 0.1667 0.2222 0.3684 # 2 NA NA NA 0.0000 NA NA 0.1429 # 3 NA NA NA 0.4000 0.5333 0.9091 0.7692 # 4 NA NA NA 0.5000 0.6667 0.9333 0.7857 # 5 NA NA NA 0.1429 0.3571 0.6538 0.6250 # 6 NA NA NA 0.2000 0.3750 0.7273 0.5862 # ## ----eval=FALSE--------------------------------------------------------------- # is.hetero <- function(x, a=0.3, b=0.7) { # (x - a) * (b - x) >= 0 # } # # wgs.snp.ss <- subset(snp.dat, ! CHROM %in% c("chrX", "chrY") & # LN.wgs.dp >= 15 & # ON.wgs.dp >=15 & # is.hetero(LN.wgs.freq, 0.4, 0.6) & # is.hetero(ON.wgs.freq, 0.4, 0.6)) ## ----eval=FALSE--------------------------------------------------------------- # library(GenomicRanges) # wgs.hetero.grs <- list() # wgs.hetero.grs$lung <- with(wgs.snp.ss, GRanges(CHROM, IRanges(POS, POS), mcols=cbind(LT.wgs.dp, LT.wgs.freq))) # wgs.hetero.grs$ovary <- with(wgs.snp.ss, GRanges(CHROM, IRanges(POS, POS), mcols=cbind(OT.wgs.dp, OT.wgs.freq))) # names(elementMetadata(wgs.hetero.grs$lung)) <- names(elementMetadata(wgs.hetero.grs$ovary)) <- c("dp", "freq") ## ----eval=FALSE--------------------------------------------------------------- # library(GenomicRanges) # min.num <- 10 # cnv.gr <- with(subset(seg$output, num.mark >= min.num & ! chrom %in% c("chrX", "chrY")) , GRanges(chrom, IRanges(loc.start, loc.end), mcols=cbind(num.mark, seg.mean))) ## ----------------------------------------------------------------------------- suppressMessages( library(BubbleTree) ) ## ----------------------------------------------------------------------------- load(system.file("data", "allCall.lst.RData", package="BubbleTree")) head(allCall.lst[[1]]@rbd) ## ----------------------------------------------------------------------------- # load sample files load(system.file("data", "cnv.gr.rda", package="BubbleTree")) load(system.file("data", "snp.gr.rda", package="BubbleTree")) # load annotations load(system.file("data", "centromere.dat.rda", package="BubbleTree")) load(system.file("data", "cyto.gr.rda", package="BubbleTree")) load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree")) load(system.file("data", "vol.genes.rda", package="BubbleTree")) load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree")) # initialize RBD object r <- new("RBD", unimodal.kurtosis=-0.1) # create new RBD object with GenomicRanges objects for SNPs and CNVs rbd <- makeRBD(r, snp.gr, cnv.gr) head(rbd) # create a new prediction btreepredictor <- new("BTreePredictor", rbd=rbd, max.ploidy=6, prev.grid=seq(0.2,1, by=0.01)) pred <- btpredict(btreepredictor) # create rbd plot btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10) btree <- drawBTree(btreeplotter, pred@rbd) print(btree) # create rbd.adj plot btreeplotter <- new("BTreePlotter", branch.col="gray50") btree <- drawBTree(btreeplotter, pred@rbd.adj) print(btree) # create a combined plot with rbd and rbd.adj that shows the arrows indicating change btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10) arrows <- trackBTree(btreeplotter, pred@rbd, pred@rbd.adj, min.srcSize=0.01, min.trtSize=0.01) btree <- drawBTree(btreeplotter, pred@rbd) + arrows print(btree) # create a plot with overlays of significant genes btreeplotter <- new("BTreePlotter", branch.col="gray50") annotator <- new("Annotate") comm <- btcompare(vol.genes, cancer.genes.minus2) sample.name <- "22_cnv_snv" btree <- drawBTree(btreeplotter, pred@rbd.adj) + ggplot2::labs(title=sprintf("%s (%s)", sample.name, info(pred))) out <- pred@result$dist %>% filter(seg.size >= 0.1 ) %>% arrange(gtools::mixedorder(as.character(seqnames)), start) ann <- with(out, { annoByGenesAndCyto(annotator, as.character(out$seqnames), as.numeric(out$start), as.numeric(out$end), comm$comm, gene.uni.clean.gr=gene.uni.clean.gr, cyto.gr=cyto.gr) }) out$cyto <- ann$cyto out$genes <- ann$ann btree <- btree + drawFeatures(btreeplotter, out) print(btree) # print out purity and ploidy values info <- info(pred) cat("\nPurity/Ploidy: ", info, "\n") ## ----------------------------------------------------------------------------- load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree")) load(system.file("data", "vol.genes.rda", package="BubbleTree")) load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree")) load(system.file("data", "cyto.gr.rda", package="BubbleTree")) load(system.file("data", "centromere.dat.rda", package="BubbleTree")) load(system.file("data", "all.somatic.lst.RData", package="BubbleTree")) load(system.file("data", "allHetero.lst.RData", package="BubbleTree")) load(system.file("data", "allCNV.lst.RData", package="BubbleTree")) load(system.file("data", "hg19.seqinfo.rda", package="BubbleTree")) ## ----------------------------------------------------------------------------- # lists of 379 known cancer genes head(cancer.genes.minus2) # another list of 105 known cancer genes head(vol.genes) # full gene annotation Grange object head(gene.uni.clean.gr) # cytoband coordinate data head(cyto.gr) # centromere coordinate data head(centromere.dat) # SNV location data head(all.somatic.lst, n=1L) # sequence variants head(allHetero.lst, n=1L) # copy number variation data head(allCNV.lst, n=1L) # hg19 sequence data hg19.seqinfo ## ----------------------------------------------------------------------------- btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10) print(btreeplotter@branches) ## ----------------------------------------------------------------------------- load(system.file("data", "allCall.lst.RData", package="BubbleTree")) btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10) sample <- allCall.lst[["sam10"]] rbd1 <- sample@rbd rbd2 <- sample@rbd.adj arrows <- trackBTree(btreeplotter, rbd1, rbd2, min.srcSize=0.01, min.trtSize=0.01) btree <- drawBTree(btreeplotter, rbd1) print(btree) ## ----------------------------------------------------------------------------- load(system.file("data", "allCall.lst.RData", package="BubbleTree")) btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10) sample <- allCall.lst[["sam12"]] rbd1 <- sample@rbd rbd2 <- sample@rbd.adj arrows <- trackBTree(btreeplotter, rbd1, rbd2, min.srcSize=0.01, min.trtSize=0.01) btree <- drawBTree(btreeplotter, rbd1) + drawBubbles(btreeplotter, rbd2, "gray80") + arrows print(btree) ## ----------------------------------------------------------------------------- load(system.file("data", "allCall.lst.RData", package="BubbleTree")) btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10) sample <- allCall.lst[["sam12"]] rbd1 <- sample@rbd rbd2 <- sample@rbd.adj arrows <- trackBTree(btreeplotter, rbd1, rbd2, min.srcSize=0.01, min.trtSize=0.01) btree <- drawBTree(btreeplotter, rbd1) + arrows print(btree) ## ----------------------------------------------------------------------------- load(system.file("data", "allRBD.lst.RData", package="BubbleTree")) btreepredictor <- new("BTreePredictor") btreepredictor@config$cutree.h <- 0.15 high.ploidy <- rep(TRUE, length(allRBD.lst)) high.purity <- rep(TRUE, length(allRBD.lst)) high.ploidy[c("sam6", "ovary.wgs", "ovary.wes", "TCGA-06-0145-01A-01W-0224-08", "TCGA-13-1500-01A-01D-0472-01", "TCGA-AO-A0JJ-01A-11W-A071-09")] <- FALSE high.purity[c("sam6", "ovary.wgs", "ovary.wes")] <- FALSE nn <- "sam13" rbd <- allRBD.lst[[nn]] btreepredictor@config$high.ploidy <- high.ploidy[nn] btreepredictor@config$high.purity <- high.purity[nn] btreepredictor <- loadRBD(btreepredictor, rbd) btreepredictor@config$min.segSize <- ifelse(max(btreepredictor@rbd$seg.size, na.rm=TRUE) < 0.4, 0.1, 0.4) btreepredictor <- btpredict(btreepredictor) ## ----------------------------------------------------------------------------- cat(info(btreepredictor), "\n") names(allCall.lst) <- names(allRBD.lst) results <- list() for (name in names(allCall.lst)) { results[[name]] <- allCall.lst[[name]]@result$dist } ## ----------------------------------------------------------------------------- load(system.file("data", "allCall.lst.RData", package="BubbleTree")) load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree")) load(system.file("data", "vol.genes.rda", package="BubbleTree")) load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree")) load(system.file("data", "cyto.gr.rda", package="BubbleTree")) # 77 common cancer genes merged from 2 sets comm <- btcompare(vol.genes, cancer.genes.minus2) btreeplotter <- new("BTreePlotter", branch.col="gray50") annotator <- new("Annotate") nn <- "sam13" cc <- allCall.lst[[nn]] z <- drawBTree(btreeplotter, cc@rbd.adj) + ggplot2::labs(title=sprintf("%s (%s)", nn, info(cc))) out <- cc@result$dist %>% filter(seg.size >= 0.1 ) %>% arrange(gtools::mixedorder(as.character(seqnames)), start) ann <- with(out, { annoByGenesAndCyto(annotator, as.character(out$seqnames), as.numeric(out$start), as.numeric(out$end), comm$comm, gene.uni.clean.gr=gene.uni.clean.gr, cyto.gr=cyto.gr) }) out$cyto <- ann$cyto out$genes <- ann$ann v <- z + drawFeatures(btreeplotter, out) print(v) ## ----------------------------------------------------------------------------- load(system.file("data", "allCall.lst.RData", package="BubbleTree")) load(system.file("data", "centromere.dat.rda", package="BubbleTree")) load(system.file("data", "all.somatic.lst.RData", package="BubbleTree")) load(system.file("data", "allHetero.lst.RData", package="BubbleTree")) load(system.file("data", "allCNV.lst.RData", package="BubbleTree")) load(system.file("data", "hg19.seqinfo.rda", package="BubbleTree")) trackplotter <- new("TrackPlotter") nn <- "sam12" ymax <- ifelse(nn %in% c("lung.wgs", "lung.wes"), 9, 4.3) p1 <- xyTrack(trackplotter, result.dat=allCall.lst[[nn]]@result, gr2=centromere.dat, ymax=ymax) + ggplot2::labs(title=nn) p2 <- bafTrack(trackplotter, result.dat=allCall.lst[[nn]]@result, gr2=centromere.dat, somatic.gr=all.somatic.lst[[nn]]) t1 <- getTracks(p1, p2) z1 <- heteroLociTrack(trackplotter, allCall.lst[[nn]]@result, centromere.dat, allHetero.lst[[nn]]) z2 <- RscoreTrack(trackplotter, allCall.lst[[nn]]@result, centromere.dat, allCNV.lst[[nn]]) t2 <- getTracks(z1, z2) gridExtra::grid.arrange(t1,t2, ncol=1) ## ----------------------------------------------------------------------------- load(system.file("data", "allCall.lst.RData", package="BubbleTree")) btp <- new("BTreePlotter", max.ploidy=5, max.size=10) nn1 <- "HCC11.Primary.Tumor" nn2 <- "HCC11.Recurrent.Tumor" rbd1 <- allCall.lst[[nn1]]@result$dist rbd2 <- allCall.lst[[nn2]]@result$dist srcSize <- 0.5 trtSize <- 1 minOver <- 1e7 arrows <- trackBTree(btp, rbd1, rbd2, min.srcSize=srcSize, min.trtSize=trtSize, min.overlap=minOver) z <- drawBTree(btp, rbd1) if(!is.null(arrows)) { z <- z + arrows + ggplot2::labs(title=sprintf("%s -> %s", nn1, nn2)) } print(z)