## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE,tidy = TRUE, warning=FALSE, message=FALSE, comment = "##>" ) ## ----eval = FALSE------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("bambu") # BiocManager::install("NanoporeRNASeq") ## ----results = "hide"--------------------------------------------------------- library(bambu) test.bam <- system.file("extdata", "SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.bam", package = "bambu") fa.file <- system.file("extdata", "Homo_sapiens.GRCh38.dna_sm.primary_assembly_chr9_1_1000000.fa", package = "bambu") gtf.file <- system.file("extdata", "Homo_sapiens.GRCh38.91_chr9_1_1000000.gtf", package = "bambu") bambuAnnotations <- prepareAnnotations(gtf.file) se <- bambu(reads = test.bam, annotations = bambuAnnotations, genome = fa.file) ## ----------------------------------------------------------------------------- library(bambu) library(NanoporeRNASeq) data("SGNexSamples") SGNexSamples library(ExperimentHub) NanoporeData <- query(ExperimentHub(), c("NanoporeRNA", "GRCh38","BAM")) bamFile <- Rsamtools::BamFileList(NanoporeData[["EH3808"]]) ## ----------------------------------------------------------------------------- # get path to fasta file genomeSequenceData <- query(ExperimentHub(), c("NanoporeRNA", "GRCh38","FASTA")) genomeSequence <- genomeSequenceData[["EH7260"]] ## ----------------------------------------------------------------------------- library(BSgenome.Hsapiens.NCBI.GRCh38) genomeSequenceBsgenome <- BSgenome.Hsapiens.NCBI.GRCh38 ## ----------------------------------------------------------------------------- gtf.file <- system.file("extdata", "Homo_sapiens.GRCh38.91_chr9_1_1000000.gtf", package = "bambu") annotation <- prepareAnnotations(gtf.file) ## ----------------------------------------------------------------------------- txdb <- system.file("extdata", "Homo_sapiens.GRCh38.91_chr9_1_1000000.gtf", package = "bambu") annotation <- prepareAnnotations(txdb) ## ----------------------------------------------------------------------------- data("HsChr22BambuAnnotation") HsChr22BambuAnnotation ## ----results = "hide"--------------------------------------------------------- se <- bambu(reads = bamFile, annotations = HsChr22BambuAnnotation, genome = genomeSequence, ncore = 1) ## ----------------------------------------------------------------------------- se ## ----results = "hide"--------------------------------------------------------- seNoquant <- bambu(reads = bamFile, annotations = HsChr22BambuAnnotation, genome = genomeSequence, quant = FALSE) ## ----results = "hide"--------------------------------------------------------- seUnextended <- bambu(reads = bamFile, annotations = HsChr22BambuAnnotation, genome = genomeSequence, discovery = FALSE) ## ----------------------------------------------------------------------------- seUnextended ## ----results = "hide"--------------------------------------------------------- bamFiles <- Rsamtools::BamFileList(NanoporeData[["EH3808"]], NanoporeData[["EH3809"]],NanoporeData[["EH3810"]], NanoporeData[["EH3811"]], NanoporeData[["EH3812"]], NanoporeData[["EH3813"]]) se.multiSample <- bambu(reads = bamFiles, annotations = HsChr22BambuAnnotation, genome = genomeSequence) ## ----results = "hide"--------------------------------------------------------- se.NDR_0.3 <- bambu(reads = bamFiles, annotations = HsChr22BambuAnnotation, genome = genomeSequence, NDR = 0.3) ## ----results = 'hide'--------------------------------------------------------- newAnnotations <- bambu(reads = bamFiles, annotations = HsChr22BambuAnnotation, genome = genomeSequence, NDR = 1, quant = FALSE) annotations.filtered <- newAnnotations[(!is.na(mcols(newAnnotations)$NDR) & mcols(newAnnotations)$NDR<0.1) | is.na(mcols(newAnnotations)$NDR)] se.NDR_1 <- bambu(reads = bamFiles, annotations = annotations.filtered, genome = genomeSequence, NDR = 1, discovery = FALSE) ## ----fig.width = 8, fig.height = 6-------------------------------------------- library(ggplot2) plotBambu(se.multiSample, type = "heatmap") ## ----fig.width = 8, fig.height = 6-------------------------------------------- plotBambu(se.multiSample, type = "pca") ## ----fig.width = 8, fig.height = 10------------------------------------------- plotBambu(se.multiSample, type = "annotation", gene_id = "ENSG00000099968") ## ----------------------------------------------------------------------------- colData(se.multiSample)$condition <- as.factor(SGNexSamples$cellLine) ## ----------------------------------------------------------------------------- seGene.multiSample <- transcriptToGeneExpression(se.multiSample) seGene.multiSample ## ----fig.width = 8, fig.height = 6-------------------------------------------- colData(seGene.multiSample)$groupVar <- SGNexSamples$cellLine plotBambu(seGene.multiSample, type = "heatmap") ## ----------------------------------------------------------------------------- save.dir <- tempdir() writeBambuOutput(se.multiSample, path = save.dir, prefix = "NanoporeRNASeq_") ## ----------------------------------------------------------------------------- save.file <- tempfile(fileext = ".gtf") writeToGTF(rowRanges(se.multiSample), file = save.file) ## ----results = 'hide'--------------------------------------------------------- se <- bambu(reads = bamFiles, annotations = HsChr22BambuAnnotation, genome = genomeSequence, opt.discovery = list(fitReadClassModel = FALSE)) ## ----results = 'hide'--------------------------------------------------------- novelAnnotations <- bambu(reads = bamFiles, annotations = NULL, genome = genomeSequence, NDR = 1, quant = FALSE) ## ----eval = FALSE------------------------------------------------------------- # #se <- bambu(rcFile = rcFiles, annotations = annotations, genome = fa.file) ## ----results = 'hide'--------------------------------------------------------- se <- bambu(reads = bamFiles, rcOutDir = save.dir, annotations = HsChr22BambuAnnotation, genome = genomeSequence) ## ----------------------------------------------------------------------------- library(BiocFileCache) bfc <- BiocFileCache(save.dir, ask = FALSE) info <- bfcinfo(bfc) ## ----------------------------------------------------------------------------- info # running bambu using the first file se <- bambu(reads = info$rpath[1:3], annotations = HsChr22BambuAnnotation, genome = genomeSequence) ## ----------------------------------------------------------------------------- se <- bambu(reads = bamFiles, annotations = HsChr22BambuAnnotation, genome = genomeSequence, discovery = FALSE, quant = FALSE) ## ----------------------------------------------------------------------------- rowData(se[[1]]) ## ----------------------------------------------------------------------------- se <- bambu(reads = bamFiles, annotations = HsChr22BambuAnnotation, genome = genomeSequence, trackReads = TRUE) metadata(se)$readToTranscriptMaps[[1]] ## ----eval = FALSE------------------------------------------------------------- # # first train the model using a related annotated dataset from .bam # # se = bambu(reads = sample1.bam, annotations = annotations, genome = fa.file, discovery = FALSE, quant = FALSE, opt.discovery = list(returnModel = TRUE)) # note that discovery and quant need to be set to FALSE, alternatively you can have them set to TRUE and retrieve the model from the rcFile as long as returnModel = TRUE. # # newDefaultModel = metadata(se[[1]])$model # [[1]] will select the model trained on the first sample # # #alternatively train the model using an rcFile # rcFile <- readRDS(pathToRcFile) # newDefaultModel = trainBambu(rcFile) # # # use the trained model on another sample # # sample2.bam and fa.file2 represent the aligned reads and genome for the poorly annotated sample # se <- bambu(reads = sample2.bam, annotations = NULL, genome = fa.file2, opt.discovery = list(defaultModels = newDefaultModel, fitReadClassModel = FALSE)) # # #trainBambu Arguments # # rcFile <- NULL, min.readCount = 2, nrounds = 50, NDR.threshold = 0.1, verbose = TRUE ## ----------------------------------------------------------------------------- se <- bambu(reads = bamFiles, annotations = HsChr22BambuAnnotation, genome = genomeSequence, opt.discovery = list(min.txScore.singleExon = 0)) ## ----------------------------------------------------------------------------- library(DESeq2) dds <- DESeqDataSetFromMatrix(round(assays(seGene.multiSample)$counts), colData = colData(se.multiSample), design = ~ condition) dds.deseq <- DESeq(dds) deGeneRes <- DESeq2::results(dds.deseq, independentFiltering = FALSE) head(deGeneRes[order(deGeneRes$padj),]) ## ----------------------------------------------------------------------------- summary(deGeneRes) ## ----fig.width = 8, fig.height = 6-------------------------------------------- library(apeglm) resLFC <- lfcShrink(dds.deseq, coef = "condition_MCF7_vs_K562", type = "apeglm") plotMA(resLFC, ylim = c(-3,3)) ## ----------------------------------------------------------------------------- library(DEXSeq) dxd <- DEXSeqDataSet(countData = round(assays(se.multiSample)$counts), sampleData = as.data.frame(colData(se.multiSample)), design = ~sample + exon + condition:exon, featureID = rowData(se.multiSample)$TXNAME, groupID = rowData(se.multiSample)$GENEID) dxr <- DEXSeq(dxd) head(dxr) ## ----fig.width = 8, fig.height = 6-------------------------------------------- plotMA(dxr, cex = 0.8 ) ## ----------------------------------------------------------------------------- sessionInfo()