NanoporeRNASeq contains RNA-Seq data from the K562 and MCF7 cell lines that were generated by the SG-NEx project (https://github.com/GoekeLab/sg-nex-data). Each of these cell lines has three replicates, with 1 direct RNA sequencing dataset and 2 cDNA sequencing datasets. The files contain reads aligned to the human genome (GRCh38), chromosome 22 (positions 1:25500000).
# Attach the data package and load its sample metadata table
library("NanoporeRNASeq")
data("SGNexSamples")
# Print the sample table (one row per sample)
SGNexSamples
##> DataFrame with 6 rows and 6 columns
##> sample_id Platform cellLine protocol cancer_type
##> <character> <character> <character> <character> <character>
##> 1 K562_directcDNA_repl.. MinION K562 directcDNA Leukocyte
##> 2 K562_directcDNA_repl.. GridION K562 directcDNA Leukocyte
##> 3 K562_directRNA_repli.. GridION K562 directRNA Leukocyte
##> 4 MCF7_directcDNA_repl.. MinION MCF7 directcDNA Breast
##> 5 MCF7_directcDNA_repl.. GridION MCF7 directcDNA Breast
##> 6 MCF7_directRNA_repli.. GridION MCF7 directRNA Breast
##> fileNames
##> <character>
##> 1 NanoporeRNASeq/versi..
##> 2 NanoporeRNASeq/versi..
##> 3 NanoporeRNASeq/versi..
##> 4 NanoporeRNASeq/versi..
##> 5 NanoporeRNASeq/versi..
##> 6 NanoporeRNASeq/versi..
library(ExperimentHub)
# Look up the NanoporeRNASeq BAM resources (GRCh38) registered on ExperimentHub
NanoporeData <- query(ExperimentHub(), c("NanoporeRNA", "GRCh38", "Bam"))
# Retrieve the six BAM records by accession and bundle them into a BamFileList
bamFiles <- Rsamtools::BamFileList(
  NanoporeData[["EH3808"]], NanoporeData[["EH3809"]],
  NanoporeData[["EH3810"]], NanoporeData[["EH3811"]],
  NanoporeData[["EH3812"]], NanoporeData[["EH3813"]]
)
# Load the chromosome 22 annotation object shipped with the package and print it
data("HsChr22BambuAnnotation")
HsChr22BambuAnnotation
##> GRangesList object of length 1500:
##> $ENST00000043402
##> GRanges object with 2 ranges and 2 metadata columns:
##> seqnames ranges strand | exon_rank exon_endRank
##> <Rle> <IRanges> <Rle> | <integer> <integer>
##> [1] 22 20241415-20243110 - | 2 1
##> [2] 22 20268071-20268531 - | 1 2
##> -------
##> seqinfo: 1 sequence from an unspecified genome; no seqlengths
##>
##> $ENST00000086933
##> GRanges object with 3 ranges and 2 metadata columns:
##> seqnames ranges strand | exon_rank exon_endRank
##> <Rle> <IRanges> <Rle> | <integer> <integer>
##> [1] 22 19148576-19149095 - | 3 1
##> [2] 22 19149663-19149916 - | 2 2
##> [3] 22 19150025-19150283 - | 1 3
##> -------
##> seqinfo: 1 sequence from an unspecified genome; no seqlengths
##>
##> $ENST00000155674
##> GRanges object with 8 ranges and 2 metadata columns:
##> seqnames ranges strand | exon_rank exon_endRank
##> <Rle> <IRanges> <Rle> | <integer> <integer>
##> [1] 22 17137511-17138357 - | 8 1
##> [2] 22 17138550-17138738 - | 7 2
##> [3] 22 17141059-17141233 - | 6 3
##> [4] 22 17143098-17143131 - | 5 4
##> [5] 22 17145024-17145117 - | 4 5
##> [6] 22 17148448-17148560 - | 3 6
##> [7] 22 17149542-17149745 - | 2 7
##> [8] 22 17165209-17165287 - | 1 8
##> -------
##> seqinfo: 1 sequence from an unspecified genome; no seqlengths
##>
##> ...
##> <1497 more elements>
We can visualize one sample for a single transcript, ENST00000215832 (MAPK1).
library(ggbio)
# exon ranges of the transcript to display
range <- HsChr22BambuAnnotation$ENST00000215832
# GRCh38 genome package (from the original "plot mismatch track" step;
# no mismatch track is drawn below)
library(BSgenome.Hsapiens.NCBI.GRCh38)
# plot annotation track
tx <- autoplot(range, aes(col = strand), group.selfish = TRUE)
# plot coverage track for the first BAM file, restricted to the same ranges
coverage <- autoplot(bamFiles[[1]], aes(col = coverage), which = range)

# merge the tracks into one plot
tracks(annotation = tx, coverage = coverage, heights = c(1, 3)) + theme_minimal()
library(bambu)
# Look up the NanoporeRNASeq FASTA resource (GRCh38) registered on ExperimentHub
genomeSequenceData <- query(ExperimentHub(), c("NanoporeRNA", "GRCh38", "FASTA"))
# Retrieve the genome sequence record by its accession
genomeSequence <- genomeSequenceData[["EH7260"]]
Applying bambu to bamFiles
# Run bambu on the BAM files with the chr22 annotation and genome sequence,
# keeping the result in `se`
se <- bambu(
  reads = bamFiles,
  annotations = HsChr22BambuAnnotation,
  genome = genomeSequence
)
bambu returns a SummarizedExperiment object
# Print a summary of the object returned by bambu
se
##> class: RangedSummarizedExperiment
##> dim: 1542 6
##> metadata(2): incompatibleCounts warnings
##> assays(4): counts CPM fullLengthCounts uniqueCounts
##> rownames(1542): BambuTx1 BambuTx2 ... ENST00000641933 ENST00000641967
##> rowData names(11): TXNAME GENEID ... txid eqClassById
##> colnames(6): ce9765df3dce7_3844 ce976cd9b5c4_3846 ...
##> ce9767f6adae5_3852 ce9767422b638_3854
##> colData names(1): name
We can visualize the annotated and novel isoforms identified for an example gene using the plot functions from bambu.
# Draw the annotation plot for one gene from the bambu result
plotBambu(
  se,
  type = "annotation",
  gene_id = "ENSG00000099968"
)
##> [[1]]
##> TableGrob (3 x 1) "arrange": 3 grobs
##> z cells name grob
##> 1 1 (2-2,1-1) arrange gtable[layout]
##> 2 2 (3-3,1-1) arrange gtable[layout]
##> 3 3 (1-1,1-1) arrange text[GRID.text.262]
# Report the R version, platform, locale and package versions of this session
sessionInfo()
##> R Under development (unstable) (2022-10-25 r83175)
##> Platform: x86_64-pc-linux-gnu (64-bit)
##> Running under: Ubuntu 22.04.1 LTS
##>
##> Matrix products: default
##> BLAS: /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so
##> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##>
##> locale:
##> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
##> [3] LC_TIME=en_GB LC_COLLATE=C
##> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
##> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
##> [9] LC_ADDRESS=C LC_TELEPHONE=C
##> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##>
##> attached base packages:
##> [1] stats4 stats graphics grDevices utils datasets methods
##> [8] base
##>
##> other attached packages:
##> [1] bambu_3.1.0
##> [2] SummarizedExperiment_1.29.0
##> [3] Biobase_2.59.0
##> [4] MatrixGenerics_1.11.0
##> [5] matrixStats_0.62.0
##> [6] BSgenome.Hsapiens.NCBI.GRCh38_1.3.1000
##> [7] BSgenome_1.67.0
##> [8] rtracklayer_1.59.0
##> [9] ggbio_1.47.0
##> [10] ggplot2_3.3.6
##> [11] Rsamtools_2.15.0
##> [12] Biostrings_2.67.0
##> [13] XVector_0.39.0
##> [14] GenomicRanges_1.51.0
##> [15] GenomeInfoDb_1.35.0
##> [16] IRanges_2.33.0
##> [17] S4Vectors_0.37.0
##> [18] NanoporeRNASeq_1.9.0
##> [19] ExperimentHub_2.7.0
##> [20] AnnotationHub_3.7.0
##> [21] BiocFileCache_2.7.0
##> [22] dbplyr_2.2.1
##> [23] BiocGenerics_0.45.0
##>
##> loaded via a namespace (and not attached):
##> [1] RColorBrewer_1.1-3 rstudioapi_0.14
##> [3] jsonlite_1.8.3 magrittr_2.0.3
##> [5] GenomicFeatures_1.51.1 farver_2.1.1
##> [7] rmarkdown_2.17 BiocIO_1.9.0
##> [9] zlibbioc_1.45.0 vctrs_0.5.0
##> [11] memoise_2.0.1 RCurl_1.98-1.9
##> [13] base64enc_0.1-3 htmltools_0.5.3
##> [15] progress_1.2.2 curl_4.3.3
##> [17] xgboost_1.6.0.1 Formula_1.2-4
##> [19] sass_0.4.2 bslib_0.4.0
##> [21] htmlwidgets_1.5.4 plyr_1.8.7
##> [23] cachem_1.0.6 GenomicAlignments_1.35.0
##> [25] mime_0.12 lifecycle_1.0.3
##> [27] pkgconfig_2.0.3 Matrix_1.5-1
##> [29] R6_2.5.1 fastmap_1.1.0
##> [31] GenomeInfoDbData_1.2.9 shiny_1.7.3
##> [33] digest_0.6.30 colorspace_2.0-3
##> [35] GGally_2.1.2 reshape_0.8.9
##> [37] OrganismDbi_1.41.0 AnnotationDbi_1.61.0
##> [39] Hmisc_4.7-1 RSQLite_2.2.18
##> [41] labeling_0.4.2 filelock_1.0.2
##> [43] fansi_1.0.3 httr_1.4.4
##> [45] compiler_4.3.0 bit64_4.0.5
##> [47] withr_2.5.0 htmlTable_2.4.1
##> [49] backports_1.4.1 BiocParallel_1.33.0
##> [51] DBI_1.1.3 highr_0.9
##> [53] biomaRt_2.55.0 rappdirs_0.3.3
##> [55] DelayedArray_0.25.0 rjson_0.2.21
##> [57] tools_4.3.0 foreign_0.8-83
##> [59] interactiveDisplayBase_1.37.0 httpuv_1.6.6
##> [61] nnet_7.3-18 glue_1.6.2
##> [63] restfulr_0.0.15 promises_1.2.0.1
##> [65] grid_4.3.0 checkmate_2.1.0
##> [67] cluster_2.1.4 reshape2_1.4.4
##> [69] generics_0.1.3 gtable_0.3.1
##> [71] tidyr_1.2.1 ensembldb_2.23.0
##> [73] data.table_1.14.4 hms_1.1.2
##> [75] xml2_1.3.3 utf8_1.2.2
##> [77] BiocVersion_3.17.0 pillar_1.8.1
##> [79] stringr_1.4.1 later_1.3.0
##> [81] splines_4.3.0 dplyr_1.0.10
##> [83] lattice_0.20-45 survival_3.4-0
##> [85] bit_4.0.4 deldir_1.0-6
##> [87] biovizBase_1.47.0 RBGL_1.75.0
##> [89] tidyselect_1.2.0 knitr_1.40
##> [91] gridExtra_2.3 ProtGenerics_1.31.0
##> [93] xfun_0.34 stringi_1.7.8
##> [95] lazyeval_0.2.2 yaml_2.3.6
##> [97] evaluate_0.17 codetools_0.2-18
##> [99] interp_1.1-3 tibble_3.1.8
##> [101] graph_1.77.0 BiocManager_1.30.19
##> [103] cli_3.4.1 rpart_4.1.19
##> [105] xtable_1.8-4 munsell_0.5.0
##> [107] jquerylib_0.1.4 dichromat_2.0-0.1
##> [109] Rcpp_1.0.9 png_0.1-7
##> [111] XML_3.99-0.12 parallel_4.3.0
##> [113] ellipsis_0.3.2 assertthat_0.2.1
##> [115] blob_1.2.3 prettyunits_1.1.1
##> [117] latticeExtra_0.6-30 jpeg_0.1-9
##> [119] AnnotationFilter_1.23.0 bitops_1.0-7
##> [121] VariantAnnotation_1.45.0 scales_1.2.1
##> [123] purrr_0.3.5 crayon_1.5.2
##> [125] rlang_1.0.6 KEGGREST_1.39.0
##> [127] formatR_1.12