## ----style, eval=TRUE, echo=FALSE, results='asis'------------------------ options(max.print=1000) BiocStyle::latex() library(knitr) opts_chunk$set(cache=TRUE, tidy=FALSE) ## ----packages, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE------- suppressPackageStartupMessages({ library(ShortRead) library(VariantAnnotation) library(ggplot2) library(RNAseqData.HNRNPC.bam.chr14) library(org.Hs.eg.db) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(BSgenome.Hsapiens.UCSC.hg19) library(AnnotationHub) library(rtracklayer) }) ## ----setup,echo=FALSE---------------------------------------------------- library(GenomicRanges) plotRanges <- function(x, xlim = x, main = deparse(substitute(x)), col = "black", sep = 0.5, ...) { height <- 1 if (is(xlim, "Ranges")) xlim <- c(min(start(xlim)), max(end(xlim))) bins <- disjointBins(IRanges(start(x), end(x) + 1)) plot.new() par(mai=c(0.6, 0.2, 0.6, 0.2)) plot.window(xlim, c(0, max(bins)*(height + sep))) ybottom <- bins * (sep + height) - height rect(start(x)-0.5, ybottom, end(x)+0.5, ybottom + height, col = col, ...) title(main, cex.main=2.8, font.main=1) axis(1) } ## ----GRanges-genes------------------------------------------------------- genes <- GRanges(seqnames=c("chr3R", "chrX"), ranges=IRanges( start=c(19967117, 18962306), end = c(19973212, 18962925)), strand=c("+", "-"), seqlengths=c(chr3R=27905053, chrX=22422827)) ## ----GRanges-display----------------------------------------------------- genes ## ----GRanges-vignettes, eval=FALSE--------------------------------------- ## vignette(package="GenomicRanges") ## ----GRanges-basics------------------------------------------------------ genes[2] strand(genes) width(genes) length(genes) names(genes) <- c("FBgn0039155", "FBgn0085359") genes # now with names ## ----ranges-ir----------------------------------------------------------- ir <- IRanges(start=c(7, 9, 12, 14, 22:24), end=c(15, 11, 12, 18, 26, 27, 28)) ## ----ranges-ir-plot, results='hide', echo=FALSE-------------------------- png("ranges-ir-plot.png", width=800, height=160) plotRanges(ir, xlim=c(5, 35), main="Original") dev.off() png("ranges-shift-ir-plot.png", width=800, height=160) plotRanges(shift(ir, 5), xlim=c(5, 35), main="Shift") dev.off() png("ranges-reduce-ir-plot.png", width=800, height=160) plotRanges(reduce(ir), xlim=c(5, 35), main="Reduce") dev.off() ## ----ranges-shift-ir----------------------------------------------------- shift(ir, 5) ## ----ranges-shift-ir-plot, results='hide', echo=FALSE-------------------- png("ranges-shift-ir-plot.png", width=800, height=160) plotRanges(shift(ir, 5), xlim=c(5, 35), main="Shift") dev.off() ## ----ranges-reduce-ir---------------------------------------------------- reduce(ir) ## ----ranges-reduce-ir-plot, results='hide', echo=FALSE------------------- png("ranges-reduce-ir-plot.png", width=800, height=160) plotRanges(reduce(ir), xlim=c(5, 35), main="Reduce") dev.off() ## ----coverage------------------------------------------------------------ cvg <- coverage(ir) cvg ## plot(as.integer(cvg), type="s", xlab="Coordinate", ylab="Depth of coverage") ## ----GRanges-mcols------------------------------------------------------- mcols(genes) <- DataFrame(EntrezId=c("42865", "2768869"), Symbol=c("kal-1", "CG34330")) ## ----GRanges-metadata---------------------------------------------------- metadata(genes) <- list(CreatedBy="A. User", Date=date()) ## ----GRangesList-eg-setup, echo=FALSE------------------------------------ library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene # alias ## have ENSEMBL ids ensids <- c("ENSG00000130720", "ENSG00000103257", "ENSG00000156414", "ENSG00000144644", "ENSG00000159307", "ENSG00000144485") ## need ENTREZID's egids <- select(org.Hs.eg.db, ensids, "ENTREZID", "ENSEMBL")$ENTREZID exByGn <- exonsBy(txdb, "gene")[egids] ## ----GRangesList-eg, echo=FALSE------------------------------------------ exByGn ## ----SYMBOL-to-ENTREZID-------------------------------------------------- library(org.Hs.eg.db) egid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")$ENTREZID library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene egToTx <- select(txdb, egid, "TXNAME", "GENEID") cdsByTx <- cdsBy(txdb, "tx", use.names=TRUE)[egToTx$TXNAME] ## ----getsequence--------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) txx <- extractTranscriptsFromGenome(Hsapiens, cdsByTx) ## ----translate----------------------------------------------------------- all(width(txx) %% 3 == 0) # sanity check translate(txx) # amino acid sequence ## ----summarizeOverlaps-BAM----------------------------------------------- library(RNAseqData.HNRNPC.bam.chr14) fls <- RNAseqData.HNRNPC.bam.chr14_BAMFILES ## ----summarizeOverlaps-gene-model---------------------------------------- library(BiocParallel) library(TxDb.Hsapiens.UCSC.hg19.knownGene) ex <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene") ## ----summarizeOverlaps-count--------------------------------------------- counts <- summarizeOverlaps(ex, fls) colSums(assay(counts)) sum(rowSums(assay(counts)) != 0)