## ----knitr, echo=FALSE, results="hide"----------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide", fig.width=4,fig.height=4.5, message=FALSE) ## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----loadDESeq2, echo=FALSE---------------------------------------------- # in order to print version number below library("DESeq2") ## ----options, results="hide", echo=FALSE--------------------------------- options(digits=3, width=80, prompt=" ", continue=" ") ## ----loadExpPck---------------------------------------------------------- library( "parathyroidSE" ) ## ----findExtData--------------------------------------------------------- extDataDir <- system.file("extdata", package = "parathyroidSE", mustWork = TRUE) extDataDir ## ----LookIntoExtData----------------------------------------------------- list.files( extDataDir ) ## ----loadSampleTable,eval=FALSE------------------------------------------ ## sampleTable <- read.csv( "/path/to/your/sampleTable.csv", header=TRUE ) ## ----makeSampleTable,echo=FALSE------------------------------------------ sampleTable <- data.frame( sampleName = c( "Ctrl_24h_1", "Ctrl_48h_1", "DPN_24h_1" ), fileName = c( "SRR479052.bam", "SRR479053.bam", "SRR479054.bam" ), treatment = c( "Control", "Control", "DPN" ), time = c( "24h", "48h", "24h" ) ) ## ----showSampleTable----------------------------------------------------- sampleTable ## ----getBamList---------------------------------------------------------- bamFiles <- file.path( extDataDir, sampleTable$fileName ) bamFiles ## ----eval=FALSE---------------------------------------------------------- ## library( "GenomicFeatures" ) ## hse <- makeTranscriptDbFromGFF( "/path/to/your/genemodel_file.GTF", format="gtf" ) ## exonsByGene <- exonsBy( hse, by="gene" ) ## ----loadExonsByGene, echo=FALSE----------------------------------------- library( "GenomicFeatures" ) hse <- makeTranscriptDbFromGFF( "Homo_sapiens.GRCh37.75.subset.gtf.gz", format="gtf" ) exonsByGene <- exonsBy( hse, by="gene" ) ## ----showExons----------------------------------------------------------- exonsByGene ## ----countIt------------------------------------------------------------- library( "GenomicAlignments" ) se <- summarizeOverlaps( exonsByGene, BamFileList( bamFiles ), mode="Union", singleEnd=FALSE, ignore.strand=TRUE, fragments=TRUE ) ## ----examineSE----------------------------------------------------------- se head( assay(se) ) colSums( assay(se) ) colData(se) rowData(se) ## ----setColData---------------------------------------------------------- colData(se) <- DataFrame( sampleTable ) ## ----setColNames--------------------------------------------------------- colnames(se) <- sampleTable$sampleName head( assay(se) ) ## ----beginner_plotSE, dev="pdf", echo=FALSE------------------------------ par(mar=c(0,0,0,0)) plot(1,1,xlim=c(0,100),ylim=c(0,100),bty="n", type="n",xlab="",ylab="",xaxt="n",yaxt="n") polygon(c(45,80,80,45),c(10,10,70,70),col=rgb(1,0,0,.5),border=NA) polygon(c(45,80,80,45),c(68,68,70,70),col=rgb(1,0,0,.5),border=NA) text(62.5,40,"assay(s)") text(62.5,30,"e.g. 'counts'") polygon(c(20,40,40,20),c(10,10,70,70),col=rgb(0,0,1,.5),border=NA) polygon(c(20,40,40,20),c(68,68,70,70),col=rgb(0,0,1,.5),border=NA) text(30,40,"rowData") polygon(c(45,80,80,45),c(75,75,90,90),col=rgb(.5,0,.5,.5),border=NA) polygon(c(45,47,47,45),c(75,75,90,90),col=rgb(.5,0,.5,.5),border=NA) text(62.5,82.5,"colData") ## ----loadEcs------------------------------------------------------------- data( "parathyroidGenesSE" ) se <- parathyroidGenesSE colnames(se) <- se$run ## ----coldataSE----------------------------------------------------------- colData(se)[1:5,1:4] ## ----fromSE-------------------------------------------------------------- ddsFull <- DESeqDataSet( se, design = ~ patient + treatment ) ## ----columnData---------------------------------------------------------- as.data.frame( colData( ddsFull )[ ,c("sample","patient","treatment","time") ] ) ## ----collapse------------------------------------------------------------ ddsCollapsed <- collapseReplicates( ddsFull, groupby = ddsFull$sample, run = ddsFull$run ) head( as.data.frame( colData(ddsCollapsed)[ ,c("sample","runsCollapsed") ] ), 12 ) ## ----checkCollapsed------------------------------------------------------ original <- rowSums( counts(ddsFull)[ , ddsFull$sample == "SRS308873" ] ) all( original == counts(ddsCollapsed)[ ,"SRS308873" ] ) ## ----subsetCols---------------------------------------------------------- dds <- ddsCollapsed[ , ddsCollapsed$time == "48h" ] ## ----refactor------------------------------------------------------------ dds$time <- droplevels( dds$time ) ## ----relevel------------------------------------------------------------- dds$treatment <- relevel( dds$treatment, "Control" ) ## ----multifactorColData-------------------------------------------------- as.data.frame( colData(dds) ) ## ----subsetRows, echo=FALSE---------------------------------------------- idx <- which(rowSums(counts(dds)) > 0)[1:4000] dds <- dds[idx,] ## ----runDESeq, cache=TRUE------------------------------------------------ dds <- DESeq(dds) ## ----extractResults------------------------------------------------------ res <- results( dds ) res ## ----resCols------------------------------------------------------------- mcols(res, use.names=TRUE) ## ----resultsOther-------------------------------------------------------- res <- results( dds, contrast = c("treatment", "DPN", "Control") ) res ## ----geneAnnot, cache=TRUE----------------------------------------------- library( "biomaRt" ) ensembl = useMart( "ensembl", dataset = "hsapiens_gene_ensembl" ) genemap <- getBM( attributes = c("ensembl_gene_id", "entrezgene", "hgnc_symbol"), filters = "ensembl_gene_id", values = rownames(res), mart = ensembl ) head( genemap ) ## ----geneAnnotAssign----------------------------------------------------- res[ genemap$ensembl_gene_id, "entrezgene" ] <- genemap$entrezgene res[ genemap$ensembl_gene_id, "hgnc_symbol" ] <- genemap$hgnc_symbol ## ----showAnnot----------------------------------------------------------- head(res,4) ## ----rawpvalue----------------------------------------------------------- sum( res$pvalue < 0.01, na.rm=TRUE ) table( is.na(res$pvalue) ) ## ----adjpvalue----------------------------------------------------------- sum( res$padj < 0.1, na.rm=TRUE ) ## ----strongGenes--------------------------------------------------------- resSig <- res[ which(res$padj < 0.1 ), ] head( resSig[ order( resSig$log2FoldChange ), ] ) ## ----strongGenesUp------------------------------------------------------- tail( resSig[ order( resSig$log2FoldChange ), ] ) ## ----beginner_MA--------------------------------------------------------- plotMA( res, ylim = c(-1, 1) ) ## ----beginner_dispPlot--------------------------------------------------- plotDispEsts( dds, ylim = c(1e-6, 1e1) ) ## ----beginner_pvalHist, dev="pdf"---------------------------------------- hist( res$pvalue, breaks=20, col="grey" ) ## ----beginner_ratioSmallP, dev="pdf", fig.width=8, fig.height=4---------- # create bins using the quantile function qs <- c( 0, quantile( res$baseMean[res$baseMean > 0], 0:7/7 ) ) # "cut" the genes into the bins bins <- cut( res$baseMean, qs ) # rename the levels of the bins using the middle point levels(bins) <- paste0("~",round(.5*qs[-1] + .5*qs[-length(qs)])) # calculate the ratio of $p$ values less than .01 for each bin ratios <- tapply( res$pvalue, bins, function(p) mean( p < .01, na.rm=TRUE ) ) # plot these ratios barplot(ratios, xlab="mean normalized count", ylab="ratio of small $p$ values") ## ----beginner_filtByMean, dev="pdf"-------------------------------------- attr(res,"filterThreshold") plot(attr(res,"filterNumRej"),type="b", xlab="quantiles of 'baseMean'", ylab="number of rejections") ## ----geneAnnotRes-------------------------------------------------------- res[1:2,] ## ----writeCSV, eval=FALSE------------------------------------------------ ## write.csv( as.data.frame(res), file="results.csv" ) ## ----loadReactome-------------------------------------------------------- library( "reactome.db" ) ## ----subsetTores2-------------------------------------------------------- res2 <- res[ res$entrezgene %in% keys( reactome.db, "ENTREZID" ) & !is.na( res$padj ) , ] head(res2) ## ----queryReactome------------------------------------------------------- reactomeTable <- AnnotationDbi::select( reactome.db, keys=as.character(res2$entrezgene), keytype="ENTREZID", columns=c("ENTREZID","REACTOMEID") ) head(reactomeTable) ## ----CreateIncidenceMatrix----------------------------------------------- incm <- do.call( rbind, with(reactomeTable, tapply( ENTREZID, factor(REACTOMEID), function(x) res2$entrez %in% x ) )) colnames(incm) <- res2$entrez str(incm) ## ----PruneIncm----------------------------------------------------------- within <- function(x, lower, upper) (x>=lower & x<=upper) incm <- incm[ within(rowSums(incm), lower=20, upper=80), ] ## ----testFun------------------------------------------------------------- testCategory <- function( reactomeID ) { isMember <- incm[ reactomeID, ] data.frame( reactomeID = reactomeID, numGenes = sum( isMember ), avgLFC = mean( res2$log2FoldChange[isMember] ), sdLFC = sd( res2$log2FoldChange[isMember] ), zValue = mean( res2$log2FoldChange[isMember] ) / sd( res2$log2FoldChange[isMember] ), strength = sum( res2$log2FoldChange[isMember] ) / sqrt(sum(isMember)), pvalue = t.test( res2$log2FoldChange[ isMember ] )$p.value, reactomeName = reactomePATHID2NAME[[reactomeID]], stringsAsFactors = FALSE ) } ## ----testTestFun--------------------------------------------------------- testCategory("109581") ## ----runGSEA,cache=TRUE-------------------------------------------------- reactomeResult <- do.call( rbind, lapply( rownames(incm), testCategory ) ) ## ----padjGSEA------------------------------------------------------------ reactomeResult$padjust <- p.adjust( reactomeResult$pvalue, "BH" ) ## ----GSEAresult---------------------------------------------------------- reactomeResultSignif <- reactomeResult[ reactomeResult$padjust < 0.05, ] reactomeResultSignif[ order(reactomeResultSignif$strength), ] ## ----GSEAresult2--------------------------------------------------------- head( reactomeResult[ order(reactomeResult$zValue), ] ) head( reactomeResult[ order(reactomeResult$zValue, decreasing=TRUE), ] ) ## ----rld, cache=TRUE----------------------------------------------------- rld <- rlog( dds ) head( assay(rld) ) ## ----beginner_rldPlot,fig.width=10,fig.height=5-------------------------- par( mfrow = c( 1, 2 ) ) plot( log2( 1+counts(dds, normalized=TRUE)[, 1:2] ), col="#00000020", pch=20, cex=0.3 ) plot( assay(rld)[, 1:2], col="#00000020", pch=20, cex=0.3 ) ## ----euclDist------------------------------------------------------------ sampleDists <- dist( t( assay(rld) ) ) sampleDists ## ----beginner_sampleDistHeatmap, dev="pdf"------------------------------- sampleDistMatrix <- as.matrix( sampleDists ) rownames(sampleDistMatrix) <- paste( rld$treatment, rld$patient, sep="-" ) colnames(sampleDistMatrix) <- NULL library( "gplots" ) library( "RColorBrewer" ) colours = colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) heatmap.2( sampleDistMatrix, trace="none", col=colours) ## ----beginner_samplePCA, dev="pdf", fig.width=4.3, fig.height=6---------- plotPCA( rld, intgroup = c( "patient", "treatment" ) ) ## ----topVarGenes--------------------------------------------------------- library( "genefilter" ) topVarGenes <- head( order( rowVars( assay(rld) ), decreasing=TRUE ), 35 ) ## ----beginner_geneHeatmap, dev="pdf", fig.width=9, fig.height=9---------- heatmap.2( assay(rld)[ topVarGenes, ], scale="row", trace="none", dendrogram="column", col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255), ColSideColors = c( Control="gray", DPN="darkgreen", OHT="lightblue" )[ colData(rld)$treatment ] ) ## ----sessInfo, results="asis", echo=FALSE-------------------------------- toLatex(sessionInfo()) ## ----resetOptions, results="hide", echo=FALSE---------------------------- options(prompt="> ", continue="+ ")