## ----style, echo=FALSE, results='asis'----------------------------------- BiocStyle::markdown() options(max.print=1000) suppressPackageStartupMessages({ library(genefilter) library(airway) library(DESeq2) library(GenomicAlignments) library(GenomicFeatures) }) ## ----sleep-t.test-------------------------------------------------------- head(sleep) plot(extra ~ group, data = sleep) ## Traditional interface with(sleep, t.test(extra[group == 1], extra[group == 2])) ## Formula interface t.test(extra ~ group, sleep) ## equal variance between groups t.test(extra ~ group, sleep, var.equal=TRUE) ## ----sleep-lm------------------------------------------------------------ ## linear model; compare to t.test(var.equal=TRUE) fit <- lm(extra ~ group, sleep) anova(fit) ## ----sleep-model.matrix-------------------------------------------------- ## underlying model, used in `lm.fit()` model.matrix(extra ~ group, sleep) # last column indicates group effect model.matrix(extra ~ 0 + group, sleep) # contrast between columns ## ----sleep-diff---------------------------------------------------------- fit0 <- lm(extra ~ ID, sleep) fit1 <- lm(extra ~ ID + group, sleep) anova(fit0, fit1) t.test(extra ~ group, sleep, var.equal=TRUE, paired=TRUE) ## ----airway-bam-path----------------------------------------------------- library(airway) path <- system.file(package="airway", "extdata") dir(path) ## ----airway-csv---------------------------------------------------------- csvfile <- dir(path, "sample_table.csv", full=TRUE) sampleTable <- read.csv(csvfile, row.names=1) head(sampleTable) ## ----airway-bam---------------------------------------------------------- library(Rsamtools) filenames <- dir(path, ".bam$", full=TRUE) bamfiles <- BamFileList(filenames, yieldSize=1000000) names(bamfiles) <- sub("_subset.bam", "", basename(filenames)) ## ----airway-gtf-to-txdb-------------------------------------------------- library(GenomicFeatures) gtffile <- file.path(path, "Homo_sapiens.GRCh37.75_subset.gtf") txdb <- makeTxDbFromGFF(gtffile, format="gtf", circ_seqs=character()) genes <- exonsBy(txdb, by="gene") ## ------------------------------------------------------------------------ library(GenomicAlignments) se <- summarizeOverlaps(features=genes, reads=bamfiles, mode="Union", singleEnd=FALSE, ignore.strand=TRUE, fragments=TRUE) colData(se) <- as(sampleTable, "DataFrame") se colData(se) rowData(se) head(assay(se)) ## ----airway-data--------------------------------------------------------- data(airway) se <- airway ## ----airway-cell-dex----------------------------------------------------- colData(se) ## ----airway-DESeq2-design------------------------------------------------ library(DESeq2) dds <- DESeqDataSet(se, design = ~ cell + dex) dds <- DESeq(dds) res <- results(dds) ## ----plotcounts, fig.width=5, fig.height=5------------------------------- topGene <- rownames(res)[which.min(res$padj)] res[topGene,] plotCounts(dds, gene=topGene, intgroup=c("dex")) ## ----plotma-------------------------------------------------------------- plotMA(res, ylim=c(-5,5)) ## ----airway-DESeq2-hist-------------------------------------------------- hist(res$pvalue, breaks=50) ## ----airway-DESeq2-volcano----------------------------------------------- plot(-log10(padj) ~ log2FoldChange, as.data.frame(res), pch=20)