## ----setup, echo = FALSE, results = "hide"------------------------------------ knitr::opts_chunk$set(tidy = FALSE, cache = FALSE, dev = "png", message = FALSE, error = FALSE, warning = TRUE, fig.height = 4, fig.width = 4) library(kableExtra) ## ----Basic1------------------------------------------------------------------- library(NanoTube) example_data <- system.file("extdata", "GSE117751_RAW", package = "NanoTube") sample_info <- system.file("extdata", "GSE117751_sample_data.csv", package = "NanoTube") ## ----Basic2------------------------------------------------------------------- dat <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis") ## ----BasicGroups-------------------------------------------------------------- table(dat$groups) ## ----Basic3------------------------------------------------------------------- limmaResults <- runLimmaAnalysis(dat, base.group = "None") ## ----Basic3b------------------------------------------------------------------ limmaStats <- makeDiffExprFile(limmaResults, filename = NULL, returns = "stats") limmaStats <- as.data.frame(limmaStats) # Rounding for clarity limmaTab <- head(limmaStats[order(limmaStats$`p-val (Autoimmune.retinopathy)`, decreasing = FALSE), 1:4]) limmaTab[,1] <- format(limmaTab[,1], digits = 2, nsmall = 1) limmaTab[,3] <- format(limmaTab[,3], digits = 1, scientific = TRUE) limmaTab[,4] <- format(limmaTab[,4], digits = 1, nsmall = 1) # Order by lowest to highest p-value for 'Autoimmune Retinopathy' vs. 'None' knitr::kable(head(limmaTab), row.names = TRUE, format = "html", align = "c") ## ----BasicVol----------------------------------------------------------------- deVolcano(limmaResults, plotContrast = "Autoimmune.retinopathy") ## ----Basic4------------------------------------------------------------------- data("ExamplePathways") fgseaResults <- limmaToFGSEA(limmaResults, gene.sets = ExamplePathways) # FGSEA was run separately for Autoimmune Retinopathy vs. None and # Retinitis Pigmentosa vs. None names(fgseaResults) ## ----BasicTab----------------------------------------------------------------- fgseaTab <- head(fgseaResults$Autoimmune.retinopathy[ order(fgseaResults$Autoimmune.retinopathy$pval, decreasing = FALSE),]) fgseaTab[,2:6] <- lapply(fgseaTab[,2:6], format, digits = 1, nsmall = 1) knitr::kable(fgseaTab, row.names = FALSE, format = "html", align = "c") ## ----proc1-------------------------------------------------------------------- # Without supplying idCol, a warning is returned. # The CSV file contains samples in the proper order, so this is technically ok. dat <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, groupCol = "Sample_Diagnosis", normalization = "nSolver", bgType = "t.test", bgPVal = 0.01, skip.housekeeping = FALSE, output.format = "ExpressionSet") ## ----proc1idCol--------------------------------------------------------------- # With idCol dat <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "nSolver", bgType = "t.test", bgPVal = 0.01, skip.housekeeping = FALSE, output.format = "ExpressionSet") ## ----proc2-------------------------------------------------------------------- csv_data <- system.file("extdata", "GSE117751_expression_matrix.csv", package = "NanoTube") dat2 <- processNanostringData(nsFile = csv_data, sampleTab = sample_info, idCol = "GEO_Accession", groupCol = "Sample_Diagnosis", normalization = "none") ## ----norm1-------------------------------------------------------------------- # Set housekeeping genes manually (optional) hk.genes <- c("TUBB", "TBP", "POLR2A", "GUSB", "SDHA") dat <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "nSolver", bgType = "t.test", bgPVal = 0.01, housekeeping = hk.genes, skip.housekeeping = FALSE) ## ----norm2-------------------------------------------------------------------- # Automatically detect housekeeping genes dat <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "nSolver", bgType = "t.test", bgPVal = 0.01) ## ----normRUVIII--------------------------------------------------------------- # This sample data table contains fake replicate identifiers (for use in example). sample_info_reps <- system.file("extdata", "GSE117751_sample_data_replicates.csv", package = "NanoTube") datRUVIII <- processNanostringData(nsFiles = example_data, sampleTab = sample_info_reps, idCol = "RCC_Name", replicateCol = "Replicate_ID", groupCol = "Sample_Diagnosis", normalization = "RUVIII", bgType = "t.test", bgPVal = 0.01, n.unwanted = 1) ## ----normRUVg----------------------------------------------------------------- hk.genes <- c("TUBB", "TBP", "POLR2A", "GUSB", "SDHA") datRUVg <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "RUVg", bgType = "t.test", bgPVal = 0.01, n.unwanted = 1, RUVg.drop = 0, housekeeping = hk.genes) ## ----normRUVg2---------------------------------------------------------------- # Housekeeping genes are automatically identified datRUVg <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "RUVg", bgType = "t.test", bgPVal = 0.01, n.unwanted = 1, RUVg.drop = 0) ## ----rle, fig.width = 6------------------------------------------------------- # Here's how to do it with the NanoString ExpressionSet. If your data set is in # list form, you would use dat$exprs instead of exprs(dat). ruv::ruv_rle(t(log2(exprs(dat))), ylim = c(-2, 2)) ## ----pca, fig.width = 6------------------------------------------------------- dat <- processNanostringData(example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "nSolver", bgType = "t.test", bgPVal = 0.01) library(plotly) nanostringPCA(dat, interactive.plot = TRUE)$plt ## ----pca2, fig.width = 6------------------------------------------------------ nanostringPCA(dat, pc1=3, pc2=4, interactive.plot = FALSE)$plt ## ----qc1---------------------------------------------------------------------- dat <- processNanostringData(example_data, idCol = "RCC_Name", output.format = "list", includeQC = TRUE) ## ----qc2---------------------------------------------------------------------- head(dat$qc)[,1:5] ## ----qcPos1------------------------------------------------------------------- posQC <- positiveQC(dat) knitr::kable(head(posQC$tab), row.names = FALSE, format = "html", align = "c", digits = 2) ## ----qcPos2, fig.width = 7---------------------------------------------------- posQC2 <- positiveQC(dat, samples = 1:6) posQC2$plt ## ----qcNeg1------------------------------------------------------------------- negQC <- negativeQC(dat, interactive.plot = FALSE) knitr::kable(head(negQC$tab), row.names = TRUE, format = "html", align = "c") ## ----qcNeg2, fig.height = 7, fig.width = 7------------------------------------ negQC$plt ## ----qcHK1-------------------------------------------------------------------- signif(head(dat$hk.scalefactors), digits = 2) ## ----gps---------------------------------------------------------------------- dat <- processNanostringData(example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "nSolver", bgType = "t.test", bgPVal = 0.01) table(dat$groups) ## ----deLimma------------------------------------------------------------------ limmaResults <- runLimmaAnalysis(dat, base.group = "None") head(signif(limmaResults$coefficients, digits = 2)) ## ----deLimma2, fig.height = 3.5----------------------------------------------- # Generate fake batch labels batch <- rep(c(0, 1), times = ncol(dat) / 2) # Reorder groups ("None" first) group <- factor(dat$groups, levels = c("None", "Autoimmune retinopathy", "Retinitis pigmentosa")) # Design matrix including sample group and batch design <- model.matrix(~group + batch) # Analyze data limmaResults2 <- runLimmaAnalysis(dat, design = design) # We can see there is no batch effect in this data, due to the somewhat uniform # distribution of p-values. This is good, since the batches are fake. hist(limmaResults2$p.value[,"batch"], main = "p-values of Batch terms") ## ----desInput----------------------------------------------------------------- sample_info_design <- system.file("extdata", "GSE117751_design_matrix.csv", package = "NanoTube") datDes <- processNanostringData(example_data, sampleTab = sample_info_design, idCol = "RCC_Name", normalization = "nSolver", bgType = "t.test", bgPVal = 0.01) head(pData(datDes)) ## ----limma3------------------------------------------------------------------- # Analyze data limmaResults3 <- runLimmaAnalysis(datDes, design = pData(datDes)[,2:(ncol(pData(datDes))-1)]) hist(limmaResults3$p.value[,"Age"], main = "p-values for Age term") ## ----volcano------------------------------------------------------------------ deVolcano(limmaResults, y.var = "p.value") ## ----volcano2----------------------------------------------------------------- deVolcano(limmaResults, plotContrast = "Autoimmune.retinopathy", y.var = "p.value") + geom_hline(yintercept = 2, linetype = "dashed", colour = "darkred") + geom_vline(xintercept = 0.5, linetype = "dashed", colour = "darkred") + geom_vline(xintercept = -0.5, linetype = "dashed", colour = "darkred") ## ----deExport----------------------------------------------------------------- limmaStats <- makeDiffExprFile(limmaResults, filename = NULL, returns = "stats") limmaStats <- as.data.frame(limmaStats) # Order by lowest to highest p-value for 'Autoimmune Retinopathy' vs. 'None' knitr::kable(head(limmaStats[order(limmaStats$`p-val (Autoimmune.retinopathy)`, decreasing = FALSE),]), row.names = TRUE, format = "html", align = "c") ## ----nsDiffConvert------------------------------------------------------------ # Remember to set normalization = "none" datNoNorm <- processNanostringData(nsFiles = example_data, sampleTab = sample_info, idCol = "RCC_Name", groupCol = "Sample_Diagnosis", normalization = "none") nsDiffSet <- makeNanoStringSetFromEset(datNoNorm) colnames(pData(nsDiffSet)) ## ----nsDiffRun, eval = FALSE-------------------------------------------------- # # ### This block is not run! ### # # # This step is fast # nsDiffSet <- NanoStringDiff::estNormalizationFactors(nsDiffSet) # # # This step likely to take multiple hours on standard desktop computers. # result <- NanoStringDiff::glm.LRT(nsDiffSet, # design.full = as.matrix(pData(nsDiffSet)), # contrast = c(1, -1, 0)) # #Contrast: Autoimmune retinopathy vs. None # ## ----gsea1-------------------------------------------------------------------- data("ExamplePathways") fgseaResults <- limmaToFGSEA(limmaResults, gene.sets = ExamplePathways, min.set = 5, rank.by = "t", skip.first = TRUE) names(fgseaResults) ## ----gsea2-------------------------------------------------------------------- fgseaTab <- head(fgseaResults$Autoimmune.retinopathy[ order(fgseaResults$Autoimmune.retinopathy$pval, decreasing = FALSE),]) fgseaTab[,2:6] <- lapply(fgseaTab[,2:6], format, digits = 1, nsmall = 1) knitr::kable(fgseaTab, row.names = FALSE, format = "html", align = "c") ## ----gsea3-------------------------------------------------------------------- # Leading edge for pathways with adjusted p < 0.2 leading.edge <- fgseaToLEdge(fgsea.res = fgseaResults, cutoff.type = "padj", cutoff = 0.2) ## ----gsea4-------------------------------------------------------------------- # Leading edge for pathways with abs(NES) > 1 leading.edge.nes <- fgseaToLEdge(fgsea.res = fgseaResults, cutoff.type = "NES", cutoff = 1, nes.abs.cutoff = TRUE) # Leading edge for pathways with NES > 1.5 leading.edge.nes <- fgseaToLEdge(fgsea.res = fgseaResults, cutoff.type = "NES", cutoff = 1.5, nes.abs.cutoff = FALSE) # Leading edge for pathways with NES < -0.5 leading.edge.nes <- fgseaToLEdge(fgsea.res = fgseaResults, cutoff.type = "NES", cutoff = -0.5, nes.abs.cutoff = FALSE) ## ----gsea5, fig.width = 8, fig.height = 3------------------------------------- pheatmap::pheatmap(t(leading.edge$Autoimmune.retinopathy), legend = FALSE, color = c("white", "black")) ## ----gsea6-------------------------------------------------------------------- # Group pathways with a binary distance below 0.5 fgsea.grouped <- groupFGSEA(fgseaResults$Autoimmune.retinopathy, leading.edge$Autoimmune.retinopathy, join.threshold = 0.5, dist.method = "binary") fgseaTab <- head(fgsea.grouped) fgseaTab[,2:6] <- lapply(fgseaTab[,2:6], format, digits = 1, nsmall = 1) knitr::kable(fgseaTab, row.names = FALSE, format = "html", align = "c") ## ----exportGSEA--------------------------------------------------------------- fgseaPostprocessingXLSX(genesetResults = fgseaResults, leadingEdge = leading.edge, limmaResults = limmaResults, join.threshold = 0.5, filename = "analysis.xlsx") ## ----exportGSEA2-------------------------------------------------------------- results.AR <- groupedGSEAtoStackedReport( fgsea.grouped, leadingEdge = leading.edge$Autoimmune.retinopathy, de.fit = limmaResults) # View Cluster 1 gene statistics resultsTab <- results.AR[results.AR$Cluster == 1, 1:6] resultsTab[,2:6] <- lapply(resultsTab[,2:6], format, digits = 1, nsmall = 1) knitr::kable(resultsTab, row.names = FALSE, format = "html", align = "c")