iSEEpathways 1.2.0
In this vignette, we use the example of a differential expression and pathway analysis workflow on a real data set to demonstrate how a selection made in a panel of pathway analysis results may be transmitted to other row-oriented panels in the iSEE application.
We use the ?airway
data set.
We briefly adjust the reference level of the treatment factor to the untreated condition.
library("airway")
data("airway")
airway$dex <- relevel(airway$dex, "untrt")
We also map the Ensembl gene identifiers to more recognisable gene symbols, setting row names to a unique identifier composed of either gene symbol, gene identifier, of a concatenate of both.
Although not essential, this implicitly defines the primary piece of information displayed for genes in the live app.
No information is lost in the process, as the original Ensembl identifier and the corresponding gene symbol are both stored in the rowData()
of the object.
library("org.Hs.eg.db")
library("scater")
rowData(airway)[["ENSEMBL"]] <- rownames(airway)
rowData(airway)[["SYMBOL"]] <- mapIds(org.Hs.eg.db, rownames(airway), "SYMBOL", "ENSEMBL")
rowData(airway)[["uniquifyFeatureNames"]] <- uniquifyFeatureNames(
ID = rowData(airway)[["ENSEMBL"]],
names = rowData(airway)[["SYMBOL"]]
)
rownames(airway) <- rowData(airway)[["uniquifyFeatureNames"]]
We also compute log-transformed counts, for a better visualisation of differential expression in the live app.
library("scuttle")
airway <- logNormCounts(airway)
We run a standard Limma-Voom analysis using limma::voom()
, limma::lmFit()
, limma::makeContrasts()
, and limma::eBayes()
.
library("edgeR")
counts <- assay(airway, "counts")
design <- model.matrix(~ 0 + dex + cell, data = colData(airway))
keep <- filterByExpr(counts, design)
v <- voom(counts[keep,], design, plot=FALSE)
fit <- lmFit(v, design)
contr <- makeContrasts("dextrt - dexuntrt", levels = colnames(coef(fit)))
tmp <- contrasts.fit(fit, contr)
tmp <- eBayes(tmp)
res_limma <- topTable(tmp, sort.by = "P", n = Inf)
head(res_limma)
#> logFC AveExpr t P.Value adj.P.Val B
#> CACNB2 3.205598 3.682244 36.49009 2.370569e-11 3.996779e-07 16.03257
#> DUSP1 2.864775 6.644455 28.95296 2.000429e-10 9.657686e-07 14.66891
#> MAOA 3.256099 5.950559 28.29381 2.472372e-10 9.657686e-07 14.44796
#> SPARCL1 4.489075 4.166904 27.99497 2.725926e-10 9.657686e-07 14.01663
#> PRSS35 -2.828186 3.224885 -27.47899 3.234271e-10 9.657686e-07 13.80831
#> STEAP2 1.894563 6.790009 26.91396 3.914512e-10 9.657686e-07 14.03974
Then, we embed this set of differential expression results in the ?airway
object using the iSEEde::embedContrastResults()
method.
library("iSEEde")
airway <- iSEEde::embedContrastResults(res_limma, airway, name = "Limma-Voom", class = "limma")
rowData(airway)
#> DataFrame with 63677 rows and 14 columns
#> gene_id gene_name entrezid gene_biotype gene_seq_start gene_seq_end
#> <character> <character> <integer> <character> <integer> <integer>
#> TSPAN6 ENSG00000000003 TSPAN6 NA protein_coding 99883667 99894988
#> TNMD ENSG00000000005 TNMD NA protein_coding 99839799 99854882
#> DPM1 ENSG00000000419 DPM1 NA protein_coding 49551404 49575092
#> SCYL3 ENSG00000000457 SCYL3 NA protein_coding 169818772 169863408
#> FIRRM ENSG00000000460 C1orf112 NA protein_coding 169631245 169823221
#> ... ... ... ... ... ... ...
#> ENSG00000273489 ENSG00000273489 RP11-180C16.1 NA antisense 131178723 131182453
#> ENSG00000273490 ENSG00000273490 TSEN34 NA protein_coding 54693789 54697585
#> ENSG00000273491 ENSG00000273491 RP11-138A9.2 NA lincRNA 130600118 130603315
#> APP-DT ENSG00000273492 AP000230.1 NA lincRNA 27543189 27589700
#> ENSG00000273493 ENSG00000273493 RP11-80H18.4 NA lincRNA 58315692 58315845
#> seq_name seq_strand seq_coord_system symbol ENSEMBL SYMBOL
#> <character> <integer> <integer> <character> <character> <character>
#> TSPAN6 X -1 NA TSPAN6 ENSG00000000003 TSPAN6
#> TNMD X 1 NA TNMD ENSG00000000005 TNMD
#> DPM1 20 -1 NA DPM1 ENSG00000000419 DPM1
#> SCYL3 1 -1 NA SCYL3 ENSG00000000457 SCYL3
#> FIRRM 1 1 NA C1orf112 ENSG00000000460 FIRRM
#> ... ... ... ... ... ... ...
#> ENSG00000273489 7 -1 NA RP11-180C16.1 ENSG00000273489 NA
#> ENSG00000273490 HSCHR19LRC_LRC_J_CTG1 1 NA TSEN34 ENSG00000273490 NA
#> ENSG00000273491 HG1308_PATCH 1 NA RP11-138A9.2 ENSG00000273491 NA
#> APP-DT 21 1 NA AP000230.1 ENSG00000273492 APP-DT
#> ENSG00000273493 3 1 NA RP11-80H18.4 ENSG00000273493 NA
#> uniquifyFeatureNames iSEEde
#> <character> <DataFrame>
#> TSPAN6 TSPAN6 <iSEELimmaResults>
#> TNMD TNMD <iSEELimmaResults>
#> DPM1 DPM1 <iSEELimmaResults>
#> SCYL3 SCYL3 <iSEELimmaResults>
#> FIRRM FIRRM <iSEELimmaResults>
#> ... ... ...
#> ENSG00000273489 ENSG00000273489 <iSEELimmaResults>
#> ENSG00000273490 ENSG00000273490 <iSEELimmaResults>
#> ENSG00000273491 ENSG00000273491 <iSEELimmaResults>
#> APP-DT APP-DT <iSEELimmaResults>
#> ENSG00000273493 ENSG00000273493 <iSEELimmaResults>
We prepare Gene Ontology gene sets of biological pathways using org.Hs.eg.db.
Due to the use of uniquifyFeatureNames()
above, we must first map pathway identifiers to the unique Ensembl gene identifier, to accurately perform pathway analysis using the feature identifiers matching those of the embedded differential expression results.
library("org.Hs.eg.db")
pathways <- select(org.Hs.eg.db, keys(org.Hs.eg.db, "ENSEMBL"), c("GOALL"), keytype = "ENSEMBL")
#> 'select()' returned 1:many mapping between keys and columns
pathways <- subset(pathways, ONTOLOGYALL == "BP")
pathways <- unique(pathways[, c("ENSEMBL", "GOALL")])
pathways <- merge(pathways, rowData(airway)[, c("ENSEMBL", "uniquifyFeatureNames")])
pathways <- split(pathways$uniquifyFeatureNames, pathways$GOALL)
Separately, we define and register a function that fetches the gene identifiers associated with a given pathway identifier. This function is required to transmit selections from pathway-level panels to feature-level panels.
Due to the use of uniquifyFeatureNames()
above, the function must first map to the unique Ensembl gene identifier, to accurately identify the corresponding value in rownames(airway)
.
map_GO <- function(pathway_id, se) {
pathway_ensembl <- mapIds(org.Hs.eg.db, pathway_id, "ENSEMBL", keytype = "GOALL", multiVals = "CharacterList")[[pathway_id]]
pathway_rownames <- rownames(se)[rowData(se)[["gene_id"]] %in% pathway_ensembl]
pathway_rownames
}
airway <- registerAppOptions(airway, Pathways.map.functions = list(GO = map_GO))
We run a standard GSEA analysis using fgsea.
library("fgsea")
set.seed(42)
stats <- na.omit(log2FoldChange(contrastResults(airway, "Limma-Voom")))
fgseaRes <- fgsea(pathways = pathways,
stats = stats,
minSize = 15,
maxSize = 500)
#> Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.05% of the list).
#> The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaRes[order(pval), ])
#> pathway pval padj log2err ES NES size leadingEdge
#> <char> <num> <num> <num> <num> <num> <int> <list>
#> 1: GO:0046323 2.542340e-06 0.01102613 0.6272567 0.6490865 2.230894 59 KLF15, L....
#> 2: GO:0046324 1.357804e-05 0.01277105 0.5933255 0.6682559 2.153907 46 KLF15, L....
#> 3: GO:1904659 1.573689e-05 0.01277105 0.5756103 0.5635679 2.039292 84 KLF15, L....
#> 4: GO:0010827 1.764834e-05 0.01277105 0.5756103 0.6201464 2.124828 58 KLF15, L....
#> 5: GO:0050886 1.889844e-05 0.01277105 0.5756103 0.6343317 2.129959 52 INHBB, L....
#> 6: GO:0031589 2.019441e-05 0.01277105 0.5756103 0.3847176 1.641614 287 FAM107A,....
Then, we embed this set of pathway analysis results in the airway
object, using the ?iSEEpathways::embedPathwaysResults
method.
But first, we reorder the results by increasing p-value. Although not essential, this implicitly defines the default ordering of the table in the live app.
library("iSEEpathways")
fgseaRes <- fgseaRes[order(pval), ]
airway <- embedPathwaysResults(
fgseaRes, airway, name = "fgsea (p-value)", class = "fgsea",
pathwayType = "GO", pathwaysList = pathways, featuresStats = stats)
airway
#> class: RangedSummarizedExperiment
#> dim: 63677 8
#> metadata(3): '' iSEE iSEEpathways
#> assays(2): counts logcounts
#> rownames(63677): TSPAN6 TNMD ... APP-DT ENSG00000273493
#> rowData names(14): gene_id gene_name ... uniquifyFeatureNames iSEEde
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(9): SampleName cell ... Sample BioSample
To showcase a choice of pathway analysis results in the live app, we repeat the process above, this time sorting by a different score that combines the log-transformed p-value and the absolute log-transformed fold-change.
stats <- na.omit(
log2FoldChange(contrastResults(airway, "Limma-Voom")) *
-log10(pValue(contrastResults(airway, "Limma-Voom")))
)
set.seed(42)
fgseaRes <- fgsea(pathways = pathways,
stats = na.omit(stats),
minSize = 15,
maxSize = 500)
fgseaRes <- fgseaRes[order(pval), ]
airway <- embedPathwaysResults(
fgseaRes, airway, name = "fgsea (p-value & fold-change)", class = "fgsea",
pathwayType = "GO", pathwaysList = pathways, featuresStats = stats)
airway
#> class: RangedSummarizedExperiment
#> dim: 63677 8
#> metadata(3): '' iSEE iSEEpathways
#> assays(2): counts logcounts
#> rownames(63677): TSPAN6 TNMD ... APP-DT ENSG00000273493
#> rowData names(14): gene_id gene_name ... uniquifyFeatureNames iSEEde
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(9): SampleName cell ... Sample BioSample
For further user-friendliness in the live app, we define and register a function that displays details for the selected Gene Ontology gene set using the GO.db package.
library("GO.db")
library("shiny")
library("iSEE")
go_details <- function(x) {
info <- select(GO.db, x, c("TERM", "ONTOLOGY", "DEFINITION"), "GOID")
html <- list(p(strong(info$GOID), ":", info$TERM, paste0("(", info$ONTOLOGY, ")")))
if (!is.na(info$DEFINITION)) {
html <- append(html, list(p(info$DEFINITION)))
}
tagList(html)
}
airway <- registerAppOptions(airway, PathwaysTable.select.details = go_details)
Finally, we configure the initial state and launch the live app.
app <- iSEE(airway, initial = list(
PathwaysTable(ResultName="fgsea (p-value)", Selected = "GO:0046324", PanelWidth = 4L),
VolcanoPlot(RowSelectionSource = "PathwaysTable1", ColorBy = "Row selection", PanelWidth = 4L),
ComplexHeatmapPlot(RowSelectionSource = "PathwaysTable1",
PanelWidth = 4L, PanelHeight = 700L,
CustomRows = FALSE, ColumnData = "dex",
ClusterRows = TRUE, ClusterRowsDistance = "euclidean", AssayCenterRows = TRUE),
FgseaEnrichmentPlot(ResultName="fgsea (p-value)", PathwayId = "GO:0046324", PanelWidth = 12L)
))
if (interactive()) {
shiny::runApp(app)
}