1 Scenario

In this vignette, we use the example of a differential expression and pathway analysis workflow on a real data set to demonstrate how a selection made in a panel of pathway analysis results may be transmitted to other row-oriented panels in the iSEE application.

2 Demonstration

2.1 Example data

2.2 Experimental metadata

We use the ?airway data set.

We briefly adjust the reference level of the treatment factor to the untreated condition.

library("airway")
data("airway")
airway$dex <- relevel(airway$dex, "untrt")

2.3 Feature identifiers

We also map the Ensembl gene identifiers to more recognisable gene symbols, setting row names to a unique identifier composed of either gene symbol, gene identifier, of a concatenate of both.

Although not essential, this implicitly defines the primary piece of information displayed for genes in the live app. No information is lost in the process, as the original Ensembl identifier and the corresponding gene symbol are both stored in the rowData() of the object.

library("org.Hs.eg.db")
library("scater")
rowData(airway)[["ENSEMBL"]] <- rownames(airway)
rowData(airway)[["SYMBOL"]] <- mapIds(org.Hs.eg.db, rownames(airway), "SYMBOL", "ENSEMBL")
rowData(airway)[["uniquifyFeatureNames"]] <- uniquifyFeatureNames(
  ID = rowData(airway)[["ENSEMBL"]],
  names = rowData(airway)[["SYMBOL"]]
)
rownames(airway) <- rowData(airway)[["uniquifyFeatureNames"]]

2.4 Gene expression

We also compute log-transformed counts, for a better visualisation of differential expression in the live app.

library("scuttle")
airway <- logNormCounts(airway)

2.5 Differential expression analysis

We run a standard Limma-Voom analysis using limma::voom(), limma::lmFit(), limma::makeContrasts(), and limma::eBayes().

library("edgeR")

counts <- assay(airway, "counts")
design <- model.matrix(~ 0 + dex + cell, data = colData(airway))

keep <- filterByExpr(counts, design)
v <- voom(counts[keep,], design, plot=FALSE)
fit <- lmFit(v, design)
contr <- makeContrasts("dextrt - dexuntrt", levels = colnames(coef(fit)))
tmp <- contrasts.fit(fit, contr)
tmp <- eBayes(tmp)
res_limma <- topTable(tmp, sort.by = "P", n = Inf)
head(res_limma)
#>             logFC  AveExpr         t      P.Value    adj.P.Val        B
#> CACNB2   3.205598 3.682244  36.49009 2.370569e-11 3.996779e-07 16.03257
#> DUSP1    2.864775 6.644455  28.95296 2.000429e-10 9.657686e-07 14.66891
#> MAOA     3.256099 5.950559  28.29381 2.472372e-10 9.657686e-07 14.44796
#> SPARCL1  4.489075 4.166904  27.99497 2.725926e-10 9.657686e-07 14.01663
#> PRSS35  -2.828186 3.224885 -27.47899 3.234271e-10 9.657686e-07 13.80831
#> STEAP2   1.894563 6.790009  26.91396 3.914512e-10 9.657686e-07 14.03974

Then, we embed this set of differential expression results in the ?airway object using the iSEEde::embedContrastResults() method.

library("iSEEde")
airway <- iSEEde::embedContrastResults(res_limma, airway, name = "Limma-Voom", class = "limma")
rowData(airway)
#> DataFrame with 63677 rows and 14 columns
#>                         gene_id     gene_name  entrezid   gene_biotype gene_seq_start gene_seq_end
#>                     <character>   <character> <integer>    <character>      <integer>    <integer>
#> TSPAN6          ENSG00000000003        TSPAN6        NA protein_coding       99883667     99894988
#> TNMD            ENSG00000000005          TNMD        NA protein_coding       99839799     99854882
#> DPM1            ENSG00000000419          DPM1        NA protein_coding       49551404     49575092
#> SCYL3           ENSG00000000457         SCYL3        NA protein_coding      169818772    169863408
#> FIRRM           ENSG00000000460      C1orf112        NA protein_coding      169631245    169823221
#> ...                         ...           ...       ...            ...            ...          ...
#> ENSG00000273489 ENSG00000273489 RP11-180C16.1        NA      antisense      131178723    131182453
#> ENSG00000273490 ENSG00000273490        TSEN34        NA protein_coding       54693789     54697585
#> ENSG00000273491 ENSG00000273491  RP11-138A9.2        NA        lincRNA      130600118    130603315
#> APP-DT          ENSG00000273492    AP000230.1        NA        lincRNA       27543189     27589700
#> ENSG00000273493 ENSG00000273493  RP11-80H18.4        NA        lincRNA       58315692     58315845
#>                              seq_name seq_strand seq_coord_system        symbol         ENSEMBL      SYMBOL
#>                           <character>  <integer>        <integer>   <character>     <character> <character>
#> TSPAN6                              X         -1               NA        TSPAN6 ENSG00000000003      TSPAN6
#> TNMD                                X          1               NA          TNMD ENSG00000000005        TNMD
#> DPM1                               20         -1               NA          DPM1 ENSG00000000419        DPM1
#> SCYL3                               1         -1               NA         SCYL3 ENSG00000000457       SCYL3
#> FIRRM                               1          1               NA      C1orf112 ENSG00000000460       FIRRM
#> ...                               ...        ...              ...           ...             ...         ...
#> ENSG00000273489                     7         -1               NA RP11-180C16.1 ENSG00000273489          NA
#> ENSG00000273490 HSCHR19LRC_LRC_J_CTG1          1               NA        TSEN34 ENSG00000273490          NA
#> ENSG00000273491          HG1308_PATCH          1               NA  RP11-138A9.2 ENSG00000273491          NA
#> APP-DT                             21          1               NA    AP000230.1 ENSG00000273492      APP-DT
#> ENSG00000273493                     3          1               NA  RP11-80H18.4 ENSG00000273493          NA
#>                 uniquifyFeatureNames             iSEEde
#>                          <character>        <DataFrame>
#> TSPAN6                        TSPAN6 <iSEELimmaResults>
#> TNMD                            TNMD <iSEELimmaResults>
#> DPM1                            DPM1 <iSEELimmaResults>
#> SCYL3                          SCYL3 <iSEELimmaResults>
#> FIRRM                          FIRRM <iSEELimmaResults>
#> ...                              ...                ...
#> ENSG00000273489      ENSG00000273489 <iSEELimmaResults>
#> ENSG00000273490      ENSG00000273490 <iSEELimmaResults>
#> ENSG00000273491      ENSG00000273491 <iSEELimmaResults>
#> APP-DT                        APP-DT <iSEELimmaResults>
#> ENSG00000273493      ENSG00000273493 <iSEELimmaResults>

2.6 Pathways

We prepare Gene Ontology gene sets of biological pathways using org.Hs.eg.db.

Due to the use of uniquifyFeatureNames() above, we must first map pathway identifiers to the unique Ensembl gene identifier, to accurately perform pathway analysis using the feature identifiers matching those of the embedded differential expression results.

library("org.Hs.eg.db")
pathways <- select(org.Hs.eg.db, keys(org.Hs.eg.db, "ENSEMBL"), c("GOALL"), keytype = "ENSEMBL")
#> 'select()' returned 1:many mapping between keys and columns
pathways <- subset(pathways, ONTOLOGYALL == "BP")
pathways <- unique(pathways[, c("ENSEMBL", "GOALL")])
pathways <- merge(pathways, rowData(airway)[, c("ENSEMBL", "uniquifyFeatureNames")])
pathways <- split(pathways$uniquifyFeatureNames, pathways$GOALL)

2.7 Mapping pathways to genes

Separately, we define and register a function that fetches the gene identifiers associated with a given pathway identifier. This function is required to transmit selections from pathway-level panels to feature-level panels.

Due to the use of uniquifyFeatureNames() above, the function must first map to the unique Ensembl gene identifier, to accurately identify the corresponding value in rownames(airway).

map_GO <- function(pathway_id, se) {
    pathway_ensembl <- mapIds(org.Hs.eg.db, pathway_id, "ENSEMBL", keytype = "GOALL", multiVals = "CharacterList")[[pathway_id]]
    pathway_rownames <- rownames(se)[rowData(se)[["gene_id"]] %in% pathway_ensembl]
    pathway_rownames
}
airway <- registerAppOptions(airway, Pathways.map.functions = list(GO = map_GO))

2.8 Gene set enrichment analysis

We run a standard GSEA analysis using fgsea.

library("fgsea")
set.seed(42)
stats <- na.omit(log2FoldChange(contrastResults(airway, "Limma-Voom")))
fgseaRes <- fgsea(pathways = pathways, 
                  stats    = stats,
                  minSize  = 15,
                  maxSize  = 500)
#> Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.05% of the list).
#> The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaRes[order(pval), ])
#>       pathway         pval       padj   log2err        ES      NES  size  leadingEdge
#>        <char>        <num>      <num>     <num>     <num>    <num> <int>       <list>
#> 1: GO:0046323 2.542340e-06 0.01102613 0.6272567 0.6490865 2.230894    59 KLF15, L....
#> 2: GO:0046324 1.357804e-05 0.01277105 0.5933255 0.6682559 2.153907    46 KLF15, L....
#> 3: GO:1904659 1.573689e-05 0.01277105 0.5756103 0.5635679 2.039292    84 KLF15, L....
#> 4: GO:0010827 1.764834e-05 0.01277105 0.5756103 0.6201464 2.124828    58 KLF15, L....
#> 5: GO:0050886 1.889844e-05 0.01277105 0.5756103 0.6343317 2.129959    52 INHBB, L....
#> 6: GO:0031589 2.019441e-05 0.01277105 0.5756103 0.3847176 1.641614   287 FAM107A,....

Then, we embed this set of pathway analysis results in the airway object, using the ?iSEEpathways::embedPathwaysResults method.

But first, we reorder the results by increasing p-value. Although not essential, this implicitly defines the default ordering of the table in the live app.

library("iSEEpathways")
fgseaRes <- fgseaRes[order(pval), ]
airway <- embedPathwaysResults(
  fgseaRes, airway, name = "fgsea (p-value)", class = "fgsea",
  pathwayType = "GO", pathwaysList = pathways, featuresStats = stats)
airway
#> class: RangedSummarizedExperiment 
#> dim: 63677 8 
#> metadata(3): '' iSEE iSEEpathways
#> assays(2): counts logcounts
#> rownames(63677): TSPAN6 TNMD ... APP-DT ENSG00000273493
#> rowData names(14): gene_id gene_name ... uniquifyFeatureNames iSEEde
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(9): SampleName cell ... Sample BioSample

To showcase a choice of pathway analysis results in the live app, we repeat the process above, this time sorting by a different score that combines the log-transformed p-value and the absolute log-transformed fold-change.

stats <- na.omit(
  log2FoldChange(contrastResults(airway, "Limma-Voom")) *
  -log10(pValue(contrastResults(airway, "Limma-Voom")))
)
set.seed(42)
fgseaRes <- fgsea(pathways = pathways, 
                  stats    = na.omit(stats),
                  minSize  = 15,
                  maxSize  = 500)
fgseaRes <- fgseaRes[order(pval), ]
airway <- embedPathwaysResults(
  fgseaRes, airway, name = "fgsea (p-value & fold-change)", class = "fgsea",
  pathwayType = "GO", pathwaysList = pathways, featuresStats = stats)
airway
#> class: RangedSummarizedExperiment 
#> dim: 63677 8 
#> metadata(3): '' iSEE iSEEpathways
#> assays(2): counts logcounts
#> rownames(63677): TSPAN6 TNMD ... APP-DT ENSG00000273493
#> rowData names(14): gene_id gene_name ... uniquifyFeatureNames iSEEde
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(9): SampleName cell ... Sample BioSample

2.9 Displaying additional pathway information

For further user-friendliness in the live app, we define and register a function that displays details for the selected Gene Ontology gene set using the GO.db package.

library("GO.db")
library("shiny")
library("iSEE")
go_details <- function(x) {
    info <- select(GO.db, x, c("TERM", "ONTOLOGY", "DEFINITION"), "GOID")
    html <- list(p(strong(info$GOID), ":", info$TERM, paste0("(", info$ONTOLOGY, ")")))
    if (!is.na(info$DEFINITION)) {
        html <- append(html, list(p(info$DEFINITION)))
    }
    tagList(html)
}
airway <- registerAppOptions(airway, PathwaysTable.select.details = go_details)

2.10 Live app

Finally, we configure the initial state and launch the live app.

app <- iSEE(airway, initial = list(
  PathwaysTable(ResultName="fgsea (p-value)", Selected = "GO:0046324", PanelWidth = 4L),
  VolcanoPlot(RowSelectionSource = "PathwaysTable1", ColorBy = "Row selection", PanelWidth = 4L),
  ComplexHeatmapPlot(RowSelectionSource = "PathwaysTable1",
      PanelWidth = 4L, PanelHeight = 700L,
      CustomRows = FALSE, ColumnData = "dex",
      ClusterRows = TRUE, ClusterRowsDistance = "euclidean", AssayCenterRows = TRUE),
  FgseaEnrichmentPlot(ResultName="fgsea (p-value)", PathwayId = "GO:0046324", PanelWidth = 12L)
))

if (interactive()) {
  shiny::runApp(app)
}