## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"-------------------- BiocStyle::latex() ## ----echo=TRUE, eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install(c("PADOG", "GSVA", "AnnotationDbi", "topGO", "pathview", "gage", # "globaltest", "limma", "edgeR", "safe", "org.Hs.eg.db", "org.Mm.eg.db", # "org.Rn.eg.db")) ## ----echo=TRUE, eval=FALSE,tidy=TRUE-------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("EGSEAdata") ## ----echo=TRUE, eval=FALSE,tidy=TRUE-------------------------------------- # library(devtools) # install_github("malhamdoosh/EGSEAdata") ## ----echo=TRUE, eval=FALSE,tidy=TRUE-------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("EGSEA") ## ----echo=TRUE, eval=FALSE,tidy=TRUE-------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("EGSEA", version="devel") ## ----echo=TRUE, eval=FALSE,tidy=TRUE-------------------------------------- # library(devtools) # install_github("malhamdoosh/EGSEA") ## ----echo=TRUE, eval=TRUE,tidy=TRUE, message=F, warning=F----------------- library(EGSEA) ## ----echo=TRUE, eval=TRUE,tidy=TRUE--------------------------------------- library(EGSEAdata) data(il13.data) v = il13.data$voom names(v) v$design contrasts = il13.data$contra contrasts ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- gs.annots = buildIdx(entrezIDs=rownames(v$E), species="human", msigdb.gsets="c5", kegg.exclude = c("Metabolism")) names(gs.annots) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- summary(gs.annots$kegg) summary(gs.annots$c5) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- baseMethods = egsea.base()[-c(2, 12)] baseMethods ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- egsea.sort() ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- # perform the EGSEA analysis # set report = TRUE to generate HTML report. # set display.top = 20 to display more gene sets. It takes longer time to run. gsa = egsea(voom.results=v, contrasts=contrasts, gs.annots=gs.annots, symbolsMap=v$genes, baseGSEAs=baseMethods, report.dir="./il13-egsea-report", sort.by="avg.rank", num.threads = 4, report=FALSE) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- summary(gsa) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- gs.annots = buildIdx(entrezIDs=rownames(v$E), species="human", gsdb.gsets="all") names(gs.annots) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- show(gsa) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- topSets(gsa, contrast=1, gs.label="kegg", number = 10) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- t = topSets(gsa, contrast=1, gs.label="c5", sort.by="ora", number = 10, names.only=FALSE) t ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- showSetByName(gsa, "c5", rownames(t)[1]) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- t = topSets(gsa, contrast="comparison", gs.label="kegg", number = 10) t ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- showSetByName(gsa, "kegg", rownames(t)[1]) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotMethods(gsa, gs.label = "kegg", contrast=1, file.name="X24IL13-X24-kegg-methods") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotSummary(gsa, gs.label="kegg", contrast = 1, file.name="X24IL13-X24-kegg-summary") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- showSetByID(gsa, gs.label="kegg", c("hsa04060", "hsa04640")) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotGOGraph(gsa, gs.label="c5", file.name="X24IL13-X24-c5-top-", sort.by="avg.rank") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotHeatmap(gsa, "Asthma", gs.label="kegg", contrast=1, file.name="asthma-hm") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotPathway(gsa, "Asthma", gs.label="kegg", file.name="asthma-pathway") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotSummary(gsa, gs.label="kegg", contrast=c(1,2), file.name="kegg-summary-cmp") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotSummaryHeatmap(gsa, gs.label="kegg", show.vals = "p.adj", file.name="il13-sum-heatmap") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotHeatmap(gsa, "Asthma", gs.label="kegg", contrast="comparison", file.name="asthma-hm-cmp") ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE,width.cutoff=60)---- plotPathway(gsa, "Asthma", gs.label="kegg", contrast=0, file.name="asthma-pathway-cmp") ## ----echo=TRUE, eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- # # load the mammary dataset # library(EGSEA) # library(EGSEAdata) # data(mam.data) # v = mam.data$voom # names(v) # v$design # contrasts = mam.data$contra # contrasts # # build the gene set collections # gs.annots = buildIdx(entrezIDs=rownames(v$E), species="mouse", # msigdb.gsets = "c2", # kegg.exclude = "all") # names(gs.annots) # # create Entrez IDs - Symbols map # symbolsMap = v$genes[,c(1,3)] # colnames(symbolsMap) = c("FeatureID", "Symbols") # symbolsMap[, "Symbols"] = as.character(symbolsMap[, "Symbols"]) # # replace NA Symbols with IDs # na.sym = is.na(symbolsMap[, "Symbols"]) # na.sym # symbolsMap[na.sym, "Symbols"] = symbolsMap[na.sym, "FeatureID"] # # perform the EGSEA analysis # # set report = TRUE to generate the EGSEA interactive report # baseMethods = c("camera", "safe", "gage", "padog", "zscore", # "gsva", "globaltest", "ora") # gsa = egsea(voom.results=v, contrasts=contrasts, gs.annots=gs.annots, # symbolsMap=symbolsMap, baseGSEAs=baseMethods, # sort.by="med.rank", # num.threads=4, report=FALSE) # # show top 20 comparative gene sets in C2 collection # summary(gsa) # topSets(gsa, gs.label="c2", contrast="comparison", number = 20) ## ----echo=TRUE, eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- # # load the count matrix and other relevant data # library(EGSEAdata) # data(il13.data.cnt) # cnt = il13.data.cnt$counts # group = il13.data.cnt$group # group # design = il13.data.cnt$design # contrasts = il13.data.cnt$contra # genes = il13.data.cnt$genes # # build the gene set collections # gs.annots = buildIdx(entrezIDs=rownames(cnt), species="human", msigdb.gsets="none", # kegg.exclude = c("Metabolism")) # # perform the EGSEA analysis # # set report = TRUE to generate the EGSEA interactive report # gsa = egsea.cnt(counts=cnt, group=group, design=design, contrasts=contrasts, # gs.annots=gs.annots, symbolsMap=genes, baseGSEAs=egsea.base()[-c(2,12)], # sort.by="avg.rank", num.threads = 4, report=FALSE) ## ----echo=TRUE, eval=TRUE, tidy=TRUE, tidy.opts=list(blank=FALSE, width.cutoff=60)---- # load IL-13 dataset library(EGSEAdata) data(il13.data) voom.results = il13.data$voom contrast = il13.data$contra # find Differentially Expressed genes library(limma) vfit = lmFit(voom.results, voom.results$design) vfit = contrasts.fit(vfit, contrast) vfit = eBayes(vfit) # select DE genes (Entrez IDs and logFC) at p-value <= 0.05 and |logFC| >= 1 top.Table = topTable(vfit, coef=1, number=Inf, p.value=0.05, lfc=1) deGenes = as.character(top.Table$FeatureID) logFC = top.Table$logFC names(logFC) = deGenes # build the gene set collection index gs.annots = buildIdx(entrezIDs=deGenes, species="human", msigdb.gsets="none", kegg.exclude = c("Metabolism")) # perform the ORA analysis # set report = TRUE to generate the EGSEA interactive report gsa = egsea.ora(geneIDs=deGenes, universe= as.character(voom.results$genes[,1]), logFC =logFC, title="X24IL13-X24", gs.annots=gs.annots, symbolsMap=top.Table[, c(1,2)], display.top = 5, report.dir="./il13-egsea-ora-report", num.threads = 4, report=FALSE) ## ----echo=TRUE, eval=TRUE, tidy=TRUE, tidy.opts=list(blank=FALSE, width.cutoff=60)---- library(EGSEAdata) data(il13.data) v = il13.data$voom # load KEGG pathways data(kegg.pathways) # select 50 pathways gsets = kegg.pathways$human$kg.sets[1:50] gsets[1] # build custom gene set collection using these 50 pathways gs.annot = buildCustomIdx(geneIDs=rownames(v$E), gsets= gsets, species="human") class(gs.annot) show(gs.annot) ## ----echo=TRUE, eval=TRUE, tidy=TRUE, tidy.opts=list(blank=FALSE, width.cutoff=60)---- sessionInfo()