## GSEA.R

###################################################
### chunk number 1: Setup
###################################################
## Attach the Bioconductor packages used by the rest of the script.
## Each call sits on its own line so it is executed instead of being
## swallowed by the chunk-header comment.
library("Biobase")
library("annotate")
library("Category")
library("hgu95av2")
library("genefilter")

###################################################
### chunk number 2: Example-subset
###################################################
## Load the ALL data set and keep only the samples whose BT label
## starts with "B" and whose mol.biol value is "NEG" or "BCR/ABL".
library("ALL")
data(ALL)
Bcell <- grep("^B", as.character(ALL$BT))
bcrAblOrNegIdx <- which(as.character(ALL$mol.biol) %in% c("NEG", "BCR/ABL"))
eset <- ALL[, intersect(Bcell, bcrAblOrNegIdx)]
## Re-create the factor so that only the levels still present remain.
eset$mol.biol <- factor(eset$mol.biol)
## Number of retained samples; used later to pick quantile positions.
numBN <- length(eset$mol.biol)

###################################################
### chunk number 3: noEntrez
###################################################
## Drop probes for which the hgu95av2LOCUSID map returns NA.
## NOTE(review): geneNames() and the LOCUSID map come from older
## Bioconductor releases -- confirm they exist in the installed versions.
entrezIds <- mget(geneNames(eset), envir = hgu95av2LOCUSID)
haveEntrezId <- names(entrezIds)[sapply(entrezIds, function(x) !is.na(x))]
## Count of probes removed because they have no ID.
numNoEntrezId <- length(geneNames(eset)) - length(haveEntrezId)
eset <- eset[haveEntrezId, ]

###################################################
### chunk number 4: simplefiltering
###################################################
## Non-specific filtering based on IQR
## The quantile positions are fractions of the sample count; the
## multiplication operator is restored here (it was missing, which is a
## syntax error).
lowQ <- rowQ(eset, floor(0.25 * numBN))
upQ <- rowQ(eset, ceiling(0.75 * numBN))
iqrs <- upQ - lowQ

## Keep only probes whose inter-quartile range exceeds 0.5.
selected <- iqrs > 0.5

nsFiltered <- eset[selected, ]

###################################################
### chunk number 5: reduceto1to1
###################################################
## Reduce to unique probe <--> gene mapping by keeping largest IQR
## This gives us "unique genes" in the non-specific filtered gene
## set which simplifies further calculations.
nsFilteredIqr <- iqrs[selected]
uniqGenes <- findLargest(geneNames(nsFiltered), nsFilteredIqr, "hgu95av2")
nsFiltered <- nsFiltered[uniqGenes, ]

## basic stats on our non-specific filter result
numSelected <- length(geneNames(nsFiltered))
numBcrAbl <- sum(nsFiltered$mol.biol == "BCR/ABL")
numNeg <- sum(nsFiltered$mol.biol == "NEG")

###################################################
### chunk number 6: noKEGG
###################################################
## Remove genes with no PATH mapping
## A probe counts as unmapped when its hgu95av2PATH entry is a single NA.
havePATH <- sapply(
  mget(geneNames(nsFiltered), hgu95av2PATH),
  function(x) !(length(x) == 1 && is.na(x))
)
numNoPATH <- sum(!havePATH)
nsF <- nsFiltered[havePATH, ]

###################################################
### chunk number 7: compAmat
###################################################
## Build the pathway matrix for the chip and restrict/reorder its rows
## to the genes kept in nsF. Statements are split onto separate lines;
## fused on one line they do not parse.

Am <- PWAmat("hgu95av2")
## IDs of the retained probes, matched against the row names of Am below.
egN <- unlist(mget(geneNames(nsF), hgu95av2LOCUSID))

sub1 <- match(egN, row.names(Am))

Am <- Am[sub1, ]
dim(Am)
## Distribution of the column totals of Am.
table(colSums(Am))

###################################################
### chunk number 8: compttests
###################################################
## Row-wise t-tests of nsF against the "mol.biol" grouping; only the
## test statistic is kept for the later steps.

rtt <- rowttests(nsF, "mol.biol")
rttStat <- rtt$statistic

###################################################
### chunk number 9: reducetoInt
###################################################
## Transpose Am and keep only the rows of the transposed matrix whose
## row sum is greater than 10.
Amat <- t(Am)
rs <- rowSums(Amat)
## drop = FALSE keeps Amat2 a matrix (with its row names) even when a
## single row survives the cutoff, so the later %*% and row.names()
## calls keep working.
Amat2 <- Amat[rs > 10, , drop = FALSE]
rs2 <- rs[rs > 10]
## Number of retained categories.
nCats <- length(rs2)

###################################################
### chunk number 10: pctests
###################################################
## Per-category statistic: sum of the gene-level t statistics selected
## by each row of Amat2, then divided by the square root of rs2.

tA <- as.vector(Amat2 %*% rttStat)
tAadj <- tA / sqrt(rs2)

## Label both vectors with the row names of Amat2.
names(tA) <- names(tAadj) <- row.names(Amat2)

###################################################
### chunk number 11: qqplot
###################################################
## Normal quantile-quantile plot of the adjusted per-category statistics.
qqnorm(tAadj)

###################################################
### chunk number 12: findSmPW
###################################################
## Pick the categories whose adjusted statistic is below -5 and look up
## the corresponding name in KEGGPATHID2NAME.

smPW <- tAadj[tAadj < -5]
## NOTE(review): `[[` is given names(smPW) as the key; presumably exactly
## one category passes the cutoff -- confirm for other data sets.
pwName <- KEGGPATHID2NAME[[names(smPW)]]
pwName

###################################################
### chunk number 13: mnplot
###################################################
## Plot for the selected pathway, using the mol.biol grouping of nsF.
KEGGmnplot(names(smPW), nsF, "hgu95av2", nsF$"mol.biol")

###################################################
### chunk number 14: heatmap
###################################################
## Heatmap for the selected pathway over the samples in nsF.
KEGG2heatmap(names(smPW), nsF, "hgu95av2")

###################################################
### chunk number 15: ttperm
###################################################
## Permutation assessment of the per-category sums tA: recompute the
## category sums for NPERM permuted t statistics and record, for each
## category, the fraction of permuted sums below / above the observed one.

NPERM <- 100
ttp <- ttperm(exprs(nsF), nsF$mol.biol, NPERM)

## One column of t statistics per permutation.
permDm <- do.call("cbind", lapply(ttp$perms, function(x) x$statistic))

## Category sums for every permutation (categories in rows).
permD <- Amat2 %*% permDm

## `nrow` is spelled out instead of relying on partial matching of `nr`.
pvals <- matrix(NA, nrow = nCats, ncol = 2)
dimnames(pvals) <- list(row.names(Amat2), c("Lower", "Upper"))

## seq_len() yields an empty sequence when nCats is 0, whereas 1:nCats
## would iterate over c(1, 0).
for (i in seq_len(nCats)) {
  pvals[i, 1] <- sum(permD[i, ] < tA[i]) / NPERM
  pvals[i, 2] <- sum(permD[i, ] > tA[i]) / NPERM
}

## Categories with a "Lower" fraction under 0.05, ordered by that fraction.
ord1 <- order(pvals[, 1])
lowC <- (row.names(pvals)[ord1])[pvals[ord1, 1] < 0.05]

## Categories with an "Upper" fraction under 0.05.
highC <- row.names(pvals)[pvals[, 2] < 0.05]

getPathNames(lowC)

getPathNames(highC)

lnhC <- length(highC)

###################################################
### chunk number 16: findAmap
###################################################
## Build the MAP matrix for the hgu95av2 chip, passing minCount = 5.

AmChr <- MAPAmat("hgu95av2", minCount = 5)

###################################################
### chunk number 17: sub2ourData
###################################################
## Restrict AmChr to the rows whose names occur in egN, then summarise
## the result. Statements are split onto separate lines; fused on one
## line they do not parse.

subC <- row.names(AmChr) %in% egN

AmChr <- AmChr[subC, ]
dim(AmChr)
## Distribution of the column totals of the reduced matrix.
table(colSums(AmChr))

###################################################
### chunk number 18:
###################################################
## Print the R version and attached packages for reproducibility.
sessionInfo()

## bioconductor.org

## Sections

## GSEA.R

## Same expression as the highC assignment in the ttperm chunk: row names
## of pvals whose second column is below 0.05.
highC = row.names(pvals)[pvals[,2] < 0.05]

## Navigation

## News