
## ----style, eval=TRUE, echo=FALSE, results='asis'------------------------
options(max.print=1000)
stopifnot(BiocInstaller::biocVersion() == "2.13")
BiocStyle::latex()
library(knitr)
opts_chunk$set(cache=TRUE, tidy=FALSE)


## ----packages, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE-------
suppressPackageStartupMessages({
    library(org.Hs.eg.db)
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
    library(BSgenome.Hsapiens.UCSC.hg19)
    library(rtracklayer)
    library(biomaRt)
})


## ----select-setup--------------------------------------------------------
egids <- c("3183", "91828", "81537", "4776", "283624", "4053", "85446", 
           "10484", "55701", "1112")


## ----load-org------------------------------------------------------------
library(org.Hs.eg.db)


## ----select--------------------------------------------------------------
keytypes(org.Hs.eg.db)
columns(org.Hs.eg.db)
cols <- c("SYMBOL", "GENENAME")
select(org.Hs.eg.db, keys=egids, columns=cols, keytype="ENTREZID")


## ----merge-setup, echo=FALSE---------------------------------------------
fl <- system.file("extdata", "E-MTAB-1147-toptable.csv", package="SummerX")


## ----merge-user-setup, eval=FALSE----------------------------------------
## fl <- file.choose()


## ----file-exists---------------------------------------------------------
file.exists(fl)


## ----merge---------------------------------------------------------------
csv <- read.csv(fl, row.names=1)
class(csv)                              # data.frame
dim(csv)                                # 528 genes x 6 columns
head(csv)                               # log (base 2) fold change, # adjusted P-values


## ----anno----------------------------------------------------------------
cols <- c("CHR", "SYMBOL", "GENENAME")
anno <- select(org.Hs.eg.db, rownames(csv), cols)
class(anno)
dim(anno)
head(anno)
all(anno$CHR %in% "14")                 # all on CHR 14?


## ----merge-annotated-----------------------------------------------------
annotated <- merge(csv, anno, by.x=0, by.y="ENTREZID")
class(annotated)
dim(annotated)
head(annotated)


## ----order-annotated-----------------------------------------------------
o <- order(abs(annotated$log2FoldChange), decreasing=TRUE)


## ----head-annotated------------------------------------------------------
annotated[head(o),]


## ----biomaRt1, eval=FALSE, results="hide"--------------------------------
## ## NEEDS INTERNET ACCESS !!
## library(biomaRt)
## head(listMarts(), 3)                      ## list the marts
## head(listDatasets(useMart("ensembl")), 3) ## mart datasets
## ensembl <-                                ## fully specified mart
##     useMart("ensembl", dataset = "hsapiens_gene_ensembl")
## 
## head(listFilters(ensembl), 3)             ## filters
## myFilter <- "chromosome_name"
## head(filterOptions(myFilter, ensembl), 3) ## return values
## myValues <- c("21", "22")
## head(listAttributes(ensembl), 3)          ## attributes
## myAttributes <- c("ensembl_gene_id","chromosome_name")
## 
## ## assemble and query the mart
## res <- getBM(attributes =  myAttributes, filters =  myFilter,
##              values =  myValues, mart = ensembl)


## ----SYMBOL-to-ENTREZID--------------------------------------------------
library(org.Hs.eg.db)
egid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")$ENTREZID
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
egToTx <- select(txdb, egid, "TXNAME", "GENEID")


## ----cdsBy---------------------------------------------------------------
cdsByTx <- cdsBy(txdb, "tx", use.names=TRUE)[egToTx$TXNAME]


## ----getsequence---------------------------------------------------------
library(BSgenome.Hsapiens.UCSC.hg19)
txx <- extractTranscriptsFromGenome(Hsapiens, cdsByTx)


## ----translate-----------------------------------------------------------
all(width(txx) %% 3 == 0)  # sanity check
translate(txx)             # amino acid sequence


## ----rtracklayer-roi-----------------------------------------------------
roi <- GRanges("chr10", IRanges(92106877, 112106876, names="ENSG00000099194"))


## ----rtracklayer-session-------------------------------------------------
library(rtracklayer) 
session <- browserSession()


## ----rtracklayer-marks---------------------------------------------------
trackName <- "wgEncodeRegTfbsClusteredV2"
tableName <- "wgEncodeRegTfbsClusteredV2"
trFactor <- "ERalpha_a"
ucscTable <- getTable(ucscTableQuery(session, track=trackName,
    range=roi, table=tableName, name=trFactor))


## ----rtracklayer-plot, fig.height=3--------------------------------------
plot(score ~ chromStart, ucscTable, pch="+")
abline(v=start(roi) + (end(roi) - start(roi) + 1) / 2, col="blue")


