## http://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-1147/

## data and annotation sources

## library(RNAseqData.HNRNPC.bam.chr14)
## RNAseqData.HNRNPC.bam.chr14_BAMFILES
## BAM files in the working directory. The pattern requires a literal
## ".bam" extension, so names that merely end in "bam" and index files
## such as "*.bam.bai" are not picked up.
fls <- dir(".", pattern = "\\.bam$")
## Name each file by its file name with the "_chr14.bam" suffix removed;
## the dot is escaped and the match is anchored so only the suffix is cut.
names(fls) <- sub("_chr14\\.bam$", "", fls)

## gene models come from the hg19 UCSC knownGene transcript database
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
## exons of that database, grouped by gene
ex <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, by = "gene")

## count overlaps

library(GenomicRanges)
library(Rsamtools)
## NOTE(review): newer Bioconductor releases provide summarizeOverlaps() in
## GenomicAlignments -- confirm it is on the search path for your version.
## Tally the reads of each BAM file against the exons of each gene.
counts <- summarizeOverlaps(features = ex, reads = fls)
## keep only the genes whose total count over all samples is non-zero
counts <- counts[rowSums(assay(counts)) > 0, ]

## sample-to-sample distances computed on the counts, drawn as a heatmap
heatmap(as.matrix(dist(t(assay(counts)))))

## sample description, from ArrayExpress

url <- "http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-1147/E-MTAB-1147.sdrf.txt"
## sample sheet (SDRF), read over the network with strings kept as character
df0 <- read.delim(url, stringsAsFactors=FALSE)
## drop columns that refer to fastq files (we're dealing with BAM): these
## are the columns holding 16 distinct values. vapply() always yields one
## integer per column, whatever the shape of 'df0'.
idx <- vapply(df0, function(x) length(unique(x)), integer(1)) != 16
## drop = FALSE keeps a data frame even if a single column survives
df <- unique(df0[, idx, drop = FALSE])  # remove duplicate rows
rownames(df) <- df[,"Comment.ENA_RUN."] # set rownames to names of BAM files

## short factor labels: "Rep.<n>" taken from the extract name, and the
## RNAi factor value as the treatment
df$Replicate <- factor(sub(".*Replicate ", "Rep.", df$Extract.Name))
df$Treatment <- factor(df$Factor.Value.RNAI.)

## every column of 'counts' must have a row in the sample sheet; otherwise
## the row lookup below would silently produce all-NA sample annotation
stopifnot(all(colnames(counts) %in% rownames(df)))
colData(counts) <- # add to 'counts'
    as(df[colnames(counts),], "DataFrame")

## sample distance matrix, labelled from the sample annotation:
## rows by treatment, columns by replicate
m <- as.matrix(dist(t(assay(counts))))
rownames(m) <- counts$Treatment
colnames(m) <- counts$Replicate
heatmap(m)

## differential expression

library(DESeq2)
## build the data set and fit it in one step; the design adjusts for
## Replicate and has Treatment as its last term
dds <- DESeq(DESeqDataSet(counts, design = ~ Replicate + Treatment))
res <- results(dds)

plotMA(dds)
## reorder the results by absolute log2 fold change, largest first
idx <- order(abs(res[["log2FoldChange"]]), decreasing = TRUE)
res <- res[idx, ]
head(res)

## annotation

library(org.Hs.eg.db)
## look up symbol and gene name for every gene in 'res'; the row names of
## 'res' are used as Entrez gene ids, stated explicitly via 'keytype'
anno <- select(org.Hs.eg.db, keys = rownames(res),
               columns = c("SYMBOL", "GENENAME"), keytype = "ENTREZID")
## for each row of 'res', find the (first) matching row of 'anno', so that
## 'anno[idx,]' lines up with 'res' row for row even when select() returns
## more than one row for a key
idx <- match(rownames(res), anno$ENTREZID)
res <- cbind(res, anno[idx,])
head(res)

## packages used

## report the R version and the packages in use for this run
sessionInfo()
