## ----experimentPrep, eval = FALSE--------------------------------------------- # library(BiocParallel) # dir.create("fastq", showWarnings = FALSE) # # extractSRA <- function(sra_accession, exe_path = 'fastq-dump', # args = '--split-3 --gzip', outdir = 'fastq', # dry_run = FALSE) { # cmdline = sprintf('%s %s --outdir %s %s', # exe_path, args, outdir, sra_accession) # if (dry_run) { # message("will run with this command line:\n", cmdline) # } else { # return(system(cmdline)) # } # } # # samples <- c("SRR5273705", "SRR5273689", "SRR5273699", "SRR5273683") # # bplapply(samples, extractSRA, BPPARAM = MulticoreParam(4)) # # annotation <- data.frame(samples, # tissue = c("brain", "brain", "heart", "heart")) ## ----downloadReference, eval = FALSE------------------------------------------ # dir.create("reference/raw", recursive = TRUE, showWarnings = FALSE) # download.file("ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_mouse/release_M16/gencode.vM16.transcripts.fa.gz", # destfile = "reference/raw/transcriptome.fa.gz") ## ----indexBuild, eval = FALSE------------------------------------------------- # dir.create("reference/index", showWarnings = FALSE) # # system("kallisto index -i reference/index/kallistoIdx.idx reference/raw/transcriptome.fa.gz") # system("salmon index -t reference/raw/transcriptome.fa.gz -i reference/index/salmon_index") # system("gunzip -c reference/raw/transcriptome.fa.gz > reference/raw/transcriptome.fa && sailfish index -t reference/raw/transcriptome.fa -o reference/index/sailfish_index") # # library(Biostrings) # # dnSt <- names(readDNAStringSet("reference/raw/transcriptome.fa.gz")) # dnSt <- sapply(strsplit(dnSt, "\\||\\."), "[[", 1) ## ----versions, eval = FALSE--------------------------------------------------- # library(SummarizedBenchmark) # # getKallistoVersion <- function() { # vers <- # suppressWarnings(system("kallisto", intern = TRUE)[1]) # strsplit(vers, " ")[[1]][2] # } # # getSalmonVersion <- function() { # vers <- # suppressWarnings(system("salmon --version 2>&1", intern = TRUE)[1]) # strsplit(vers, " ")[[1]][2] # } # # getSailfishVersion <- function() { # vers <- # suppressWarnings(system("sailfish --version 2>&1", intern = TRUE)[1]) # strsplit(vers, " ")[[1]][3] # } ## ----eval = FALSE------------------------------------------------------------- # dir.create("out/kallisto", showWarnings = FALSE) # dir.create("out/salmon", showWarnings = FALSE) # dir.create("out/sailfish", showWarnings = FALSE) # # runKallisto <- function(sample, args = "") { # fastqFile1 <- sprintf( "fastq/%s_1.fastq.gz", sample) # fastqFile2 <- gsub( "_1", "_2", fastqFile1) # output <- sprintf("out/kallisto/%s.out", sample) # cmd <- sprintf("kallisto quant -i reference/index/kallistoIdx.idx -o %s %s %s %s", # output, args, fastqFile1, fastqFile2) # system(cmd) # require(tximport) # ab <- tximport(file.path(output, "abundance.h5"), # type = "kallisto", txOut = TRUE) # counts <- ab$counts[, 1] # names(counts) <- sapply(strsplit(names(counts), "\\||\\."), "[[", 1) # counts # } # # runSalmon <- function(sample, args = "-l A -p 4") { # fastqFile1 <- sprintf("fastq/%s_1.fastq.gz", sample) # fastqFile2 <- gsub( "_1", "_2", fastqFile1) # output <- sprintf("out/salmon/%s.out", sample) # cmd <- sprintf("salmon quant -i reference/index/salmon_index %s -o %s -1 %s -2 %s", # args, output, fastqFile1, fastqFile2) # system(cmd) # require(tximport) # counts <- tximport(file.path(output, "quant.sf"), # type = "salmon", txOut = TRUE)$counts[, 1] # names(counts) <- sapply(strsplit(names(counts), "\\||\\."), "[[", 1) # counts # } # # runSailfish <- function(sample, args = "-l IU") { # fastqFile1 <- sprintf( "fastq/%s_1.fastq.gz", sample) # fastqFile2 <- gsub( "_1", "_2", fastqFile1) # output <- sprintf("out/sailfish/%s.out", sample) # cmd <- sprintf("echo \"sailfish quant -i reference/index/sailfish_index %s -o %s -1 <(zcat %s) -2 <(zcat %s)\" | bash", # args, output, fastqFile1, fastqFile2) # cat(cmd) # system(cmd) # counts <- tximport(file.path(output, "quant.sf"), # type = "sailfish", txOut = TRUE)$counts[, 1] # names(counts) <- sapply(strsplit(names(counts), "\\||\\."), "[[", 1) # counts # } ## ----eval=FALSE--------------------------------------------------------------- # library(SummarizedBenchmark) # library(tximport) # # b <- BenchDesign() %>% # addMethod(label = "kallisto-default", # func = runKallisto, # params = rlang::quos(sample = sample, # args = "-t 16"), # meta = list(pkg_vers = rlang::quo(getKallistoVersion())) # ) %>% # addMethod(label = "kallisto-bias", # func = runKallisto, # params = rlang::quos(sample = sample, # args = "--bias -t 16"), # meta = list(pkg_vers = rlang::quo(getKallistoVersion())) # ) %>% # addMethod(label = "salmon-default", # func = runSalmon, # params = rlang::quos(sample = sample, # args = "-l IU -p 16"), # meta = list(pkg_vers = rlang::quo(getSalmonVersion())) # ) %>% # addMethod(label = "salmon-gcBias", # func = runSalmon, # params = rlang::quos(sample=sample, # args="-l IU --gcBias -p 16"), # meta = list(pkg_vers = rlang::quo(getSalmonVersion())) # ) %>% # addMethod(label = "sailfish-default", # func = runSailfish, # params = rlang::quos(sample=sample, # args="-l IU -p 16"), # meta = list(pkg_vers = rlang::quo(getSailfishVersion())) # ) # # printMethods(b) ## ----runBenchmark, eval=FALSE------------------------------------------------- # dat <- list(txIDs = dnSt) # allSB <- lapply(samples, function(sample) { # dat[["sample"]] <- sample # sb <- buildBench(b, data = dat, sortIDs = "txIDs", # catchErrors = FALSE, parallel = TRUE, # BPPARAM = SerialParam()) # colData( sb )$sample <- sample # colData( sb )$tissue <- as.character(annotation$tissue[annotation$sample == sample]) # sb # }) ## ----subsample, eval=FALSE---------------------------------------------------- # keepRows <- sapply(allSB, function(x) { rowSums(is.na(assay(x))) }) # keepRows <- rowSums(keepRows) == 0 # allSB <- lapply(allSB, function(x) { x[keepRows, ] }) # # set.seed(12) # keepRows <- sample(seq_len(nrow(allSB[[1]])), 50000) # # allSB <- lapply(allSB, function(x) { x[keepRows, ] }) # # #save(allSB, file = "../data/quantSB.rda", # # compress = "xz", compression_level = 9) ## ----loadSB, message=FALSE---------------------------------------------------- library(SummarizedBenchmark) data("quantSB") ## ----metadata----------------------------------------------------------------- colData(allSB[[1]])[, c("param.args", "meta.version", "sample", "tissue")] ## ----pcaPlot------------------------------------------------------------------ keep <- !rowSums( is.na( assays( allSB[[1]] )[["default"]] ) ) > 0 pcaRes <- prcomp( log10( t( assays( allSB[[1]] )[["default"]][keep,] ) + 1 ) ) varExp <- round( 100*(pcaRes$sdev/sum( pcaRes$sdev )), 2) tidyData <- data.frame( PC1=pcaRes$x[,"PC1"], PC2=pcaRes$x[,"PC2"], sample=colData( allSB[[1]] )$sample, label=gsub("\\d+$", "", rownames( colData(allSB[[1]]) ) ) ) tidyData <- tidyData %>% dplyr::mutate( method=gsub( "-.*$", "", label) ) tidyData %>% ggplot( aes( PC1, PC2, colour=label ) ) + geom_point() + coord_fixed() + ylab(sprintf( "PC2 (%0.2f %%)", varExp[2]) ) + xlab(sprintf( "PC1 (%0.2f %%)", varExp[1]) ) + theme(legend.pos="top") + guides(col = guide_legend(nrow = 5), shape = guide_legend(nrow = 4)) ## ----rnaseqcomp, message=FALSE, eval=FALSE------------------------------------ # library(rnaseqcomp) # library(biomaRt) # data(simdata) # houseHuman <- simdata$meta$gene[simdata$meta$house] # houseHuman <- gsub("\\.\\d+", "", houseHuman ) # # mart <- useMart( "ensembl", "mmusculus_gene_ensembl" ) # geneMap <- getBM( c( "ensembl_transcript_id", # "hsapiens_homolog_ensembl_gene", # "hsapiens_homolog_orthology_type" ), # mart = mart) # geneMap <- # geneMap[geneMap$`hsapiens_homolog_orthology_type` == "ortholog_one2one",] # houseMouse <- # geneMap$ensembl_transcript_id[geneMap$hsapiens_homolog_ensembl_gene %in% # houseHuman] # # allSB <- do.call( cbind, allSB ) # colData( allSB )$label <- gsub("\\d+$", "", rownames( colData( allSB ) ) ) # # condInfo <- # colData( allSB )[colData( allSB )$label == "kallisto-default","tissue"] # # replicateInfo <- condInfo # evaluationFeature <- rep( TRUE, length.out = nrow( allSB ) ) # # calibrationFeature <- rownames(allSB) %in% houseMouse # # unitReference <- 1 # quantificationList <- lapply( # split( seq_along( colnames( allSB ) ), colData( allSB )$label ), # function(x) { assay( allSB )[,x] }) # # compObject <- signalCalibrate( # quantificationList, # factor(condInfo), factor(replicateInfo), # evaluationFeature, calibrationFeature, unitReference, # calibrationFeature2 = calibrationFeature) # # compObject