## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- BiocStyle::markdown() options(width = 100, max.print = 1000) knitr::opts_chunk$set( eval = as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache = as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), tidy.opts = list(width.cutoff = 100), tidy = FALSE) ## ----setting, eval=TRUE, echo=FALSE--------------------------------------------------------------- if (file.exists("spr_project")) unlink("spr_project", recursive = TRUE) is_win <- Sys.info()[['sysname']] != "Windows" ## ----load_library, eval=TRUE, include=FALSE------------------------------------------------------- suppressPackageStartupMessages({ library(systemPipeR) }) ## ----utilities, eval=TRUE, warning= FALSE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Relevant features in `systemPipeR`. Workflow design concepts are illustrated under (A). Examples of `systemPipeR's` visualization functionalities are given under (B).", warning=FALSE---- knitr::include_graphics("images/utilities.png") ## ----general, warning= FALSE, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Overview of `systemPipeR` workflows management instances. A) A typical analysis workflow requires multiple software tools (red), as well the description of the input (green) and output files, including analysis reports (purple). B) `systemPipeR` provides multiple utilities to design and build a workflow, allowing multi-instance, integration of R code and command-line software, a simple and efficient annotation system, that allows automatic control of the input and output data, and multiple resources to manage the entire workflow. c) Options are provided to execute single or multiple workflow steps, while enabling scalability, checkpoints, and generation of technical and scientific reports.", warning=FALSE---- knitr::include_graphics("images/general.png") ## ----sysargslistImage, warning= FALSE, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Workflow steps with input/output file operations are controlled by the _`SYSargsList`_ container. Each of its components (_`SYSargs2`_) are constructed from an optional *targets* and two *param* files. Alternatively, _`LineWise`_ instances containing pure R code can be used.", warning=FALSE---- knitr::include_graphics("images/SYSargsList.png") ## ----sprandCWL, warning=FALSE, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Example of 'Hello World' message using CWL syntax and demonstrating the connectivity with `systemPipeR`. (A) This file describes the command-line tool, here using 'echo' command. (B) This file describes all the parameters variables connected with the tool specification file. Here the reference value of the input parameter can be specific or can be filled dynamically, adding a variable that connects with the targets files from `systemPipeR`. (C) `SYSargsList` function provides the 'inputvars' arguments to build the connectivity between the CWL description of parameters and the targets files. The argument requires a named vector where each vector element is required to be defined in the CWL description of parameters file (B), and the names of the elements are needed to match the column names defined in the targets file (D).", warning=FALSE---- knitr::include_graphics("images/SPR_CWL_hello.png") ## ----install, eval=FALSE-------------------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager") # BiocManager::install("systemPipeR") # BiocManager::install("systemPipeRdata") ## ----documentation, eval=FALSE-------------------------------------------------------------------- # library("systemPipeR") # Loads the package # library(help="systemPipeR") # Lists package info # vignette("systemPipeR") # Opens vignette ## ----generate_workenvir, eval=FALSE--------------------------------------------------------------- # genWorkenvir(workflow="rnaseq") # setwd("rnaseq") ## ----dir, eval=TRUE, echo=FALSE, warning= FALSE, out.width="100%", fig.align = "center", fig.cap= "*systemPipeR's* preconfigured directory structure.", warning=FALSE---- knitr::include_graphics("images/spr_project.png") ## ----targetsSE, eval=TRUE------------------------------------------------------------------------- targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR") showDF(read.delim(targetspath, comment.char = "#")) ## ----targetsPE, eval=TRUE------------------------------------------------------------------------- targetspath <- system.file("extdata", "targetsPE.txt", package = "systemPipeR") showDF(read.delim(targetspath, comment.char = "#")) ## ----comment_lines, echo=TRUE--------------------------------------------------------------------- readLines(targetspath)[1:4] ## ----targetscomp, eval=TRUE----------------------------------------------------------------------- readComp(file = targetspath, format = "vector", delim = "-") ## ----targetsFig, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "_`systemPipeR`_ automatically creates the downstream `targets` files based on the previous steps outfiles. A) Usually, users provide the initial `targets` files, and this step will generate some outfiles, as demonstrated on B. Then, those files are used to build the new `targets` files as inputs in the next step. _`systemPipeR`_ (C) manages this connectivity among the steps automatically for the users.", warning=FALSE---- knitr::include_graphics("images/targets_con.png") ## ----cleaning1, eval=TRUE, include=FALSE---------------------------------------------------------- if (file.exists(".SPRproject")) unlink(".SPRproject", recursive = TRUE) ## NOTE: Removing previous project create in the quick starts section ## ----genNew_wf, eval=TRUE------------------------------------------------------------------------- systemPipeRdata::genWorkenvir(workflow = "new", mydirname = "spr_project") ## ----eval=FALSE, warning=FALSE-------------------------------------------------------------------- # setwd("spr_project") ## ----setting_dir, include=FALSE, warning=FALSE---------------------------------------------------- knitr::opts_knit$set(root.dir = 'spr_project') ## ----SPRproject_ex, eval=TRUE--------------------------------------------------------------------- sal <- SPRproject() ## ----importwf_example, eval=TRUE------------------------------------------------------------------ sal <- importWF(sal, file_path = system.file("extdata", "spr_simple_wf.Rmd", package = "systemPipeR"), verbose = FALSE) ## ----run_echo, eval=is_win------------------------------------------------------------------------ sal <- runWF(sal) ## ----plot_echo, eval=TRUE------------------------------------------------------------------------- plotWF(sal, width = "80%", rstudio = TRUE) ## ----status_echo, eval=TRUE----------------------------------------------------------------------- statusWF(sal) ## ----logs_echo, eval=FALSE------------------------------------------------------------------------ # sal <- renderLogs(sal) ## ----cleaning2, eval=TRUE, include=FALSE---------------------------------------------------------- if (file.exists(".SPRproject")) unlink(".SPRproject", recursive = TRUE) ## NOTE: Removing previous project create in the quick starts section ## ----SPRproject1, eval=TRUE----------------------------------------------------------------------- sal <- SPRproject(projPath = getwd(), overwrite = TRUE) ## ----SPRproject_logs, eval=FALSE------------------------------------------------------------------ # sal <- SPRproject(logs.dir= ".SPRproject", sys.file=".SPRproject/SYSargsList.yml") ## ----SPRproject_dir, eval=FALSE------------------------------------------------------------------- # sal <- SPRproject(data = "data", param = "param", results = "results") ## ----SPRproject_env, eval=FALSE------------------------------------------------------------------- # sal <- SPRproject(envir = new.env()) ## ----projectInfo, eval=TRUE----------------------------------------------------------------------- sal projectInfo(sal) ## ----length, eval=TRUE---------------------------------------------------------------------------- length(sal) ## ----sal_check, eval=TRUE------------------------------------------------------------------------- sal ## ----firstStep_R, eval=TRUE----------------------------------------------------------------------- appendStep(sal) <- LineWise(code = { mapply(function(x, y) write.csv(x, y), split(iris, factor(iris$Species)), file.path("results", paste0(names(split(iris, factor(iris$Species))), ".csv")) ) }, step_name = "export_iris") ## ----show, eval=TRUE------------------------------------------------------------------------------ sal ## ----codeLine, eval=TRUE-------------------------------------------------------------------------- codeLine(sal) ## ----gzip_secondStep, eval=TRUE------------------------------------------------------------------- targetspath <- system.file("extdata/cwl/gunzip", "targets_gunzip.txt", package = "systemPipeR") appendStep(sal) <- SYSargsList(step_name = "gzip", targets = targetspath, dir = TRUE, wf_file = "gunzip/workflow_gzip.cwl", input_file = "gunzip/gzip.yml", dir_path = system.file("extdata/cwl", package = "systemPipeR"), inputvars = c(FileName = "_FILE_PATH_", SampleName = "_SampleName_"), dependency = "export_iris") ## ------------------------------------------------------------------------------------------------- sal ## ------------------------------------------------------------------------------------------------- cmdlist(sal, step = "gzip") ## ------------------------------------------------------------------------------------------------- outfiles(sal) ## ----gunzip, eval=TRUE---------------------------------------------------------------------------- appendStep(sal) <- SYSargsList(step_name = "gunzip", targets = "gzip", dir = TRUE, wf_file = "gunzip/workflow_gunzip.cwl", input_file = "gunzip/gunzip.yml", dir_path = system.file("extdata/cwl", package = "systemPipeR"), inputvars = c(gzip_file = "_FILE_PATH_", SampleName = "_SampleName_"), rm_targets_col = "FileName", dependency = "gzip") ## ----targetsWF_3, eval=TRUE----------------------------------------------------------------------- targetsWF(sal[3]) ## ----outfiles_2, eval=TRUE------------------------------------------------------------------------ outfiles(sal[3]) ## ------------------------------------------------------------------------------------------------- sal ## ----eval=TRUE------------------------------------------------------------------------------------ cmdlist(sal["gzip"], targets = 1) ## ----getColumn, eval=TRUE------------------------------------------------------------------------- getColumn(sal, step = "gunzip", 'outfiles') ## ----iris_stats, eval=TRUE------------------------------------------------------------------------ appendStep(sal) <- LineWise(code = { df <- lapply(getColumn(sal, step = "gunzip", 'outfiles'), function(x) read.delim(x, sep = ",")[-1]) df <- do.call(rbind, df) stats <- data.frame(cbind(mean = apply(df[,1:4], 2, mean), sd = apply(df[,1:4], 2, sd))) stats$species <- rownames(stats) plot <- ggplot2::ggplot(stats, ggplot2::aes(x = species, y = mean, fill = species)) + ggplot2::geom_bar(stat = "identity", color = "black", position = ggplot2::position_dodge()) + ggplot2::geom_errorbar(ggplot2::aes(ymin = mean-sd, ymax = mean+sd), width = .2, position = ggplot2::position_dodge(.9)) }, step_name = "iris_stats", dependency = "gzip") ## ----importWF_rmd, eval=TRUE---------------------------------------------------------------------- sal_rmd <- SPRproject(logs.dir = ".SPRproject_rmd") sal_rmd <- importWF(sal_rmd, file_path = system.file("extdata", "spr_simple_wf.Rmd", package = "systemPipeR")) ## ----importWF_details----------------------------------------------------------------------------- stepsWF(sal_rmd) dependency(sal_rmd) codeLine(sal_rmd) targetsWF(sal_rmd) ## ----fromFile_example_rules_cmd, eval=FALSE------------------------------------------------------- # targetspath <- system.file("extdata/cwl/example/targets_example.txt", package = "systemPipeR") # appendStep(sal) <- SYSargsList(step_name = "Example", # targets = targetspath, # wf_file = "example/example.cwl", input_file = "example/example.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = c(Message = "_STRING_", SampleName = "_SAMPLE_")) ## ----fromFile_example_rules_r, eval=FALSE--------------------------------------------------------- # appendStep(sal) <- LineWise(code = { # library(systemPipeR) # }, # step_name = "load_lib") ## ----runWF, eval=is_win--------------------------------------------------------------------------- sal <- runWF(sal) ## ----runWF_error, eval=FALSE---------------------------------------------------------------------- # sal <- runWF(sal, steps = c(1,3)) ## ----runWF_force, eval=FALSE---------------------------------------------------------------------- # sal <- runWF(sal, force = TRUE, warning.stop = FALSE, error.stop = TRUE) ## ----runWF_env, eval=FALSE------------------------------------------------------------------------ # viewEnvir(sal) ## ----runWF_saveenv, eval=FALSE-------------------------------------------------------------------- # sal <- runWF(sal, saveEnv = TRUE) ## ----show_statusWF, eval=TRUE--------------------------------------------------------------------- sal ## ----statusWF, eval=TRUE-------------------------------------------------------------------------- statusWF(sal) ## ----runWF_cluster, eval=FALSE-------------------------------------------------------------------- # resources <- list(conffile=".batchtools.conf.R", # template="batchtools.slurm.tmpl", # Njobs=18, # walltime=120, ## minutes # ntasks=1, # ncpus=4, # memory=1024, ## Mb # partition = "short" # ) # sal <- addResources(sal, c("hisat2_mapping"), resources = resources) # sal <- runWF(sal) ## ----eval=TRUE------------------------------------------------------------------------------------ plotWF(sal, show_legend = TRUE, width = "80%", rstudio = TRUE) ## ----eval=FALSE----------------------------------------------------------------------------------- # sal <- renderLogs(sal) ## ----eval=FALSE----------------------------------------------------------------------------------- # sal2rmd(sal) ## ----eval=FALSE----------------------------------------------------------------------------------- # sal2bash(sal) ## ----SPR_resume, eval=FALSE----------------------------------------------------------------------- # sal <- SPRproject(resume = TRUE, logs.dir = ".SPRproject", # sys.file = ".SPRproject/SYSargsList.yml") ## ----resume_load, eval=FALSE---------------------------------------------------------------------- # sal <- SPRproject(resume = TRUE, load.envir = TRUE) ## ----envir, eval=FALSE---------------------------------------------------------------------------- # viewEnvir(sal) # copyEnvir(sal, list="plot", new.env = globalenv()) ## ----restart_load, eval=FALSE--------------------------------------------------------------------- # sal <- SPRproject(restart = TRUE, load.envir = FALSE) ## ----SPR_overwrite, eval=FALSE-------------------------------------------------------------------- # sal <- SPRproject(overwrite = TRUE) ## ------------------------------------------------------------------------------------------------- names(sal) ## ------------------------------------------------------------------------------------------------- length(sal) ## ------------------------------------------------------------------------------------------------- stepsWF(sal) ## ------------------------------------------------------------------------------------------------- cmdlist(sal, step = c(2,3), targets = 1) ## ------------------------------------------------------------------------------------------------- statusWF(sal) ## ------------------------------------------------------------------------------------------------- targetsWF(sal[2]) ## ------------------------------------------------------------------------------------------------- outfiles(sal[2]) ## ------------------------------------------------------------------------------------------------- dependency(sal) ## ----eval=FALSE----------------------------------------------------------------------------------- # targetsheader(sal, step = "Quality") ## ------------------------------------------------------------------------------------------------- stepName(sal) ## ------------------------------------------------------------------------------------------------- SampleName(sal, step = "gzip") SampleName(sal, step = "iris_stats") ## ------------------------------------------------------------------------------------------------- getColumn(sal, "outfiles", step = "gzip", column = "gzip_file") getColumn(sal, "targetsWF", step = "gzip", column = "FileName") ## ------------------------------------------------------------------------------------------------- codeLine(sal, step = "export_iris") ## ------------------------------------------------------------------------------------------------- viewEnvir(sal) ## ------------------------------------------------------------------------------------------------- copyEnvir(sal, list = c("plot"), new.env = globalenv(), silent = FALSE) ## ------------------------------------------------------------------------------------------------- yamlinput(sal, step = "gzip") ## ------------------------------------------------------------------------------------------------- sal[1] sal[1:3] sal[c(1,3)] ## ------------------------------------------------------------------------------------------------- sal_sub <- subset(sal, subset_steps = c( 2,3), input_targets = ("SE"), keep_steps = TRUE) stepsWF(sal_sub) targetsWF(sal_sub) outfiles(sal_sub) ## ----eval=FALSE----------------------------------------------------------------------------------- # sal[1] + sal[2] + sal[3] ## ----eval=TRUE------------------------------------------------------------------------------------ sal_c <- sal ## check values yamlinput(sal_c, step = "gzip") ## check on command-line cmdlist(sal_c, step = "gzip", targets = 1) ## Replace yamlinput(sal_c, step = "gzip", paramName = "ext") <- "txt.gz" ## check NEW values yamlinput(sal_c, step = "gzip") ## Check on command-line cmdlist(sal_c, step = "gzip", targets = 1) ## ----sal_lw_rep, eval=TRUE------------------------------------------------------------------------ appendCodeLine(sal_c, step = "export_iris", after = 1) <- "log_cal_100 <- log(100)" codeLine(sal_c, step = "export_iris") replaceCodeLine(sal_c, step="export_iris", line = 2) <- LineWise(code={ log_cal_100 <- log(50) }) codeLine(sal_c, step = 1) ## ------------------------------------------------------------------------------------------------- renameStep(sal_c, step = 1) <- "newStep" renameStep(sal_c, c(1, 2)) <- c("newStep2", "newIndex") sal_c names(outfiles(sal_c)) names(targetsWF(sal_c)) dependency(sal_c) ## ----eval=FALSE----------------------------------------------------------------------------------- # sal_test <- sal[c(1,2)] # replaceStep(sal_test, step = 1, step_name = "gunzip" ) <- sal[3] # sal_test ## ------------------------------------------------------------------------------------------------- sal_test <- sal[-2] sal_test ## ------------------------------------------------------------------------------------------------- dir_path <- system.file("extdata/cwl", package = "systemPipeR") cwl <- yaml::read_yaml(file.path(dir_path, "example/example.cwl")) ## ------------------------------------------------------------------------------------------------- cwl[1:2] ## ------------------------------------------------------------------------------------------------- cwl[3] ## ------------------------------------------------------------------------------------------------- cwl[4] ## ------------------------------------------------------------------------------------------------- cwl[5] ## ------------------------------------------------------------------------------------------------- cwl[6] ## ------------------------------------------------------------------------------------------------- cwl.wf <- yaml::read_yaml(file.path(dir_path, "example/workflow_example.cwl")) ## ------------------------------------------------------------------------------------------------- cwl.wf[1:2] ## ------------------------------------------------------------------------------------------------- cwl.wf[3] ## ------------------------------------------------------------------------------------------------- cwl.wf[4] ## ------------------------------------------------------------------------------------------------- cwl.wf[5] ## ------------------------------------------------------------------------------------------------- yaml::read_yaml(file.path(dir_path, "example/example_single.yml")) ## ----fromFile, eval=TRUE-------------------------------------------------------------------------- HW <- SYSargsList(wf_file = "example/workflow_example.cwl", input_file = "example/example_single.yml", dir_path = system.file("extdata/cwl", package = "systemPipeR")) HW cmdlist(HW) ## ------------------------------------------------------------------------------------------------- yml <- yaml::read_yaml(file.path(dir_path, "example/example.yml")) yml ## ------------------------------------------------------------------------------------------------- targetspath <- system.file("extdata/cwl/example/targets_example.txt", package = "systemPipeR") read.delim(targetspath, comment.char = "#") ## ----fromFile_example, eval=TRUE------------------------------------------------------------------ HW_mul <- SYSargsList(step_name = "echo", targets=targetspath, wf_file="example/workflow_example.cwl", input_file="example/example.yml", dir_path = dir_path, inputvars = c(Message = "_STRING_", SampleName = "_SAMPLE_")) HW_mul cmdlist(HW_mul) ## ----cmd, eval=TRUE------------------------------------------------------------------------------- command <- " hisat2 \ -S \ -x \ -k \ -min-intronlen \ -max-intronlen \ -threads \ -U " ## ------------------------------------------------------------------------------------------------- cmd <- createParam(command, writeParamFiles = FALSE) ## ----saving, eval=FALSE--------------------------------------------------------------------------- # writeParamFiles(cmd, overwrite = TRUE) ## ------------------------------------------------------------------------------------------------- printParam(cmd, position = "baseCommand") ## Print a baseCommand section printParam(cmd, position = "outputs") printParam(cmd, position = "inputs", index = 1:2) ## Print by index printParam(cmd, position = "inputs", index = -1:-2) ## Negative indexing printing to exclude certain indices in a position ## ------------------------------------------------------------------------------------------------- cmd2 <- subsetParam(cmd, position = "inputs", index = 1:2, trim = TRUE) cmdlist(cmd2) cmd2 <- subsetParam(cmd, position = "inputs", index = c("S", "x"), trim = TRUE) cmdlist(cmd2) ## ------------------------------------------------------------------------------------------------- cmd3 <- replaceParam(cmd, "base", index = 1, replace = list(baseCommand = "bwa")) cmdlist(cmd3) ## ------------------------------------------------------------------------------------------------- new_inputs <- new_inputs <- list( "new_input1" = list(type = "File", preF="-b", yml ="myfile"), "new_input2" = "-L " ) cmd4 <- replaceParam(cmd, "inputs", index = 1:2, replace = new_inputs) cmdlist(cmd4) ## ------------------------------------------------------------------------------------------------- newIn <- new_inputs <- list( "new_input1" = list(type = "File", preF="-b1", yml ="myfile1"), "new_input2" = list(type = "File", preF="-b2", yml ="myfile2"), "new_input3" = "-b3 " ) cmd5 <- appendParam(cmd, "inputs", index = 1:2, append = new_inputs) cmdlist(cmd5) cmd6 <- appendParam(cmd, "inputs", index = 1:2, after=0, append = new_inputs) cmdlist(cmd6) ## ------------------------------------------------------------------------------------------------- new_outs <- list( "sam_out" = "" ) cmd7 <- replaceParam(cmd, "outputs", index = 1, replace = new_outs) output(cmd7) ## ----sysargs2, eval=TRUE-------------------------------------------------------------------------- cmd <- " hisat2 \ -S \ -x \ -k \ -min-intronlen \ -max-intronlen \ -threads \ -U " WF <- createParam(cmd, overwrite = TRUE, writeParamFiles = TRUE, confirm = TRUE) targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR") WF_test <- loadWorkflow(targets = targetspath, wf_file="hisat2.cwl", input_file="hisat2.yml", dir_path = "param/cwl/hisat2/") WF_test <- renderWF(WF_test, inputvars = c(FileName = "_FASTQ_PATH1_")) WF_test cmdlist(WF_test)[1:2] ## ----sysargs2_cwl_structure, echo = FALSE, eval=FALSE--------------------------------------------- # hisat2.cwl <- system.file("extdata", "cwl/hisat2/hisat2-mapping-se.cwl", package = "systemPipeR") # yaml::read_yaml(hisat2.cwl) ## ----sysargs2_yaml_structure, echo = FALSE, eval=FALSE-------------------------------------------- # hisat2.yml <- system.file("extdata", "cwl/hisat2/hisat2-mapping-se.yml", package = "systemPipeR") # yaml::read_yaml(hisat2.yml) ## ----SYSargs2_structure, eval=TRUE---------------------------------------------------------------- library(systemPipeR) targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR") dir_path <- system.file("extdata/cwl", package = "systemPipeR") WF <- loadWF(targets = targetspath, wf_file = "hisat2/hisat2-mapping-se.cwl", input_file = "hisat2/hisat2-mapping-se.yml", dir_path = dir_path) WF <- renderWF(WF, inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_")) ## ----names_WF, eval=TRUE-------------------------------------------------------------------------- names(WF) ## ----cmdlist, eval=TRUE--------------------------------------------------------------------------- cmdlist(WF)[1] ## ----output_WF, eval=TRUE------------------------------------------------------------------------- output(WF)[1] ## ----targets_WF, eval=TRUE------------------------------------------------------------------------ targets(WF)[1] as(WF, "DataFrame") ## ----module_WF, eval=TRUE------------------------------------------------------------------------- modules(WF) ## ----other_WF, eval=FALSE------------------------------------------------------------------------- # files(WF) # inputvars(WF) ## ----lw, eval=TRUE-------------------------------------------------------------------------------- rmd <- system.file("extdata", "spr_simple_lw.Rmd", package = "systemPipeR") sal_lw <- SPRproject(overwrite = TRUE) sal_lw <- importWF(sal_lw, rmd, verbose = FALSE) codeLine(sal_lw) ## ----lw_coerce, eval=TRUE------------------------------------------------------------------------- lw <- stepsWF(sal_lw)[[2]] ## Coerce ll <- as(lw, "list") class(ll) lw <- as(ll, "LineWise") lw ## ----lw_access, eval=TRUE------------------------------------------------------------------------- length(lw) names(lw) codeLine(lw) codeChunkStart(lw) rmdPath(lw) ## ----lw_sub, eval=TRUE---------------------------------------------------------------------------- l <- lw[2] codeLine(l) l_sub <- lw[-2] codeLine(l_sub) ## ----lw_rep, eval=TRUE---------------------------------------------------------------------------- replaceCodeLine(lw, line = 2) <- "5+5" codeLine(lw) appendCodeLine(lw, after = 0) <- "6+7" codeLine(lw) ## ----sal_rep_append, eval=FALSE------------------------------------------------------------------- # replaceCodeLine(sal_lw, step = 2, line = 2) <- LineWise(code={ # "5+5" # }) # codeLine(sal_lw, step = 2) # # appendCodeLine(sal_lw, step = 2) <- "66+55" # codeLine(sal_lw, step = 2) # # appendCodeLine(sal_lw, step = 1, after = 1) <- "66+55" # codeLine(sal_lw, step = 1) ## ----eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Workflow design structure of *`systemPipeR`* using previous version of *`SYSargs`*", warning=FALSE---- # knitr::include_graphics(system.file("extdata/images", "SystemPipeR_Workflow.png", package = "systemPipeR")) ## ----table_tools, echo=FALSE, message=FALSE------------------------------------------------------- library(magrittr) SPR_software <- system.file("extdata", "SPR_software.csv", package = "systemPipeR") software <- read.delim(SPR_software, sep = ",", comment.char = "#") colors <- colorRampPalette((c("darkseagreen", "indianred1")))(length(unique(software$Category))) id <- as.numeric(c((unique(software$Category)))) software %>% dplyr::mutate(Step = kableExtra::cell_spec(Step, color = "white", bold = TRUE, background = factor(Category, id, colors) )) %>% dplyr::select(Tool, Description, Step) %>% dplyr::arrange(Tool) %>% kableExtra::kable(escape = FALSE, align = "c", col.names = c("Tool Name", "Description", "Step")) %>% kableExtra::kable_styling(c("striped", "hover", "condensed"), full_width = TRUE) %>% kableExtra::scroll_box(width = "80%", height = "500px") ## ----cleaning3, eval=TRUE, include=FALSE---------------------------------------------------------- if (file.exists(".SPRproject")) unlink(".SPRproject", recursive = TRUE) ## NOTE: Removing previous project create in the quick starts section ## ----SPRproject2, eval=FALSE---------------------------------------------------------------------- # sal <- SPRproject() ## ----load_SPR, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------------ # appendStep(sal) <- LineWise({ # library(systemPipeR) # }, # step_name = "load_SPR") ## ----preprocessing, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------- # appendStep(sal) <- SYSargsList( # step_name = "preprocessing", # targets = "targetsPE.txt", dir = TRUE, # wf_file = "preprocessReads/preprocessReads-pe.cwl", # input_file = "preprocessReads/preprocessReads-pe.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = c( # FileName1 = "_FASTQ_PATH1_", # FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_" # ), # dependency = c("load_SPR")) ## ----custom_preprocessing_function, eval=FALSE---------------------------------------------------- # appendStep(sal) <- LineWise( # code = { # filterFct <- function(fq, cutoff = 20, Nexceptions = 0) { # qcount <- rowSums(as(quality(fq), "matrix") <= cutoff, na.rm = TRUE) # # Retains reads where Phred scores are >= cutoff with N exceptions # fq[qcount <= Nexceptions] # } # save(list = ls(), file = "param/customFCT.RData") # }, # step_name = "custom_preprocessing_function", # dependency = "preprocessing" # ) ## ----editing_preprocessing, message=FALSE, eval=FALSE--------------------------------------------- # yamlinput(sal, "preprocessing")$Fct # yamlinput(sal, "preprocessing", "Fct") <- "'filterFct(fq, cutoff=20, Nexceptions=0)'" # yamlinput(sal, "preprocessing")$Fct ## check the new function # cmdlist(sal, "preprocessing", targets = 1) ## check if the command line was updated with success ## ----trimGalore, eval=FALSE, spr=TRUE------------------------------------------------------------- # targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR") # appendStep(sal) <- SYSargsList(step_name = "trimGalore", # targets = targetspath, dir = TRUE, # wf_file = "trim_galore/trim_galore-se.cwl", # input_file = "trim_galore/trim_galore-se.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_"), # dependency = "load_SPR", # run_step = "optional") ## ----trimmomatic, eval=FALSE, spr=TRUE------------------------------------------------------------ # targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR") # appendStep(sal) <- SYSargsList(step_name = "trimmomatic", # targets = targetspath, dir = TRUE, # wf_file = "trimmomatic/trimmomatic-se.cwl", # input_file = "trimmomatic/trimmomatic-se.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_"), # dependency = "load_SPR", # run_step = "optional") ## ----fastq_report, eval=FALSE, message=FALSE, spr=TRUE-------------------------------------------- # appendStep(sal) <- LineWise(code = { # fastq <- getColumn(sal, step = "preprocessing", "targetsWF", column = 1) # fqlist <- seeFastq(fastq = fastq, batchsize = 10000, klength = 8) # pdf("./results/fastqReport.pdf", height = 18, width = 4*length(fqlist)) # seeFastqPlot(fqlist) # dev.off() # }, step_name = "fastq_report", # dependency = "preprocessing") ## ----hisat_index, eval=FALSE, spr=TRUE------------------------------------------------------------ # appendStep(sal) <- SYSargsList(step_name = "hisat_index", # targets = NULL, dir = FALSE, # wf_file = "hisat2/hisat2-index.cwl", # input_file = "hisat2/hisat2-index.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = NULL, # dependency = "preprocessing") ## ----hisat_mapping_samtools, eval=FALSE, spr=TRUE------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "hisat_mapping", # targets = "preprocessing", dir = TRUE, # wf_file = "workflow-hisat2/workflow_hisat2-se.cwl", # input_file = "workflow-hisat2/workflow_hisat2-se.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"), # dependency = c("hisat_index"), # run_session = "remote") ## ----bowtie_index, eval=FALSE, spr=TRUE----------------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "bowtie_index", # targets = NULL, dir = FALSE, # wf_file = "bowtie2/bowtie2-index.cwl", # input_file = "bowtie2/bowtie2-index.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = NULL, # dependency = "preprocessing", # run_step = "optional") ## ----tophat2_mapping, eval=FALSE, spr=TRUE-------------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "tophat2_mapping", # targets = "preprocessing", dir = TRUE, # wf_file = "tophat2/workflow_tophat2-mapping-se.cwl", # input_file = "tophat2/tophat2-mapping-se.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"), # dependency = c("bowtie_index"), # run_session = "remote", # run_step = "optional") ## ----bowtie2_mapping, eval=FALSE, spr=TRUE-------------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "bowtie2_mapping", # targets = "preprocessing", dir = TRUE, # wf_file = "bowtie2/workflow_bowtie2-mapping-se.cwl", # input_file = "bowtie2/bowtie2-mapping-se.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"), # dependency = c("bowtie_index"), # run_session = "remote", # run_step = "optional") ## ----bwa_index, eval=FALSE, spr=TRUE-------------------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "bwa_index", # targets = NULL, dir = FALSE, # wf_file = "bwa/bwa-index.cwl", # input_file = "bwa/bwa-index.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = NULL, # dependency = "preprocessing", # run_step = "optional") ## ----bwa_mapping, eval=FALSE, spr=TRUE------------------------------------------------------------ # appendStep(sal) <- SYSargsList(step_name = "bwa_mapping", # targets = "preprocessing", dir = TRUE, # wf_file = "bwa/bwa-se.cwl", # input_file = "bwa/bwa-se.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"), # dependency = c("bwa_index"), # run_session = "remote", # run_step = "optional") ## ----rsubread_index, eval=FALSE, spr=TRUE--------------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "rsubread_index", # targets = NULL, dir = FALSE, # wf_file = "rsubread/rsubread-index.cwl", # input_file = "rsubread/rsubread-index.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = NULL, # dependency = "preprocessing", # run_step = "optional") ## ----rsubread_mapping, eval=FALSE, spr=TRUE------------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "rsubread", # targets = "preprocessing", dir = TRUE, # wf_file = "rsubread/rsubread-mapping-se.cwl", # input_file = "rsubread/rsubread-mapping-se.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"), # dependency = c("rsubread_index"), # run_session = "remote", # run_step = "optional") ## ----gsnap_index, eval=FALSE, spr=TRUE------------------------------------------------------------ # appendStep(sal) <- SYSargsList(step_name = "gsnap_index", # targets = NULL, dir = FALSE, # wf_file = "gsnap/gsnap-index.cwl", # input_file = "gsnap/gsnap-index.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = NULL, # dependency = "preprocessing", # run_step = "optional") ## ----gsnap_mapping, eval=FALSE, spr=TRUE---------------------------------------------------------- # appendStep(sal) <- SYSargsList(step_name = "gsnap", # targets = "targetsPE.txt", dir = TRUE, # wf_file = "gsnap/gsnap-mapping-pe.cwl", # input_file = "gsnap/gsnap-mapping-pe.yml", # dir_path = system.file("extdata/cwl", package = "systemPipeR"), # inputvars = c(FileName1 = "_FASTQ_PATH1_", # FileName2 = "_FASTQ_PATH2_", SampleName = "_SampleName_"), # dependency = c("gsnap_index"), # run_session = "remote", # run_step = "optional") ## ----bam_IGV, eval=FALSE, spr=TRUE---------------------------------------------------------------- # appendStep(sal) <- LineWise( # code = { # bampaths <- getColumn(sal, step = "hisat2_mapping", "outfiles", # column = "samtools_sort_bam") # symLink2bam( # sysargs = bampaths, htmldir = c("~/.html/", "somedir/"), # urlbase = "http://cluster.hpcc.ucr.edu/~tgirke/", # urlfile = "./results/IGVurl.txt") # }, # step_name = "bam_IGV", # dependency = "hisat2_mapping", # run_step = "optional" # ) ## ----create_txdb, message=FALSE, eval=FALSE, spr=TRUE--------------------------------------------- # appendStep(sal) <- LineWise(code = { # library(txdbmaker) # txdb <- makeTxDbFromGFF(file="data/tair10.gff", format="gff", # dataSource="TAIR", organism="Arabidopsis thaliana") # saveDb(txdb, file="./data/tair10.sqlite") # }, # step_name = "create_txdb", # dependency = "hisat_mapping") ## ----read_counting, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------- # appendStep(sal) <- LineWise({ # library(BiocParallel) # txdb <- loadDb("./data/tair10.sqlite") # eByg <- exonsBy(txdb, by="gene") # outpaths <- getColumn(sal, step = "hisat_mapping", 'outfiles', column = 2) # bfl <- BamFileList(outpaths, yieldSize=50000, index=character()) # multicoreParam <- MulticoreParam(workers=4); register(multicoreParam); registered() # counteByg <- bplapply(bfl, function(x) summarizeOverlaps(eByg, x, mode="Union", # ignore.strand=TRUE, # inter.feature=TRUE, # singleEnd=TRUE)) # # Note: for strand-specific RNA-Seq set 'ignore.strand=FALSE' and for PE data set 'singleEnd=FALSE' # countDFeByg <- sapply(seq(along=counteByg), # function(x) assays(counteByg[[x]])$counts) # rownames(countDFeByg) <- names(rowRanges(counteByg[[1]])) # colnames(countDFeByg) <- names(bfl) # rpkmDFeByg <- apply(countDFeByg, 2, function(x) returnRPKM(counts=x, ranges=eByg)) # write.table(countDFeByg, "results/countDFeByg.xls", # col.names=NA, quote=FALSE, sep="\t") # write.table(rpkmDFeByg, "results/rpkmDFeByg.xls", # col.names=NA, quote=FALSE, sep="\t") # }, # step_name = "read_counting", # dependency = "create_txdb") ## ----align_stats, message=FALSE, eval=FALSE, spr=TRUE--------------------------------------------- # appendStep(sal) <- LineWise({ # read_statsDF <- alignStats(args) # write.table(read_statsDF, "results/alignStats.xls", # row.names = FALSE, quote = FALSE, sep = "\t") # }, # step_name = "align_stats", # dependency = "hisat_mapping") ## ----align_stats2, eval=TRUE---------------------------------------------------------------------- read.table(system.file("extdata", "alignStats.xls", package = "systemPipeR"), header = TRUE)[1:4,] ## ----read_counting_mirna, message=FALSE, eval=FALSE, spr=TRUE------------------------------------- # appendStep(sal) <- LineWise({ # system("wget https://www.mirbase.org/ftp/CURRENT/genomes/ath.gff3 -P ./data/") # gff <- rtracklayer::import.gff("./data/ath.gff3") # gff <- split(gff, elementMetadata(gff)$ID) # bams <- getColumn(sal, step = "bowtie2_mapping", 'outfiles', column = 2) # bfl <- BamFileList(bams, yieldSize=50000, index=character()) # countDFmiR <- summarizeOverlaps(gff, bfl, mode="Union", # ignore.strand = FALSE, inter.feature = FALSE) # countDFmiR <- assays(countDFmiR)$counts # # Note: inter.feature=FALSE important since pre and mature miRNA ranges overlap # rpkmDFmiR <- apply(countDFmiR, 2, function(x) returnRPKM(counts = x, ranges = gff)) # write.table(assays(countDFmiR)$counts, "results/countDFmiR.xls", # col.names=NA, quote=FALSE, sep="\t") # write.table(rpkmDFmiR, "results/rpkmDFmiR.xls", col.names=NA, quote=FALSE, sep="\t") # }, # step_name = "read_counting_mirna", # dependency = "bowtie2_mapping") ## ----sample_tree_rlog, message=FALSE, eval=FALSE, spr=TRUE---------------------------------------- # appendStep(sal) <- LineWise({ # library(DESeq2, warn.conflicts=FALSE, quietly=TRUE) # library(ape, warn.conflicts=FALSE) # countDFpath <- system.file("extdata", "countDFeByg.xls", package="systemPipeR") # countDF <- as.matrix(read.table(countDFpath)) # colData <- data.frame(row.names = targetsWF(sal)[[2]]$SampleName, # condition=targetsWF(sal)[[2]]$Factor) # dds <- DESeqDataSetFromMatrix(countData = countDF, colData = colData, # design = ~ condition) # d <- cor(assay(rlog(dds)), method = "spearman") # hc <- hclust(dist(1-d)) # plot.phylo(as.phylo(hc), type = "p", edge.col = 4, edge.width = 3, # show.node.label = TRUE, no.margin = TRUE) # }, # step_name = "sample_tree_rlog", # dependency = "read_counting") ## ----edger, message=FALSE, eval=FALSE, spr=TRUE--------------------------------------------------- # appendStep(sal) <- LineWise({ # targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR") # targets <- read.delim(targetspath, comment = "#") # cmp <- readComp(file = targetspath, format = "matrix", delim = "-") # countDFeBygpath <- system.file("extdata", "countDFeByg.xls", package = "systemPipeR") # countDFeByg <- read.delim(countDFeBygpath, row.names = 1) # edgeDF <- run_edgeR(countDF = countDFeByg, targets = targets, cmp = cmp[[1]], # independent = FALSE, mdsplot = "") # DEG_list <- filterDEGs(degDF = edgeDF, filter = c(Fold = 2, FDR = 10)) # }, # step_name = "edger", # dependency = "read_counting") ## ----deseq2, message=FALSE, eval=FALSE, spr=TRUE-------------------------------------------------- # appendStep(sal) <- LineWise({ # degseqDF <- run_DESeq2(countDF=countDFeByg, targets=targets, cmp=cmp[[1]], # independent=FALSE) # DEG_list2 <- filterDEGs(degDF=degseqDF, filter=c(Fold=2, FDR=10)) # }, # step_name = "deseq2", # dependency = "read_counting") ## ----vennplot, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------------ # appendStep(sal) <- LineWise({ # vennsetup <- overLapper(DEG_list$Up[6:9], type="vennsets") # vennsetdown <- overLapper(DEG_list$Down[6:9], type="vennsets") # vennPlot(list(vennsetup, vennsetdown), mymain="", mysub="", # colmode=2, ccol=c("blue", "red")) # }, # step_name = "vennplot", # dependency = "edger") ## ----get_go_biomart, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------ # appendStep(sal) <- LineWise({ # library("biomaRt") # listMarts() # To choose BioMart database # listMarts(host="plants.ensembl.org") # m <- useMart("plants_mart", host="https://plants.ensembl.org") # listDatasets(m) # m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="https://plants.ensembl.org") # listAttributes(m) # Choose data types you want to download # go <- getBM(attributes=c("go_id", "tair_locus", "namespace_1003"), mart=m) # go <- go[go[,3]!="",]; go[,3] <- as.character(go[,3]) # go[go[,3]=="molecular_function", 3] <- "F" # go[go[,3]=="biological_process", 3] <- "P" # go[go[,3]=="cellular_component", 3] <- "C" # go[1:4,] # dir.create("./data/GO") # write.table(go, "data/GO/GOannotationsBiomart_mod.txt", # quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t") # catdb <- makeCATdb(myfile="data/GO/GOannotationsBiomart_mod.txt", # lib=NULL, org="", colno=c(1,2,3), idconv=NULL) # save(catdb, file="data/GO/catdb.RData") # }, # step_name = "get_go_biomart", # dependency = "edger") ## ----go_enrichment, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------- # appendStep(sal) <- LineWise({ # load("data/GO/catdb.RData") # DEG_list <- filterDEGs(degDF=edgeDF, filter=c(Fold=2, FDR=50), plot=FALSE) # up_down <- DEG_list$UporDown; names(up_down) <- paste(names(up_down), "_up_down", sep="") # up <- DEG_list$Up; names(up) <- paste(names(up), "_up", sep="") # down <- DEG_list$Down; names(down) <- paste(names(down), "_down", sep="") # DEGlist <- c(up_down, up, down) # DEGlist <- DEGlist[sapply(DEGlist, length) > 0] # BatchResult <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="all", # id_type="gene", CLSZ=2, cutoff=0.9, # gocats=c("MF", "BP", "CC"), recordSpecGO=NULL) # library("biomaRt") # m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="https://plants.ensembl.org") # goslimvec <- as.character(getBM(attributes=c("goslim_goa_accession"), mart=m)[,1]) # BatchResultslim <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="slim", # id_type="gene", myslimv=goslimvec, CLSZ=10, # cutoff=0.01, gocats=c("MF", "BP", "CC"), # recordSpecGO=NULL) # gos <- BatchResultslim[grep("M6-V6_up_down", BatchResultslim$CLID), ] # gos <- BatchResultslim # pdf("GOslimbarplotMF.pdf", height=8, width=10); goBarplot(gos, gocat="MF"); dev.off() # goBarplot(gos, gocat="BP") # goBarplot(gos, gocat="CC") # }, # step_name = "go_enrichment", # dependency = "get_go_biomart") ## ----hierarchical_clustering, message=FALSE, eval=FALSE, spr=TRUE--------------------------------- # appendStep(sal) <- LineWise({ # library(pheatmap) # geneids <- unique(as.character(unlist(DEG_list[[1]]))) # y <- assay(rlog(dds))[geneids, ] # pdf("heatmap1.pdf") # pheatmap(y, scale="row", clustering_distance_rows="correlation", # clustering_distance_cols="correlation") # dev.off() # }, # step_name = "hierarchical_clustering", # dependency = c("sample_tree_rlog", "edgeR")) ## ------------------------------------------------------------------------------------------------- plotWF(sal, show_legend = TRUE, width = "80%") ## ----runWF_steps, eval=FALSE---------------------------------------------------------------------- # sal <- runWF(sal) ## ----runWF_cluster_steps, eval=FALSE-------------------------------------------------------------- # resources <- list(conffile=".batchtools.conf.R", # template="batchtools.slurm.tmpl", # Njobs=18, # walltime=120, ## minutes # ntasks=1, # ncpus=4, # memory=1024, ## Mb # partition = "short" # ) # sal <- addResources(sal, c("hisat2_mapping"), resources = resources) # sal <- runWF(sal) ## ----statusWF_steps, eval=FALSE------------------------------------------------------------------- # sal # statusWF(sal) ## ----logsWF_steps, eval=FALSE--------------------------------------------------------------------- # sal <- renderLogs(sal) ## ----quiting, eval=TRUE, echo=FALSE--------------------------------------------------------------- knitr::opts_knit$set(root.dir = '../') unlink("rnaseq", recursive = TRUE) ## ----sessionInfo---------------------------------------------------------------------------------- sessionInfo()