## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## -----------------------------------------------------------------------------
library(SpliceWiz)

## ----eval = FALSE-------------------------------------------------------------
# ref_path <- "./Reference"

## ----eval=FALSE---------------------------------------------------------------
# buildRef(
#     reference_path = ref_path,
#     fasta = "genome.fa", gtf = "transcripts.gtf",
#     genome_type = "hg38"
# )

## ----eval=FALSE---------------------------------------------------------------
# getResources(
#     reference_path = ref_path,
#     fasta = "genome.fa",
#     gtf = "transcripts.gtf"
# )
# 
# buildRef(
#     reference_path = ref_path,
#     fasta = "", gtf = "",
#     genome_type = "hg38"
# )

## ----eval=FALSE---------------------------------------------------------------
# buildRef(
#     reference_path = ref_path,
#     fasta = "genome.fa",
#     gtf = "transcripts.gtf"
#     genome_type = "hg38"
# )

## ----eval=FALSE---------------------------------------------------------------
# # Assuming hg38 genome:
# 
# buildRef(
#     reference_path = ref_path,
#     genome_type = "hg38",
#     overwrite = TRUE
# )

## ----eval=FALSE---------------------------------------------------------------
# FTP <- "ftp://ftp.ensembl.org/pub/release-94/"
# 
# buildRef(
#     reference_path = ref_path,
#     fasta = paste0(FTP, "fasta/homo_sapiens/dna/",
#         "Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"),
#     gtf = paste0(FTP, "gtf/homo_sapiens/",
#         "Homo_sapiens.GRCh38.94.chr.gtf.gz"),
#     genome_type = "hg38"
# )

## -----------------------------------------------------------------------------
require(AnnotationHub)

ah <- AnnotationHub()
query(ah, "Ensembl")

## -----------------------------------------------------------------------------
query(ah, c("Homo Sapiens", "release-94"))

## ----eval=FALSE---------------------------------------------------------------
# buildRef(
#     reference_path = ref_path,
#     fasta = "AH65745",
#     gtf = "AH64631",
#     genome_type = "hg38"
# )

## ----eval=FALSE---------------------------------------------------------------
# buildRef(
#     reference_path = ref_path,
#     fasta = "genome.fa", gtf = "transcripts.gtf",
#     genome_type = ""
# )

## -----------------------------------------------------------------------------
getAvailableGO()

## ----eval=FALSE---------------------------------------------------------------
# buildRef(
#     reference_path = ref_path,
#     fasta = "genome.fa", gtf = "transcripts.gtf",
#     genome_type = "",
#     ontologySpecies = "Arabidopsis thaliana"
# )

## -----------------------------------------------------------------------------
STAR_version()

## ----eval = FALSE-------------------------------------------------------------
# ref_path = "./Reference"
# 
# # Ensure genome resources are prepared from genome FASTA and GTF file:
# 
# if(!dir.exists(file.path(ref_path, "resource"))) {
#     getResources(
#         reference_path = ref_path,
#         fasta = "genome.fa",
#         gtf = "transcripts.gtf"
#     )
# }
# 
# # Generate a STAR genome reference:
# STAR_BuildRef(
#     reference_path = ref_path,
#     n_threads = 8
# )
# 

## ----eval = FALSE-------------------------------------------------------------
# STAR_BuildRef(
#     reference_path = ref_path,
#     STAR_ref_path = "/path/to/another/directory",
#     n_threads = 8
# )

## ----eval = FALSE-------------------------------------------------------------
# # Generate a STAR genome reference:
# STAR_buildGenome(
#     reference_path = ref_path,
#     STAR_ref_path = "/path/to/hg38"
#     n_threads = 8
# )

## ----eval = FALSE-------------------------------------------------------------
# STAR_new_ref <- STAR_loadGenomeGTF(
#     reference_path = ref_path,
#     STAR_ref_path = "/path/to/hg38",
#     STARgenome_output = file.path(tempdir(), "STAR"),
#     n_threads = 8,
#     sjdbOverhang = 100,
#     extraFASTA = "./ercc.fasta"
# )

## ----eval = FALSE-------------------------------------------------------------
# STAR_mappability(
#   reference_path = ref_path,
#   STAR_ref_path = file.path(ref_path, "STAR"),
#   map_depth_threshold = 4,
#   n_threads = 8,
#   read_len = 70,
#   read_stride = 10,
#   error_pos = 35
# )

## ----eval=FALSE---------------------------------------------------------------
# buildFullRef(
#     reference_path = ref_path,
#     fasta = "genome.fa", gtf = "transcripts.gtf",
#     genome_type = "",
#     use_STAR_mappability = TRUE,
#     n_threads = 8
# )

## ----eval = FALSE-------------------------------------------------------------
# require(Rsubread)
# 
# # (1a) Creates genome resource files
# 
# ref_path <- file.path(tempdir(), "Reference")
# 
# getResources(
#     reference_path = ref_path,
#     fasta = chrZ_genome(),
#     gtf = chrZ_gtf()
# )
# 
# # (1b) Systematically generate reads based on the SpliceWiz example genome:
# 
# generateSyntheticReads(
#     reference_path = ref_path
# )
# 
# # (2) Align the generated reads using Rsubread:
# 
# # (2a) Build the Rsubread genome index:
# 
# subreadIndexPath <- file.path(ref_path, "Rsubread")
# if(!dir.exists(subreadIndexPath)) dir.create(subreadIndexPath)
# Rsubread::buildindex(
#     basename = file.path(subreadIndexPath, "reference_index"),
#     reference = chrZ_genome()
# )
# 
# # (2b) Align the synthetic reads using Rsubread::subjunc()
# 
# Rsubread::subjunc(
#     index = file.path(subreadIndexPath, "reference_index"),
#     readfile1 = file.path(ref_path, "Mappability", "Reads.fa"),
#     output_file = file.path(ref_path, "Mappability", "AlignedReads.bam"),
#     useAnnotation = TRUE,
#     annot.ext = chrZ_gtf(),
#     isGTF = TRUE
# )
# 
# # (3) Analyse the aligned reads in the BAM file for low-mappability regions:
# 
# calculateMappability(
#     reference_path = ref_path,
#     aligned_bam = file.path(ref_path, "Mappability", "AlignedReads.bam")
# )
# 
# # (4) Build the SpliceWiz reference using the calculated Mappability Exclusions
# 
# buildRef(ref_path)
# 

## ----eval = FALSE-------------------------------------------------------------
# buildRef(ref_path, genome_type = "hg38")

## -----------------------------------------------------------------------------
STAR_version()

## ----eval = FALSE-------------------------------------------------------------
# STAR_alignReads(
#     fastq_1 = "sample1_1.fastq", fastq_2 = "sample1_2.fastq",
#     STAR_ref_path = file.path(ref_path, "STAR"),
#     BAM_output_path = "./bams/sample1",
#     n_threads = 8,
#     trim_adaptor = "AGATCGGAAG"
# )

## ----eval = FALSE-------------------------------------------------------------
# Experiment <- data.frame(
#     sample = c("sample_A", "sample_B"),
#     forward = file.path("raw_data", c("sample_A", "sample_B"),
#         c("sample_A_1.fastq", "sample_B_1.fastq")),
#     reverse = file.path("raw_data", c("sample_A", "sample_B"),
#         c("sample_A_2.fastq", "sample_B_2.fastq"))
# )
# 
# STAR_alignExperiment(
#     Experiment = Experiment,
#     STAR_ref_path = file.path("Reference_FTP", "STAR"),
#     BAM_output_path = "./bams",
#     n_threads = 8,
#     two_pass = FALSE
# )

## ----eval = FALSE-------------------------------------------------------------
# # Assuming sequencing files are named by their respective sample names
# fastq_files <- findFASTQ(
#     sample_path = "./sequencing_files",
#     paired = TRUE,
#     fastq_suffix = ".fq.gz", level = 0
# )

## ----eval = FALSE-------------------------------------------------------------
# STAR_alignExperiment(
#     Experiment = fastq_files,
#     STAR_ref_path = file.path("Reference_FTP", "STAR"),
#     BAM_output_path = "./bams",
#     n_threads = 8,
#     two_pass = FALSE
# )

## ----eval=FALSE---------------------------------------------------------------
# bams <- findBAMS("./bams", level = 1)

## ----eval=FALSE---------------------------------------------------------------
# # assume SpliceWiz reference has been generated in `ref_path` using the
# # `buildRef()` function.
# 
# processBAM(
#     bamfiles = bams$path,
#     sample_names = bams$sample,
#     reference_path = ref_path,
#     output_path = "./pb_output",
#     n_threads = 4,
#     useOpenMP = TRUE
# )

## ----eval=FALSE---------------------------------------------------------------
# BAM2COV(
#     bamfiles = bams$path,
#     sample_names = bams$sample,
#     output_path = "./cov_output",
#     n_threads = 4,
#     useOpenMP = TRUE
# )

## -----------------------------------------------------------------------------
se <- SpliceWiz_example_NxtSE()

cov_file <- covfile(se)[1]

cov_negstrand <- getCoverage(cov_file, strand = "-")
bw_file <- file.path(tempdir(), "sample_negstrand.bw")
rtracklayer::export(cov_negstrand, bw_file, "bw")

## ----eval=FALSE---------------------------------------------------------------
# expr <- findSpliceWizOutput("./pb_output")

## ----eval = FALSE-------------------------------------------------------------
# collateData(
#     Experiment = expr,
#     reference_path = ref_path,
#     output_path = "./NxtSE_output"
# )

## ----eval = FALSE-------------------------------------------------------------
# collateData(
#     Experiment = expr,
#     reference_path = ref_path,
#     output_path = "./NxtSE_output_novelSplicing",
#     novelSplicing = TRUE
# )

## ----eval = FALSE-------------------------------------------------------------
# se <- makeSE("./NxtSE_output")

## -----------------------------------------------------------------------------
sessionInfo()