## ----GenSetup, include = FALSE------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", crop = NULL ## Related to ## https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html ) ## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE---------------- ## Bib setup library("RefManageR") ## Write bibliography information bib <- c( R = citation(), BiocStyle = citation("BiocStyle")[1], knitr = citation("knitr")[1], RefManageR = citation("RefManageR")[1], rmarkdown = citation("rmarkdown")[1], sessioninfo = citation("sessioninfo")[1], testthat = citation("testthat")[1], ISAnalytics = citation("ISAnalytics")[1], VISPA2 = BibEntry( bibtype = "Article", title = paste( "VISPA2: a scalable pipeline for", "high-throughput identification and", "annotation of vector integration sites" ), author = "Giulio Spinozzi, Andrea Calabria, Stefano Brasca", journaltitle = "BMC Bioinformatics", date = "2017-11-25", doi = "10.1186/s12859-017-1937-9" ), eulerr = citation("eulerr")[1] ) library(ISAnalytics) library(BiocStyle) library(DT) main_fig <- fs::path("../man", "figures", "dyn_vars_general.png") ## ----------------------------------------------------------------------------- inspect_tags("chromosome") ## ----echo=FALSE--------------------------------------------------------------- all_tags <- available_tags() all_tags <- all_tags |> dplyr::mutate(needed_in = purrr::map_chr( .data$needed_in, ~ paste0(.x, collapse = ", ") )) mand_tags <- all_tags |> dplyr::filter(.data$dyn_vars_tbl == "mand_vars") |> dplyr::select(dplyr::all_of(c("tag", "needed_in", "description"))) annot_tags <- all_tags |> dplyr::filter(.data$dyn_vars_tbl == "annot_vars") |> dplyr::select(dplyr::all_of(c("tag", "needed_in", "description"))) af_tags <- all_tags |> dplyr::filter(.data$dyn_vars_tbl == "af_vars") |> dplyr::select(dplyr::all_of(c("tag", "needed_in", "description"))) iss_tags <- all_tags |> dplyr::filter(.data$dyn_vars_tbl == "iss_vars") |> dplyr::select(dplyr::all_of(c("tag", "needed_in", "description"))) ## ----echo=FALSE--------------------------------------------------------------- datatable( mand_tags, class = "stripe", options = list(dom = "t"), rownames = FALSE ) ## ----echo=FALSE--------------------------------------------------------------- datatable( annot_tags, class = "stripe", options = list(dom = "t"), rownames = FALSE ) ## ----echo=FALSE--------------------------------------------------------------- datatable( af_tags, class = "stripe", options = list(dom = "t"), rownames = FALSE ) ## ----echo=FALSE--------------------------------------------------------------- datatable( iss_tags, class = "stripe", options = list(dom = "t"), rownames = FALSE ) ## ----------------------------------------------------------------------------- mandatory_IS_vars(TRUE) ## ----------------------------------------------------------------------------- new_mand_vars <- tibble::tribble( ~names, ~types, ~transform, ~flag, ~tag, "chrom", "char", ~ stringr::str_replace_all(.x, "chr", ""), "required", "chromosome", "position", "int", NULL, "required", "locus", "strand", "char", NULL, "required", "is_strand", "gap", "int", NULL, "required", NA_character_, "junction", "int", NULL, "required", NA_character_ ) ## ----------------------------------------------------------------------------- set_mandatory_IS_vars(new_mand_vars) mandatory_IS_vars(TRUE) ## ----------------------------------------------------------------------------- new_mand_vars[1, ]$tag <- NA_character_ set_mandatory_IS_vars(new_mand_vars) mandatory_IS_vars(TRUE) ## ----------------------------------------------------------------------------- reset_mandatory_IS_vars() mandatory_IS_vars(TRUE) ## ----------------------------------------------------------------------------- matrix_file_suffixes() ## ----eval=FALSE--------------------------------------------------------------- # enable_progress_bars() ## ----echo=FALSE--------------------------------------------------------------- sample_sparse_matrix <- tibble::tribble( ~chr, ~integration_locus, ~strand, ~GeneName, ~GeneStrand, ~exp1, ~exp2, ~exp3, "1", 12324, "+", "NFATC3", "+", 4553, 5345, NA_integer_, "6", 657532, "+", "LOC100507487", "+", 76, 545, 5, "7", 657532, "+", "EDIL3", "-", NA_integer_, 56, NA_integer_, ) print(sample_sparse_matrix, width = Inf) ## ----------------------------------------------------------------------------- fs_path <- generate_default_folder_structure() withr::with_options(list(ISAnalytics.reports = FALSE), code = { af <- import_association_file(fs_path$af, root = fs_path$root) }) ## ----echo=FALSE--------------------------------------------------------------- print(head(af)) ## ----results='hide'----------------------------------------------------------- vispa_stats <- import_Vispa2_stats( association_file = af, join_with_af = FALSE, report_path = NULL ) ## ----echo=FALSE--------------------------------------------------------------- print(head(vispa_stats)) ## ----message=FALSE, results='hide'-------------------------------------------- matrix_path <- fs::path( fs_path$root, "PJ01", "quantification", "POOL01-1", "PJ01_POOL01-1_seqCount_matrix.no0.annotated.tsv.gz" ) matrix <- import_single_Vispa2Matrix(matrix_path) ## ----echo=FALSE--------------------------------------------------------------- matrix ## ----------------------------------------------------------------------------- withr::with_options(list(ISAnalytics.reports = FALSE), { matrices <- import_parallel_Vispa2Matrices(af, c("seqCount", "fragmentEstimate"), mode = "AUTO" ) }) ## ----echo=FALSE--------------------------------------------------------------- print(head(matrices)) ## ----echo=FALSE--------------------------------------------------------------- library(ISAnalytics) ex_coll <- tibble::tribble( ~chr, ~integration_locus, ~strand, ~seqCount, ~CompleteAmplificationID, ~SubjectID, ~ProjectID, "1", 123454, "+", 653, "SAMPLE1", "SUBJ01", "PJ01", "1", 123454, "+", 456, "SAMPLE2", "SUBJ02", "PJ01" ) knitr::kable(ex_coll, caption = paste( "Example of collisions: the same", "integration (1, 123454, +) is found", "in 2 different independent samples", "((SUBJ01, PJ01) & (SUBJ02, PJ01))" )) ## ----------------------------------------------------------------------------- data("integration_matrices", package = "ISAnalytics") data("association_file", package = "ISAnalytics") ## Multi quantification matrix no_coll <- remove_collisions( x = integration_matrices, association_file = association_file, report_path = NULL ) ## Matrix list separated <- separate_quant_matrices(integration_matrices) no_coll_list <- remove_collisions( x = separated, association_file = association_file, report_path = NULL ) ## Only sequence count no_coll_single <- remove_collisions( x = separated$seqCount, association_file = association_file, quant_cols = c(seqCount = "Value"), report_path = NULL ) ## ----realign------------------------------------------------------------------ other_realigned <- realign_after_collisions( sc_matrix = no_coll_single, other_matrices = list(fragmentEstimate = separated$fragmentEstimate) ) ## ----echo=FALSE--------------------------------------------------------------- library(ISAnalytics) print(default_meta_agg(), width = Inf) ## ----------------------------------------------------------------------------- data("association_file", package = "ISAnalytics") aggregated_meta <- aggregate_metadata(association_file = association_file) ## ----echo=FALSE--------------------------------------------------------------- print(aggregated_meta) ## ----------------------------------------------------------------------------- data("integration_matrices", package = "ISAnalytics") data("association_file", package = "ISAnalytics") aggreg <- aggregate_values_by_key( x = integration_matrices, association_file = association_file, value_cols = c("seqCount", "fragmentEstimate") ) ## ----echo=FALSE--------------------------------------------------------------- print(aggreg, width = Inf) ## ----------------------------------------------------------------------------- agg1 <- aggregate_values_by_key( x = integration_matrices, association_file = association_file, key = c("SubjectID", "ProjectID"), value_cols = c("seqCount", "fragmentEstimate") ) ## ----echo=FALSE--------------------------------------------------------------- print(agg1, width = Inf) ## ----------------------------------------------------------------------------- agg2 <- aggregate_values_by_key( x = integration_matrices, association_file = association_file, key = "SubjectID", lambda = list(mean = ~ mean(.x, na.rm = TRUE)), value_cols = c("seqCount", "fragmentEstimate") ) ## ----echo=FALSE--------------------------------------------------------------- print(agg2, width = Inf) ## ----------------------------------------------------------------------------- agg3 <- aggregate_values_by_key( x = integration_matrices, association_file = association_file, key = "SubjectID", lambda = list(describe = ~ list(psych::describe(.x))), value_cols = c("seqCount", "fragmentEstimate") ) ## ----echo=FALSE--------------------------------------------------------------- print(agg3, width = Inf) ## ----------------------------------------------------------------------------- agg4 <- aggregate_values_by_key( x = integration_matrices, association_file = association_file, key = "SubjectID", lambda = list(sum = sum, mean = mean), value_cols = c("seqCount", "fragmentEstimate") ) ## ----echo=FALSE--------------------------------------------------------------- print(agg4, width = Inf) ## ----------------------------------------------------------------------------- agg5 <- aggregate_values_by_key( x = integration_matrices, association_file = association_file, key = "SubjectID", lambda = list(sum = sum, mean = mean), group = c(mandatory_IS_vars()), value_cols = c("seqCount", "fragmentEstimate") ) ## ----echo=FALSE--------------------------------------------------------------- print(agg5, width = Inf) ## ----------------------------------------------------------------------------- ## Aggregation by standard key agg <- aggregate_values_by_key(integration_matrices, association_file, value_cols = c("seqCount", "fragmentEstimate") ) agg <- agg |> dplyr::filter(TimePoint %in% c("0030", "0060")) ## ----echo=FALSE--------------------------------------------------------------- print(agg, width = Inf) ## ----------------------------------------------------------------------------- sharing_1 <- is_sharing(agg, group_key = c( "SubjectID", "CellMarker", "Tissue", "TimePoint" ), n_comp = 2, is_count = TRUE, relative_is_sharing = TRUE, minimal = TRUE, include_self_comp = FALSE, keep_genomic_coord = TRUE ) sharing_1 ## ----------------------------------------------------------------------------- sharing_1_a <- is_sharing(agg, group_key = c( "SubjectID", "CellMarker", "Tissue", "TimePoint" ), n_comp = 3, is_count = TRUE, relative_is_sharing = TRUE, minimal = TRUE, include_self_comp = FALSE, keep_genomic_coord = TRUE ) sharing_1_a ## ----------------------------------------------------------------------------- sharing_1_b <- is_sharing(agg, group_key = c( "SubjectID", "CellMarker", "Tissue", "TimePoint" ), n_comp = 2, is_count = TRUE, relative_is_sharing = TRUE, minimal = FALSE, include_self_comp = TRUE ) sharing_1_b heatmaps <- sharing_heatmap(sharing_1_b) ## ----------------------------------------------------------------------------- heatmaps$absolute heatmaps$on_g1 heatmaps$on_union ## ----------------------------------------------------------------------------- sharing_2 <- is_sharing(agg, group_keys = list( g1 = c( "SubjectID", "CellMarker", "Tissue", "TimePoint" ), g2 = c("SubjectID", "CellMarker"), g3 = c("CellMarker", "Tissue") ) ) sharing_2 ## ----------------------------------------------------------------------------- first_sample <- agg |> dplyr::filter( SubjectID == "PT001", CellMarker == "MNC", Tissue == "BM", TimePoint == "0030" ) second_sample <- agg |> dplyr::filter( SubjectID == "PT001", CellMarker == "MNC", Tissue == "BM", TimePoint == "0060" ) sharing_3 <- is_sharing(first_sample, second_sample, group_key = c( "SubjectID", "CellMarker", "Tissue", "TimePoint" ), is_count = TRUE, relative_is_sharing = TRUE, minimal = TRUE ) sharing_3 ## ----------------------------------------------------------------------------- sharing_3_a <- is_sharing( first_sample, second_sample, group_key = c( "CellMarker", "Tissue" ), is_count = TRUE, relative_is_sharing = TRUE, minimal = FALSE ) sharing_3_a ## ----------------------------------------------------------------------------- df1 <- agg |> dplyr::filter(TimePoint == "0030") df2 <- agg |> dplyr::filter(TimePoint == "0060") df3 <- agg |> dplyr::filter(Tissue == "BM") keys <- list( g1 = c("SubjectID", "CellMarker", "Tissue"), g2 = c("SubjectID", "Tissue"), g3 = c("SubjectID", "CellMarker", "Tissue") ) sharing_4 <- is_sharing(df1, df2, df3, group_keys = keys) sharing_4 ## ----------------------------------------------------------------------------- sharing_5 <- is_sharing(agg, group_keys = list( g1 = c( "SubjectID", "CellMarker", "Tissue", "TimePoint" ), g2 = c("SubjectID", "CellMarker"), g3 = c("CellMarker", "Tissue") ), table_for_venn = TRUE ) sharing_5 ## ----------------------------------------------------------------------------- sharing_plots1 <- sharing_venn(sharing_5, row_range = 1, euler = TRUE) sharing_plots2 <- sharing_venn(sharing_5, row_range = 1, euler = FALSE) ## ----------------------------------------------------------------------------- plot(sharing_plots1[[1]]) plot(sharing_plots2[[1]]) ## ----reproduce3, echo=FALSE------------------------------------------------------------------------------------------- ## Session info library("sessioninfo") options(width = 120) session_info() ## ----vignetteBiblio, results = "asis", echo = FALSE, warning = FALSE, message = FALSE--------------------------------- ## Print bibliography PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))