% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collision-removal.R
\name{remove_collisions}
\alias{remove_collisions}
\title{Identifies and removes collisions}
\usage{
remove_collisions(
  x,
  association_file,
  independent_sample_id = c("ProjectID", "SubjectID"),
  date_col = "SequencingDate",
  reads_ratio = 10,
  quant_cols = c(seqCount = "seqCount", fragmentEstimate = "fragmentEstimate"),
  report_path = default_report_path(),
  max_workers = NULL
)
}
\arguments{
\item{x}{Either a multi-quantification matrix (recommended) or a
named list of matrices (names must be quantification types)}

\item{association_file}{The association file imported via
\code{import_association_file()}}

\item{independent_sample_id}{A character vector of column names that
identify independent samples}

\item{date_col}{The date column that should be considered.}

\item{reads_ratio}{A single numeric value that represents the ratio that has
to be considered when deciding between \code{seqCount} values.}

\item{quant_cols}{A named character vector where names are
quantification types and
values are the names of the corresponding columns. The quantification
\code{seqCount} MUST be included in the vector.}

\item{report_path}{The path where the report file should be saved.
Can be a folder or \code{NULL} if no report should be produced.
Defaults to \code{{user_home}/ISAnalytics_reports}.}

\item{max_workers}{Maximum number of parallel workers to distribute the
workload. If \code{NULL} (default), the maximum number of workers allowed
is used; otherwise a numeric value is required. WARNING: a higher number
of workers speeds up computation at the cost of memory consumption! Tune
this parameter accordingly.}
}
\value{
Either a multi-quantification matrix or a list of data frames
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
A collision is an integration (aka a unique combination of the provided
\code{mandatory_IS_vars()}) which is observed in more than one
independent sample.
The function tries to decide which independent sample an integration
event should be assigned to, and if no
decision can be taken, the integration is completely removed from the data
frame.
For more details refer to the vignette "Collision removal functionality":
\code{vignette("workflow_start", package = "ISAnalytics")}
}
\section{Required tags}{

The function will explicitly check for the presence of these tags:
\itemize{
\item project_id
\item pool_id
\item pcr_replicate
}
}

\examples{
data("integration_matrices", package = "ISAnalytics")
data("association_file", package = "ISAnalytics")
no_coll <- remove_collisions(
    x = integration_matrices,
    association_file = association_file,
    report_path = NULL
)
head(no_coll)
}
\seealso{
Other Data cleaning and pre-processing: 
\code{\link{aggregate_metadata}()},
\code{\link{aggregate_values_by_key}()},
\code{\link{compute_near_integrations}()},
\code{\link{default_meta_agg}()},
\code{\link{outlier_filter}()},
\code{\link{outliers_by_pool_fragments}()},
\code{\link{purity_filter}()},
\code{\link{realign_after_collisions}()},
\code{\link{threshold_filter}()}
}
\concept{Data cleaning and pre-processing}
