% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/main.R
\name{establish_bijection1d}
\alias{establish_bijection1d}
\title{Finds One-to-One Correspondence between Peaks from Replicate 1
and 2}
\usage{
establish_bijection1d(
  rep1_df,
  rep2_df,
  ambiguity_resolution_method = c("overlap", "midpoint", "value"),
  max_gap = -1L
)
}
\arguments{
\item{rep1_df}{data frame of observations (i.e., genomic peaks) of
replicate 1, with at least the following columns (position of columns
matters, column names are irrelevant):
\tabular{rll}{
  column 1:  \tab \code{chr} \tab character; genomic location of peak -
  chromosome (e.g., \code{"chr3"})\cr
  column 2:  \tab \code{start} \tab integer; genomic location of peak -
  start coordinate\cr
  column 3:  \tab \code{end} \tab integer; genomic location of peak -
  end coordinate\cr
  column 4:  \tab \code{value} \tab numeric; p-value, FDR, or heuristic used
  to rank the peaks
}}

\item{rep2_df}{data frame of observations (i.e., genomic peaks) of
replicate 2, with the following columns (position of columns
matters, column names are irrelevant):
\tabular{rll}{
  column 1:  \tab \code{chr} \tab character; genomic location of peak -
  chromosome (e.g., \code{"chr3"})\cr
  column 2:  \tab \code{start} \tab integer; genomic location of peak -
  start coordinate\cr
  column 3:  \tab \code{end} \tab integer; genomic location of peak -
  end coordinate\cr
  column 4:  \tab \code{value} \tab numeric; p-value, FDR, or heuristic used
  to rank the peaks
}}

\item{ambiguity_resolution_method}{defines how ambiguous assignments
(when one peak in replicate 1 overlaps with multiple peaks in
replicate 2 or vice versa)
are resolved. Available methods:
\tabular{rl}{
  \code{"value"} \tab peaks are prioritized by ascending or descending
  \code{value} column (see \code{sorting_direction}), e.g., if two
  peaks in replicate 1 overlap with one peak in replicate 2,
  the peak from replicate 1 is chosen which has a lower (if
  \code{sorting_direction} is \code{"ascending"}) or higher (if
  \code{"descending"}) value\cr
  \code{"overlap"} \tab the peak pair is chosen which has the highest
  relative overlap, i.e., overlap in nucleotides of the replicate 1 peak
  and the replicate 2 peak, normalized by their lengths\cr
  \code{"midpoint"} \tab the peak pair is chosen which has the
  smallest distance between their midpoints, i.e., distance from the
  midpoint of the replicate 1 peak to the midpoint of the
  replicate 2 peak
}}

\item{max_gap}{integer; maximum gap in nucleotides allowed between two
peaks for them to be considered as overlapping
(defaults to -1, i.e., overlapping peaks)}
}
\value{
Data frames \code{rep1_df} and \code{rep2_df} with
the following columns:
\tabular{rll}{
  column 1: \tab \code{chr} \tab character; genomic location of peak -
  chromosome (e.g., \code{"chr3"})\cr
  column 2: \tab \code{start} \tab integer; genomic location of peak -
  start coordinate\cr
  column 3: \tab \code{end} \tab integer; genomic location of peak -
  end coordinate\cr
  column 4: \tab \code{value} \tab numeric; p-value, FDR, or heuristic used
  to rank the peaks\cr
  column 5: \tab \code{rep_value} \tab numeric; value of corresponding
  replicate peak. If no corresponding peak was found, \code{rep_value} is set
  to \code{NA}.\cr
  column 6: \tab \code{rank} \tab integer; rank of the peak, established by
  value column, ascending order\cr
  column 7: \tab \code{rep_rank} \tab integer; rank of corresponding
  replicate peak. If no corresponding peak was found, \code{rep_rank} is
  set to \code{NA}.\cr
  column 8: \tab \code{idx} \tab integer; peak index, primary key\cr
  column 9: \tab \code{rep_idx} \tab integer; specifies the index of the
  corresponding peak in the other replicate (foreign key). If no
  corresponding peak was found, \code{rep_idx} is set to \code{NA}.
}
}
\description{
This method establishes a bijective assignment between peaks from
replicate 1 and 2. A peak in replicate 1 is assigned to a
peak in replicate 2 if and only if (1) they overlap (or the gap between the
peaks is less than or equal to \code{max_gap}), and (2) there is no other
peak in
replicate 2 that overlaps with the peak in replicate 1 and has a
lower \emph{ambiguity resolution value}.
}
\examples{
rep1_df <- idr2d:::chipseq$rep1_df
rep1_df$value <- preprocess(rep1_df$value, "log")

rep2_df <- idr2d:::chipseq$rep2_df
rep2_df$value <- preprocess(rep2_df$value, "log")

mapping <- establish_bijection1d(rep1_df, rep2_df)

}
