% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/Matched.R
\name{addMatches}
\alias{addMatches}
\alias{endoapply}
\alias{filterMatches}
\alias{matchedData}
\alias{queryVariables}
\alias{targetVariables}
\alias{Matched}
\alias{Matched-class}
\alias{[,Matched-method}
\alias{length,Matched-method}
\alias{show,Matched-method}
\alias{[,Matched,ANY,ANY,ANY-method}
\alias{matches}
\alias{target}
\alias{query,Matched-method}
\alias{targetIndex}
\alias{queryIndex}
\alias{whichTarget}
\alias{whichQuery}
\alias{$,Matched-method}
\alias{colnames,Matched-method}
\alias{scoreVariables}
\alias{queryVariables,Matched-method}
\alias{targetVariables,Matched-method}
\alias{matchedData,Matched-method}
\alias{pruneTarget}
\alias{filterMatches,Matched,missing-method}
\alias{SelectMatchesParam}
\alias{TopRankedMatchesParam}
\alias{ScoreThresholdParam}
\alias{filterMatches,Matched,SelectMatchesParam-method}
\alias{filterMatches,Matched,TopRankedMatchesParam-method}
\alias{filterMatches,Matched,ScoreThresholdParam-method}
\alias{SingleMatchParam}
\alias{filterMatches,Matched,SingleMatchParam-method}
\alias{addMatches,Matched-method}
\alias{endoapply,ANY-method}
\alias{endoapply,Matched-method}
\alias{lapply,Matched-method}
\title{Representation of matches between generic objects}
\usage{
addMatches(object, ...)

endoapply(X, FUN, ...)

filterMatches(object, param, ...)

matchedData(object, ...)

queryVariables(object, ...)

targetVariables(object, ...)

Matched(
  query = list(),
  target = list(),
  matches = data.frame(query_idx = integer(), target_idx = integer(), score = numeric()),
  queryAssay = character(),
  targetAssay = character(),
  metadata = list()
)

\S4method{length}{Matched}(x)

\S4method{show}{Matched}(object)

\S4method{[}{Matched,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE)

matches(object)

target(object)

\S4method{query}{Matched}(x, pattern, ...)

targetIndex(object)

queryIndex(object)

whichTarget(object)

whichQuery(object)

\S4method{$}{Matched}(x, name)

\S4method{colnames}{Matched}(x)

scoreVariables(object)

\S4method{queryVariables}{Matched}(object)

\S4method{targetVariables}{Matched}(object)

\S4method{matchedData}{Matched}(object, columns = colnames(object), ...)

pruneTarget(object)

\S4method{filterMatches}{Matched,missing}(
  object,
  queryValue = integer(),
  targetValue = integer(),
  queryColname = character(),
  targetColname = character(),
  index = integer(),
  keep = TRUE,
  ...
)

SelectMatchesParam(
  queryValue = numeric(),
  targetValue = numeric(),
  queryColname = character(),
  targetColname = character(),
  index = integer(),
  keep = TRUE
)

TopRankedMatchesParam(n = 1L, decreasing = FALSE)

ScoreThresholdParam(threshold = 0, above = FALSE, column = "score")

\S4method{filterMatches}{Matched,SelectMatchesParam}(object, param, ...)

\S4method{filterMatches}{Matched,TopRankedMatchesParam}(object, param, ...)

\S4method{filterMatches}{Matched,ScoreThresholdParam}(object, param, ...)

SingleMatchParam(
  duplicates = c("remove", "closest", "top_ranked"),
  column = "score",
  decreasing = TRUE
)

\S4method{filterMatches}{Matched,SingleMatchParam}(object, param, ...)

\S4method{addMatches}{Matched}(
  object,
  queryValue = integer(),
  targetValue = integer(),
  queryColname = character(),
  targetColname = character(),
  score = rep(NA_real_, length(queryValue)),
  isIndex = FALSE
)

\S4method{endoapply}{ANY}(X, FUN, ...)

\S4method{endoapply}{Matched}(X, FUN, ...)

\S4method{lapply}{Matched}(X, FUN, ...)
}
\arguments{
\item{object}{a \code{Matched} object.}

\item{...}{additional parameters.}

\item{X}{\code{Matched} object.}

\item{FUN}{for \code{lapply} and \code{endoapply}: user defined \code{function} that takes a
\code{Matched} object as a first parameter and possibly additional parameters
(that need to be provided in the \code{lapply} or \code{endoapply} call). For \code{lapply}
\code{FUN} can return any object while for \code{endoapply} it must return a
\code{Matched} object.}

\item{param}{for \code{filterMatches}: parameter object to select and customize
the filtering procedure.}

\item{query}{object with the query elements.}

\item{target}{object with the elements against which \code{query} has been
matched.}

\item{matches}{\code{data.frame} with columns \code{"query_idx"} (\code{integer}),
\code{"target_idx"} (\code{integer}) and \code{"score"} (\code{numeric}) representing the n:m
mapping of elements between the \code{query} and the \code{target} objects.}

\item{queryAssay}{\code{character} that needs to be specified when \code{query} is
a \code{QFeatures}. In this case, \code{queryAssay} is expected to be the name of
one of the assays in \code{query} (the one on which the matching was performed).}

\item{targetAssay}{\code{character} that needs to be specified when \code{target} is
a \code{QFeatures}. In this case, \code{targetAssay} is expected to be the name of
one of the assays in \code{target} (the one on which the matching was
performed).}

\item{metadata}{\code{list} with optional additional metadata.}

\item{x}{\code{Matched} object.}

\item{i}{\code{integer} or \code{logical} defining the \code{query} elements to keep.}

\item{j}{for \code{[}: ignored.}

\item{drop}{for \code{[}: ignored.}

\item{pattern}{for \code{query}: ignored.}

\item{name}{for \code{$}: the name of the column (or variable) to extract.}

\item{columns}{for \code{matchedData}: \code{character} vector with column names of
variables that should be extracted.}

\item{queryValue}{for \code{SelectMatchesParam}: vector of values to search for in
\code{query} (if \code{query} is 1-dimensional) or in column \code{queryColname} of
\code{query} (if \code{query} is 2-dimensional). For \code{addMatches}: either an index
in \code{query} or value in column \code{queryColname} of \code{query} defining (together
with \code{targetValue}) the pair of query and target elements for which a
match should be manually added. Lengths of \code{queryValue} and
\code{targetValue} have to match.}

\item{targetValue}{for \code{SelectMatchesParam}: vector of values to search for
in \code{target} (if \code{target} is 1-dimensional) or in column \code{targetColname} of
\code{target} (if \code{target} is 2-dimensional). For \code{addMatches}: either an
index in \code{target} or value in column \code{targetColname} of \code{target} defining
(together with \code{queryValue}) the pair of query and target elements for
which a match should be manually added. Lengths of \code{queryValue} and
\code{targetValue} have to match.}

\item{queryColname}{for \code{SelectMatchesParam}: if \code{query} is 2-dimensional it
represents the column of \code{query} against which elements of \code{queryValue}
are compared.}

\item{targetColname}{for \code{SelectMatchesParam}: if \code{target} is 2-dimensional it
represents the column of \code{target} against which elements of \code{targetValue}
are compared.}

\item{index}{for \code{SelectMatchesParam}: indices of the matches to keep (if
\code{keep = TRUE}) or to drop (if \code{keep = FALSE}).}

\item{keep}{for \code{SelectMatchesParam}: \code{logical}. If \code{keep = TRUE} the matches
are kept, if \code{keep = FALSE} they are removed.}

\item{n}{for \code{TopRankedMatchesParam}: \code{integer(1)} with number of best
ranked matches to keep for each \code{query} element.}

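\item{decreasing}{for \code{TopRankedMatchesParam}: \code{logical(1)} whether scores
should be ordered increasing or decreasing. Defaults to
\code{decreasing = FALSE}. For \code{SingleMatchParam} (with
\code{duplicates = "top_ranked"}): \code{logical(1)} whether the match with the
highest (\code{decreasing = TRUE}, the default) or lowest
(\code{decreasing = FALSE}) value in \code{column} is selected.}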

\item{threshold}{for \code{ScoreThresholdParam}: \code{numeric(1)} specifying the
threshold to consider for the filtering.}

\item{above}{for \code{ScoreThresholdParam}: \code{logical(1)} specifying whether
to keep matches above (\code{above = TRUE}) or below (\code{above = FALSE}) a certain
threshold.}

\item{column}{for \code{ScoreThresholdParam}: \code{character(1)} specifying the name
of the score variable to consider for the filtering (the default is
\code{column = "score"}). For \code{SingleMatchParam}: \code{character(1)} defining the
name of the column to be used for de-duplication. See description of
\code{SingleMatchParam} in the \emph{Filtering and subsetting} section for details.}

\item{duplicates}{for \code{SingleMatchParam}: \code{character(1)} defining the
\emph{de-duplication} strategy. See the description of \code{SingleMatchParam} in
the \emph{Filtering and subsetting} subsection for choices and details.}

\item{score}{for \code{addMatches}: \code{numeric} (same length as \code{queryValue}) or
\code{data.frame} (same number of rows as \code{queryValue}) specifying the scores
for the matches to add. If not specified, an \code{NA} will be used as score.}

\item{isIndex}{for \code{addMatches}: specifies if \code{queryValue} and
\code{targetValue} are expected to be vectors of indices.}
}
\value{
See the descriptions of the individual methods and functions for details.
}
\description{
Matches between \emph{query} and \emph{target} generic objects can be represented by
the \code{Matched} object. By default, all data accessors work as
\emph{left joins} between the \emph{query} and the \emph{target} object, i.e. values are
returned for each \emph{query} object with eventual duplicated entries (values)
if the \emph{query} object matches more than one \emph{target} object. See also
\emph{Creation and general handling}, \emph{Filtering and subsetting} as well as
\emph{Extracting data} sections below for details and more information.

The \code{Matched} object allows to represent matches between one-dimensional
\code{query} and \code{target} objects (being e.g. \code{numeric} or \code{list}),
two-dimensional objects (\code{data.frame} or \code{matrix}) or more complex
structures such as \code{SummarizedExperiment} or \code{QFeatures}. Combinations of
all these different data types are also supported. Matches are represented
between elements of one-dimensional objects, or rows for two-dimensional
objects (including \code{SummarizedExperiment} or \code{QFeatures}). For
\code{\link[QFeatures:QFeatures-class]{QFeatures::QFeatures()}} objects, matching to only one of the \emph{assays}
within the object is supported.
}
\section{Creation and general handling}{


A \code{Matched} object is returned as result from the \code{\link[=matchValues]{matchValues()}} function.

Alternatively, \code{Matched} objects can also be created with the \code{Matched}
function providing the \code{query} and \code{target} objects as well as the \code{matches}
\code{data.frame} with two columns of integer indices defining which elements
from \emph{query} match which element from \emph{target}.
\itemize{
\item \code{addMatches}: add new matches to an existing object. Parameters
\code{queryValue} and \code{targetValue} allow to define which element(s) in
\code{query} and \code{target} should be considered matching. If \code{isIndex = TRUE},
both \code{queryValue} and \code{targetValue} are considered to be integer indices
identifying the matching elements in \code{query} and \code{target}, respectively.
Alternatively (with \code{isIndex = FALSE}) \code{queryValue} and \code{targetValue} can
be elements in columns \code{queryColname} or \code{targetColname} which can be used
to identify the matching elements. Note that in this case
\strong{only the first} matching pair is added. Parameter \code{score} allows to
provide the score for the match. It can be a numeric with the score or a
\code{data.frame} with additional information on the manually added matches. In
both cases its length (or number of rows) has to match the length of
\code{queryValue}. See examples below for more information.
\item \code{endoapply}: applies a user defined function \code{FUN} to each subset of
matches in a \code{Matched} object corresponding to a \code{query} element (i.e. for
each \code{x[i]} with \code{i} being 1 to \code{length(x)}). The results are then combined
in a single \code{Matched} object representing updated matches. Note that \code{FUN}
has to return a \code{Matched} object.
\item \code{lapply}: applies a user defined function \code{FUN} to each subset of
matches in a \code{Matched} object for each \code{query} element (i.e. to each \code{x[i]}
with \code{i} from \code{1} to \code{length(x)}). It returns a \code{list} of \code{length(x)}
elements where each element is the output of \code{FUN} applied to each subset
of matches.
}
}

\section{Filtering and subsetting}{

\itemize{
\item \code{[}: subset the object selecting \code{query} object elements to keep with
parameter \code{i}. The resulting object will contain all the matches
for the selected query elements. The \code{target} object will by default be
returned as-is.
\item \code{filterMatches}: filter matches in a \code{Matched} object using different
approaches depending on the class of \code{param}:
\itemize{
\item \code{ScoreThresholdParam}: keeps only the matches whose score is strictly
above or strictly below a certain threshold (respectively when parameter
\code{above = TRUE} and \code{above = FALSE}). The name of the column containing
the scores to be used for the filtering can be specified with parameter
\code{column}. The default for \code{column} is \code{"score"}. Such variable is present
in each \code{Matched} object. The name of other score variables (if present)
can be provided (the names of all score variables can be obtained with
\code{scoreVariables()} function). For example \code{column = "score_rt"} can be
used to filter matches based on retention time scores for \code{Matched}
objects returned by \code{\link[=matchValues]{matchValues()}} when \code{param} objects involving a
retention time comparison are used.
\item \code{SelectMatchesParam}: keeps or removes (respectively when parameter
\code{keep = TRUE} and \code{keep = FALSE}) matches corresponding to certain
indices or values of \code{query} and \code{target}. If \code{queryValue} and
\code{targetValue} are provided, matches for these value pairs are kept or
removed. Parameter \code{index} allows to filter matches providing their index
in the \code{\link[=matches]{matches()}} \code{data.frame}. Note that \code{filterMatches} removes
only matches from the \code{\link[=matches]{matches()}} \code{data.frame} of the \code{Matched}
object but does not alter the \code{query} or \code{target} in the object. See examples below for more
information.
\item \code{SingleMatchParam}: reduces matches to keep only (at most) a
single match per query. The deduplication strategy can be defined with
parameter \code{duplicates}:
\itemize{
\item \code{duplicates = "remove"}: all matches for query elements matching more
than one target element will be removed.
\item \code{duplicates = "closest"}: keep only the \emph{closest} match for each
query element. The closest match is defined by the value(s) of
\emph{score} (and eventually \emph{score_rt}, if present). The one match with
the smallest value for this (these) column(s) is retained. This is
equivalent to \code{TopRankedMatchesParam(n = 1L, decreasing = FALSE)}.
\item \code{duplicates = "top_ranked"}: select the \emph{best ranking} match for each
query element. Parameter \code{column} allows to specify the column by
which matches are ranked (use \code{targetVariables(object)} or
\code{scoreVariables(object)} to list possible columns). Parameter
\code{decreasing} allows to define whether the match with the highest
(\code{decreasing = TRUE}) or lowest (\code{decreasing = FALSE}) value in
\code{column} for each \emph{query} will be selected.
}
\item \code{TopRankedMatchesParam}: for each query element the matches are ranked
according to their score and only the \code{n} best of them are kept (if \code{n}
is larger than the number of matches for a given query element all the
matches are returned). For the ranking (ordering) R's \code{rank} function is
used on the absolute values of the scores (variable \code{"score"}), thus,
smaller score values (representing e.g. smaller differences between
expected and observed m/z values) are considered \emph{better}. By
setting parameter \code{decreasing = TRUE} matches can be ranked in decreasing
order (i.e. higher scores are ranked higher and are thus selected).
If besides variable \code{"score"} also variable \code{"score_rt"} is available in
the \code{Matched} object (which is the case for the \code{Matched} object
returned by \code{\link[=matchValues]{matchValues()}} for \code{param} objects involving a retention
time comparison), the ordering of the matches is based on the product of
the ranks of the two variables (ranking of retention time differences
is performed on the absolute value of \code{"score_rt"}). Thus, matches with
small (or, depending on parameter \code{decreasing}, large) values for
\code{"score"} \strong{and} \code{"score_rt"} are returned.
}
\item \code{pruneTarget}: \emph{cleans} the object by removing non-matched
\strong{target} elements.
}
}

\section{Extracting data}{

\itemize{
\item \code{$} extracts a single variable from the \code{Matched} \code{x}. The variables that
can be extracted can be listed using \code{colnames(x)}. These variables can
belong to \emph{query}, \emph{target} or be related to the matches (e.g. the
score of each match). If the \emph{query} (\emph{target}) object is two dimensional,
its columns can be extracted (prefix \code{"target_"} is used for columns in the
\emph{target} object) otherwise if \emph{query} (\emph{target}) has only a single
dimension (e.g. is a \code{list} or a \code{character}) the whole object can be
extracted with \code{x$query} (\code{x$target}). More precisely, when
\emph{query} (\emph{target}) is a \code{SummarizedExperiment} the columns from
\code{rowData(query)} (\code{rowData(target)}) are extracted; when \emph{query} (\emph{target})
is a \code{\link[QFeatures:QFeatures-class]{QFeatures::QFeatures()}} the columns from \code{rowData} of the assay
specified in the \code{queryAssay} (\code{targetAssay}) slot are extracted.
The matching scores
are available as \emph{variable} \code{"score"}. Similar to a left join between the
query and target elements, this function returns a value for each query
element, with eventual duplicated values for query elements matching more
than one target element. If variables from the target \code{data.frame} are
extracted, an \code{NA} is reported for the entries corresponding to \emph{query}
elements that don't match any target element. See examples below for
more details.
\item \code{length} returns the number of \strong{query} elements.
\item \code{matchedData} allows to extract multiple variables contained in the
\code{Matched} object as a \code{DataFrame}. Parameter \code{columns} allows to
define which columns (or variables) should be returned (defaults to
\code{columns = colnames(object)}). Each single column in the returned
\code{DataFrame} is constructed in the same way as in \code{$}. That is, like \code{$},
this function performs a \emph{left join} of variables from the \emph{query} and
\emph{target} objects returning all values for all \emph{query} elements
(eventually returning duplicated elements for query elements matching
multiple target elements) and the values for the target elements matched
to the respective query elements (or \code{NA} if the query element is not
matched to any target element).
\item \code{matches} returns a \code{data.frame} with the actual matching information with
columns \code{"query_idx"} (index of the element in \code{query}), \code{"target_idx"}
(index of the element in \code{target}), \code{"score"} (the score of the match) and
eventual additional columns.
\item \code{target} returns the \emph{target} object.
\item \code{targetIndex} returns the indices of the matched targets in the order they
are assigned to the query elements. The length of the returned \code{integer}
vector is equal to the total number of matches in the object. \code{targetIndex}
and \code{queryIndex} are aligned, i.e. each element in them represents a matched
query-target pair.
\item \code{query} returns the \emph{query} object.
\item \code{queryIndex} returns the indices of the query elements with matches to
target elements. The length of the returned \code{integer} vector is equal to
the total number of matches in the object. \code{targetIndex} and \code{queryIndex}
are aligned, i.e. each element in them represents a matched query-target
pair.
\item \code{queryVariables} returns the names of the variables (columns) in \emph{query}.
\item \code{scoreVariables} returns the names of the score variables stored in the
\code{Matched} object (precisely the names of the variables in \code{matches(object)}
containing the string "score" in their name ignoring the case).
\item \code{targetVariables} returns the names of the variables (columns) in \emph{target}
(prefixed with \code{"target_"}).
\item \code{whichTarget} returns an \code{integer} with the indices of the elements in
\emph{target} that match at least one element in \emph{query}.
\item \code{whichQuery} returns an \code{integer} with the indices of the elements in
\emph{query} that match at least one element in \emph{target}.
}
}

\examples{

## Creating a `Matched` object.
q1 <- data.frame(col1 = 1:5, col2 = 6:10)
t1 <- data.frame(col1 = 11:16, col2 = 17:22)
## Define matches between query row 1 with target row 2 and, query row 2
## with target rows 2,3,4 and query row 5 with target row 5.
mo <- Matched(
    q1, t1, matches = data.frame(query_idx = c(1L, 2L, 2L, 2L, 5L),
                                 target_idx = c(2L, 2L, 3L, 4L, 5L),
                                 score = seq(0.5, 0.9, by = 0.1)))
mo

## Which of the query elements (rows) match at least one target
## element (row)?
whichQuery(mo)

## Which target elements (rows) match at least one query element (row)?
whichTarget(mo)

## Extracting variable "col1" from query object.
mo$col1

## We have duplicated values for the entries of `col1` related to query
## elements (rows) matched to multiple rows of the target object. The
## value of `col1` is returned for each element (row) in the query.

## Extracting variable "col1" from target object. To access columns from
## target we have to prefix the name of the column by `"target_"`.
## Note that only values of `col1` for rows matching at least one query
## row are returned and an NA is reported for query rows without matching
## target rows.
mo$target_col1

## The 3rd and 4th query rows do not match any target row, thus `NA` is
## returned.

## `matchedData` can be used to extract all (or selected) columns
## from the object. Same as with `$`, a left join between the columns
## from the query and the target is performed. Below we extract selected
## columns from the object as a DataFrame.
res <- matchedData(mo, columns = c("col1", "col2", "target_col1",
                                   "target_col2"))
res
res$col1
res$target_col1

## With the `queryIndex` and `targetIndex` it is possible to extract the
## indices of the matched query-target pairs:
queryIndex(mo)
targetIndex(mo)

## Hence, the first match is between the query with index 1 to the target
## with index 2, then, query with index 2 is matched to target with index 2
## and so on.

## The example matched object contains all query and all target
## elements (rows). Below we subset the object keeping only query rows that
## are matched to at least one target row.
mo_sub <- mo[whichQuery(mo)]

## mo_sub contains now only 3 query rows:
nrow(query(mo_sub))

## while the original object contains all 5 query rows:
nrow(query(mo))

## Both objects contain however still the full target object:
nrow(target(mo))
nrow(target(mo_sub))

## With the `pruneTarget` we can however reduce also the target rows to
## only those that match at least one query row
mo_sub <- pruneTarget(mo_sub)
nrow(target(mo_sub))

########
## Creating a `Matched` object with a `data.frame` for `query` and a `vector`
## for `target`. The matches are specified in the same way as the example
## before.

q1 <- data.frame(col1 = 1:5, col2 = 6:10)
t2 <- 11:16
mo <- Matched(q1, t2, matches = data.frame(query_idx = c(1L, 2L, 2L, 2L, 5L),
    target_idx = c(2L, 2L, 3L, 4L, 5L), score = seq(0.5, 0.9, by = 0.1)))

## *target* is a simple vector and has thus no columns. The matched values
## from target, if it does not have dimensions and hence column names, can
## be retrieved with `$target`
mo$target

## Note that in this case "target" is returned by the function `colnames`
colnames(mo)

## As before, we can extract all data as a `DataFrame`
res <- matchedData(mo)
res

## Note that the columns of the obtained `DataFrame` are the same as the
## corresponding vectors obtained with `$`
res$col1
res$target

## Also subsetting and pruning works in the same way as the example above.

mo_sub <- mo[whichQuery(mo)]

## mo_sub contains now only 3 query rows:
nrow(query(mo_sub))

## while the original object contains all 5 query rows:
nrow(query(mo))

## Both objects contain however still the full target object:
length(target(mo))
length(target(mo_sub))

## Reducing the target elements to only those that match at least one query
## row
mo_sub <- pruneTarget(mo_sub)
length(target(mo_sub))

########
## Filtering `Matched` with `filterMatches`

## Inspecting the matches in `mo`:
mo$col1
mo$target

## We have thus target *12* matched to both query elements with values 1 and
## 2, and query element 2 is matching 3 target elements. Let's assume we want
## to resolve these multiple mappings and keep from them only the match between
## query 1 (column `"col1"` containing value `1`) with target 2 (value `12`)
## and query 2 (column `"col1"` containing value `2`) with target 3 (value
## `13`). In addition we also want to keep query element 5 (value `5` in
## column `"col1"`) with the target with value `15`:
mo_sub <- filterMatches(mo,
    SelectMatchesParam(queryValue = c(1, 2, 5), queryColname = "col1",
                       targetValue = c(12, 13, 15)))
matchedData(mo_sub)

## Alternatively to specifying the matches to filter with `queryValue` and
## `targetValue` it is also possible to specify directly the index of the
## match(es) in the `matches` `data.frame`:
matches(mo)

## To keep only matches like in the example above we could use:
mo_sub <- filterMatches(mo, SelectMatchesParam(index = c(1, 3, 5)))
matchedData(mo_sub)

## Note also that, instead of keeping the specified matches, it would be
## possible to remove them by setting `keep = FALSE`. Below we remove
## selected matches from the object:
mo_sub <- filterMatches(mo,
    SelectMatchesParam(queryValue = c(2, 2), queryColname = "col1",
                       targetValue = c(12, 14), keep = FALSE))
mo_sub$col1
mo_sub$target

## As alternative to *manually* selecting matches it is also possible to
## filter matches keeping only the *best matches* using the
## `TopRankedMatchesParam`. This will rank matches for each query based on
## their *score* value and select the best *n* matches with lowest score
## values (i.e. smallest difference in m/z values).
mo_sub <- filterMatches(mo, TopRankedMatchesParam(n = 1L))
matchedData(mo_sub)

## Additionally it is possible to select matches based on a threshold
## for their *score*. Below we keep matches with score below 0.75 (one
## could select matches with *score* greater than the threshold by setting
## `ScoreThresholdParam` parameter `above = TRUE`).
mo_sub <- filterMatches(mo, ScoreThresholdParam(threshold = 0.75))
matchedData(mo_sub)

########
## Selecting the best match for each `query` element with `endoapply`

## It is also possible to select for each `query` element the match with the
## lowest score using `endoapply`. We manually define a function to select
## the best match for each query and give it as input to `endoapply`
## together with the `Matched` object itself. We obtain the same results as
## in the `TopRankedMatchesParam` example above.

FUN <- function(x) {
    if(nrow(x@matches) > 1)
        x@matches <- x@matches[order(x@matches$score)[1], , drop = FALSE]
    x
}

mo_sub <- endoapply(mo, FUN)
matchedData(mo_sub)

########
## Adding matches using `addMatches`

## `addMatches` allows to manually add matches. Below we add a new match
## between the `query` element with a value of `1` in column `"col1"` and
## the target element with a value of `15`. Parameter `score` allows to
## assign a score value to the match.
mo_add <- addMatches(mo, queryValue = 1, queryColname = "col1",
    targetValue = 15, score = 1.40)
matchedData(mo_add)
## Matches are always sorted by `query`, thus, the new match is listed as
## second match.

## Alternatively, we can also provide a `data.frame` with parameter `score`
## which enables us to add additional information to the added match. Below
## we define the score and an additional column specifying that this match
## was added manually. This information will then also be available in the
## `matchedData`.
mo_add <- addMatches(mo, queryValue = 1, queryColname = "col1",
    targetValue = 15, score = data.frame(score = 5, manual = TRUE))
matchedData(mo_add)

## The match will get a score of NA if we're not providing any score.
mo_add <- addMatches(mo, queryValue = 1, queryColname = "col1",
    targetValue = 15)
matchedData(mo_add)

## Creating a `Matched` object with a `SummarizedExperiment` for `query` and
## a `vector` for `target`. The matches are specified in the same way as
## the example before.
library(SummarizedExperiment)
q1 <- SummarizedExperiment(
  assays = data.frame(matrix(NA, 5, 2)),
  rowData = data.frame(col1 = 1:5, col2 = 6:10),
  colData = data.frame(cD1 = c(NA, NA), cD2 = c(NA, NA)))
t1 <- data.frame(col1 = 11:16, col2 = 17:22)
## Define matches between row 1 in rowData(q1) with target row 2 and,
## rowData(q1) row 2 with target rows 2,3,4 and rowData(q1) row 5 with target
## row 5.
mo <- Matched(
    q1, t1, matches = data.frame(query_idx = c(1L, 2L, 2L, 2L, 5L),
                                target_idx = c(2L, 2L, 3L, 4L, 5L),
                                 score = seq(0.5, 0.9, by = 0.1)))
mo

## Which of the query elements (rows) match at least one target
## element (row)?
whichQuery(mo)

## Which target elements (rows) match at least one query element (row)?
whichTarget(mo)

## Extracting variable "col1" from rowData(q1).
mo$col1

## We have duplicated values for the entries of `col1` related to rows of
## rowData(q1) matched to multiple rows of the target data.frame t1. The
## value of `col1` is returned for each row in the rowData of query.

## Extracting variable "col1" from target object. To access columns from
## target we have to prefix the name of the column by `"target_"`.
## Note that only values of `col1` for rows matching at least one row in
## rowData of query are returned and an NA is reported for those without
## matching target rows.
mo$target_col1

## The 3rd and 4th query rows do not match any target row, thus `NA` is
## returned.

## `matchedData` can be used to extract all (or selected) columns
## from the object. Same as with `$`, a left join between the columns
## from the query and the target is performed. Below we extract selected
## columns from the object as a DataFrame.
res <- matchedData(mo, columns = c("col1", "col2", "target_col1",
                                  "target_col2"))
res
res$col1
res$target_col1

## The example `Matched` object contains all rows in the
## `rowData` of the `SummarizedExperiment` and all target rows. Below we
## subset the object keeping only rows that are matched to at least one
## target row.
mo_sub <- mo[whichQuery(mo)]

## mo_sub contains now a `SummarizedExperiment` with only 3 rows:
nrow(query(mo_sub))

## while the original object contains a `SummarizedExperiment` with all 5
## rows:
nrow(query(mo))

## Both objects contain however still the full target object:
nrow(target(mo))
nrow(target(mo_sub))

## With the `pruneTarget` we can however reduce also the target rows to
## only those that match at least one row in the `rowData` of query
mo_sub <- pruneTarget(mo_sub)
nrow(target(mo_sub))
}
\seealso{
\code{\link[=MatchedSpectra]{MatchedSpectra()}} for matched \code{\link[Spectra:Spectra]{Spectra::Spectra()}} objects.
}
\author{
Andrea Vicini, Johannes Rainer
}
