% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ScpModel-Class.R,
%   R/ScpModel-ComponentAnalysis.R
\docType{data}
\name{scpModelComponentMethods}
\alias{scpModelComponentMethods}
\alias{ScpModel-ComponentAnalysis}
\alias{scpComponentAnalysis}
\alias{scpComponentAggregate}
\alias{scpComponentPlot}
\alias{scpComponentBiplot}
\title{Component analysis for single cell proteomics}
\format{
An object of class \code{character} of length 3.
}
\usage{
scpModelComponentMethods

scpComponentAnalysis(
  object,
  method = NULL,
  effects = NULL,
  pcaFUN = "auto",
  residuals = TRUE,
  unmodelled = TRUE,
  name,
  ...
)

scpComponentAggregate(componentList, fcol, fun = colMedians, ...)

scpComponentPlot(
  componentList,
  comp = 1:2,
  pointParams = list(),
  maxLevels = NULL
)

scpComponentBiplot(
  scoreList,
  eigenvectorList,
  comp = 1:2,
  pointParams = list(),
  arrowParams = list(arrow = arrow(length = unit(0.2, "cm"))),
  labelParams = list(size = 2, max.overlaps = 10),
  textBy = "feature",
  top = 10,
  maxLevels = NULL
)
}
\arguments{
\item{object}{An object that inherits from the
\code{SummarizedExperiment} class. It must contain an estimated
\code{ScpModel} in its metadata.}

\item{method}{A \code{character()} indicating which approach(es) to use
for principal component analysis (PCA). Allowed values are
\code{"APCA"} (default), \code{"ASCA"} and/or \code{"ASCA.E"} (multiple
values are allowed). \code{"ASCA"}, \code{"APCA"}, \code{"ASCA.E"} are
applied in turn to each desired effect.}

\item{effects}{A \code{character()} indicating on which model variables
the component analysis should be performed. Defaults to all
modelled variables.}

\item{pcaFUN}{A \code{character(1)} indicating which function to use to
perform PCA. "nipals" will use \code{\link[nipals:nipals]{nipals::nipals()}} while "svd"
will use \code{\link[base:svd]{base::svd()}}. If "auto", the function uses "nipals"
if the data contain missing values and "svd" otherwise.}

\item{residuals}{A \code{logical(1)}, if \code{TRUE}, PCA is performed on
the residual matrix as well.}

\item{unmodelled}{A \code{logical(1)}, if \code{TRUE}, PCA is performed on
the input matrix as well.}

\item{name}{A \code{character(1)} providing the name to use to retrieve
the model results. When retrieving a model and \code{name} is
missing, the name of the first model found in \code{object} is used.}

\item{...}{For \code{scpComponentAnalysis()}, further arguments passed
to the PCA function. For \code{scpComponentAggregate()}, further
arguments passed to \code{\link[QFeatures:QFeatures-aggregate]{QFeatures::aggregateFeatures()}}.}

\item{componentList}{A list of component analysis results. This
is typically the \code{bySample} or \code{byFeature} element of the list
returned by \code{scpComponentAnalysis()}.}

\item{fcol}{A \code{character(1)} providing the name of the column
to use to group features.}

\item{fun}{A \code{function} that summarises the values for each group.
See \code{\link[QFeatures:QFeatures-aggregate]{QFeatures::aggregateFeatures()}} for a list of available
functions.}

\item{comp}{An \code{integer(2)} pointing to which components to plot.
The values of \code{comp} are not allowed to exceed the number of
computed components in \code{componentList}.}

\item{pointParams}{A \code{list} where each element is an argument that
is provided to \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}}. This is useful to
change point size, transparency, or assign colour based on an
annotation (see \code{\link[ggplot2:aes]{ggplot2::aes()}}).}

\item{maxLevels}{An \code{integer(1)} indicating how many colour levels
should be shown on the legend when colours are derived from a
discrete factor. If \code{maxLevels = NULL}, all levels are shown.
This parameter is useful to colour points based on a factor
with many levels that would otherwise overcrowd the legend.}

\item{scoreList}{A list of component analysis results. This
is typically the \code{bySample} element in the list returned by
\code{scpComponentAnalysis()}.}

\item{eigenvectorList}{A list of component analysis results. This
is typically the \code{byFeature} element in the list returned by
\code{scpComponentAnalysis()}.}

\item{arrowParams}{A \code{list} where each element is an argument that
is provided to \code{\link[ggplot2:geom_segment]{ggplot2::geom_segment()}}. This is useful to
change arrow head style, line width, transparency, or assign
colour based on an annotation (see \code{\link[ggplot2:aes]{ggplot2::aes()}}). Note
that changing the 'x', 'y', 'xend', and 'yend' aesthetics is
not allowed.}

\item{labelParams}{A \code{list} where each element is an argument that
is provided to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}}. This is useful
to change label size, transparency, or assign
colour based on an annotation (see \code{\link[ggplot2:aes]{ggplot2::aes()}}). Note
that changing the 'x', 'y', 'xend', and 'yend' aesthetics is
not allowed.}

\item{textBy}{A \code{character(1)} indicating the name of the column
to use to label arrow heads.}

\item{top}{An \code{integer(1)} indicating how many arrows should be
drawn. The arrows are sorted based on their size as determined
by the euclidean distance in the principal component space.}
}
\description{
Component analysis is a powerful tool for exploring data. The
package implements the ANOVA-principal component analysis
extended to linear models (APCA+) and derivatives (suggested by
Thiel et al. 2017). This framework is based on principal component
analysis (PCA) and allows exploring the data captured by each
model variable individually. Component analysis is part of the
\emph{scplainer} workflow.
}
\section{PCA - notation and algorithms}{


Given \eqn{A} an m x n matrix, PCA can be summarised as the
following decomposition:

\deqn{AA^T / (n - 1) = VLV^T}

Where \eqn{V} is an m x k matrix with orthonormal columns, that is
\eqn{V^TV = I}, with k the number of components. \eqn{V} is called the matrix of
eigenvectors. \eqn{L} is the k x k diagonal matrix of eigenvalues
that contains the variance associated to each component, ordered
from highest to lowest variance. The unscaled PC scores are given
by \eqn{S = A^TV}.

There are 2 available algorithms to perform PCA:
\itemize{
\item \code{nipals}: The non-linear iterative partial least squares
(NIPALS) algorithm \strong{can handle missing values} and
approximates classical PCA, although it does not explicitly
maximise the variance. This is implemented in \code{\link[nipals:nipals]{nipals::nipals()}}.
\item \code{svd}: The singular value decomposition (SVD) is used to perform
an exact PCA, but it \strong{cannot handle missing values}. This is
implemented in \code{\link[base:svd]{base::svd()}}.
}

Which algorithm to use is controlled by the \code{pcaFUN} argument. By
default (\code{"auto"}), the function automatically uses \code{svd} when
there are no missing values and \code{nipals} when there is at least
one missing value.
}

\section{Component analysis methods}{


\code{scpComponentAnalysis()} performs a PCA on the modelling output.
What modelling output the function will use depends on the
\code{method}. There are 3 PCA approaches:
\itemize{
\item \code{ASCA} performs a PCA on the effect matrix, that is
\eqn{A = \hat{M_f}} where \eqn{f} is one of the effects in the
model. This PCA is useful to explore the modelled effects and
the relationship between different levels of a factor.
\item \code{ASCA.E} performs a PCA on the effect matrix, just like ASCA. The
scores are then updated by projecting the effect matrix added to
the residuals using the eigenvectors, that is
\eqn{scores = (\hat{M_f} + \epsilon)^TV}. This PCA is useful
to explore the modelled effects while blurring these effects
with the unmodelled variability. Note however that for this
approach, the scores are no longer guaranteed to be orthogonal
and the eigenvalues are no longer meaningful. The percentage of
variation should not be interpreted.
\item \code{APCA} (default) performs PCA on the effect matrix plus the
residuals, that is \eqn{A = \hat{M_f} + \epsilon}. This PCA
is useful to explore the modelled effects in relation with the
unmodelled variability that is remaining in the residuals.
}

Available methods are listed in \code{scpModelComponentMethods}.
Note that for all three methods, a PCA on the residual matrix is
also performed when \code{residuals = TRUE}, that is
\eqn{A = \epsilon = Y - \hat{\beta}X^T}. A PCA on the residuals is
useful to explore residual effects that are not captured by any
effect in the model. Similarly, a PCA on the input data matrix,
that is, on the data before modelling, is also performed when
\code{unmodelled = TRUE}, that is \eqn{A = Y}.

\code{scpComponentAnalysis()} always returns a list with 2 elements.
The first element, \code{bySample}, is a list where each element
contains the PC scores for the desired model variable(s). The
second element, \code{byFeature}, is a list where each element
contains the eigenvectors for the desired model variable(s).
}

\section{Exploring component analysis results}{


\code{\link[=scpAnnotateResults]{scpAnnotateResults()}} adds annotations to the component
analysis results. The annotations are added to all elements of the
list returned by \code{scpComponentAnalysis()}. See the associated man
page for more information.

\code{scpComponentPlot()} takes one of the two elements of the list
generated by \code{scpComponentAnalysis()} and returns a list of
\code{ggplot2} scatter plots. Commonly, the first two components,
that bear most of the variance, are explored for visualisation,
but other components can be explored as well thanks to the \code{comp}
argument. Each point represents either a sample or a feature,
depending on the provided component analysis results
(see examples). Change the point aesthetics by providing ggplot
arguments in a list (see examples).

\code{scpComponentBiplot()} simultaneously explores the PC scores
(sample-space) and the eigenvectors (feature-space). Scores are
shown as points while eigenvectors are shown as arrows. Point
aesthetics and arrow aesthetics can be controlled with the
\code{pointParams} and the \code{arrowParams} arguments, respectively.
Moreover, arrows are also labelled and label aesthetics can be
controlled using \code{labelParams} and \code{textBy}. Plotting all
eigenvectors as arrows leads to overcrowded plots. You can limit
the plotting to the top longest arrows (defaults to the top 10) as
defined by the distance on the two selected PCs.

\code{scpComponentAggregate()} offers functionality to aggregate the
results from multiple features. This can be used to obtain, for
example, component analysis results for proteins when modelling at
the peptide level. The approach is inspired by
\href{https://bioconductor.org/packages/release/bioc/html/scuttle.html}{scuttle::aggregateAcrossCells()}
and combines, for each group, multiple values for each component
using \code{\link[QFeatures:QFeatures-aggregate]{QFeatures::aggregateFeatures()}}. By default, values are
aggregated using the median, but \code{QFeatures} offers other methods
as well. The annotations of the component results are automatically
aggregated as well. See the \code{aggregateFeatures()} man page for
more information on available methods and expected behavior.
}

\examples{
library("patchwork")
library("ggplot2")
data("leduc_minimal")
leduc_minimal$cell <- rownames(colData(leduc_minimal))

####---- Run component analysis ----####

(pcs <- scpComponentAnalysis(
    leduc_minimal, method = "ASCA", effects = "SampleType",
    pcaFUN = "auto", residuals = FALSE, unmodelled = FALSE
))

####---- Annotate results ----####

## Add cell annotation available from the colData
bySamplePCs <- scpAnnotateResults(
    pcs$bySample, colData(leduc_minimal), by = "cell"
)

## Add peptide annotations available from the rowData
byFeaturePCs <- scpAnnotateResults(
    pcs$byFeature, rowData(leduc_minimal), 
    by = "feature", by2 = "Sequence"
)

####---- Plot results ----####

## Plot result in cell-space, ie each dot is a cell
scpComponentPlot(
    bySamplePCs, 
    pointParams = list( ## ggplot arguments
        aes(colour = SampleType, shape = lcbatch), 
        alpha = 0.6
    )
) |>
    wrap_plots(guides = "collect")

## Plot result in peptide-space, ie each dot is a peptide
scpComponentPlot(
    byFeaturePCs, 
    pointParams = list(colour = "dodgerblue", alpha = 0.6)
) |>
    wrap_plots(guides = "collect")

## Plot both
scpComponentBiplot(
    bySamplePCs, byFeaturePCs, 
    pointParams = list(aes(colour = SampleType), alpha = 0.6),
    labelParams = list(max.overlaps = 20),
    textBy = "gene"
) |>
    wrap_plots(guides = "collect")

####---- Aggregate results ----####

## Aggregate to protein-level results
byProteinPCs <- scpComponentAggregate(
    byFeaturePCs, fcol = "Leading.razor.protein.id"
)

## Plot result in protein-space, ie each dot is a protein
scpComponentPlot(
    byProteinPCs, 
    pointParams = list(colour = "firebrick", alpha = 0.6)
) |>
    wrap_plots(guides = "collect")
}
\references{
Thiel, Michel, Baptiste Féraud, and Bernadette Govaerts. 2017.
"ASCA+ and APCA+: Extensions of ASCA and APCA in the Analysis of
Unbalanced Multifactorial Designs." Journal of Chemometrics 31
(6): e2895.

Vanderaa, Christophe, and Laurent Gatto. 2023. "scplainer: using
linear models to understand mass spectrometry-based single-cell
proteomics data." bioRxiv 2023.12.14.571792. doi:
https://doi.org/10.1101/2023.12.14.571792.
}
\seealso{
This function is part of the \emph{scplainer} workflow, which also
consists of \link{ScpModel-Workflow} to run a model on SCP data
upstream of component analysis, and
\link{ScpModel-DifferentialAnalysis} and \link{ScpModel-VarianceAnalysis}
to explore the model results.

Other useful functions:
\itemize{
\item The \code{\link[nipals:nipals]{nipals::nipals()}} function and package for detailed
information about the algorithm and associated parameters.
\item The \code{\link[ggplot2:ggplot]{ggplot2::ggplot()}} function and associated tutorials to
manipulate and save the visualisation output.
\item \code{\link[=scpAnnotateResults]{scpAnnotateResults()}} to annotate component analysis results.
}
}
\author{
Christophe Vanderaa, Laurent Gatto
}
\keyword{datasets}
