% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/machinelearning-functions-tagm-mcmc.R
\name{tagmMcmcTrain}
\alias{tagmMcmcTrain}
\alias{tagmMcmcPredict}
\alias{tagmPredict}
\alias{tagmMcmcProcess}
\title{Localisation of proteins using the TAGM MCMC method}
\usage{
tagmMcmcTrain(
  object,
  fcol = "markers",
  method = "MCMC",
  numIter = 1000L,
  burnin = 100L,
  thin = 5L,
  mu0 = NULL,
  lambda0 = 0.01,
  nu0 = NULL,
  S0 = NULL,
  beta0 = NULL,
  u = 2,
  v = 10,
  numChains = 4L,
  BPPARAM = BiocParallel::bpparam()
)

tagmMcmcPredict(
  object,
  params,
  fcol = "markers",
  probJoint = FALSE,
  probOutlier = TRUE
)

tagmPredict(
  object,
  params,
  fcol = "markers",
  probJoint = FALSE,
  probOutlier = TRUE
)

tagmMcmcProcess(params)
}
\arguments{
\item{object}{An \code{\link[MSnbase:MSnSet-class]{MSnbase::MSnSet}} containing the spatial
proteomics data to be passed to \code{tagmMcmcTrain} and
\code{tagmPredict}.}

\item{fcol}{The feature meta-data containing marker definitions.
Default is \code{markers}.}

\item{method}{A \code{character()} describing the inference method for
the TAGM algorithm. Default is \code{"MCMC"}.}

\item{numIter}{The number of iterations of the MCMC
algorithm. Default is 1000.}

\item{burnin}{The number of samples to be discarded from the
beginning of the chain. Default is 100.}

\item{thin}{The thinning frequency to be applied to the MCMC
chain.  Default is 5.}

\item{mu0}{The prior mean. Default is \code{colMeans} of the expression
data.}

\item{lambda0}{The prior shrinkage. Default is 0.01.}

\item{nu0}{The prior degrees of freedom. Default is
\code{ncol(exprs(object)) + 2}.}

\item{S0}{The prior inverse-Wishart scale matrix. Empirical prior
used by default.}

\item{beta0}{The prior Dirichlet distribution
concentration. Default is 1 for each class.}

\item{u}{The prior shape parameter for Beta(u, v). Default is 2.}

\item{v}{The prior shape parameter for Beta(u, v). Default is 10.}

\item{numChains}{The number of parallel chains to be run. Default
is 4.}

\item{BPPARAM}{Support for parallel processing using the
\code{BiocParallel} infrastructure. When missing (default), the
default registered \code{BiocParallelParam} parameters are
used. Alternatively, one can pass a valid \code{BiocParallelParam}
parameter instance: \code{SnowParam}, \code{MulticoreParam},
\code{DoparParam}, ... see the \code{BiocParallel} package for
details.}

\item{params}{An instance of class \code{MCMCParams}, as generated by
\code{\link[=tagmMcmcTrain]{tagmMcmcTrain()}}.}

\item{probJoint}{A \code{logical(1)} indicating whether to return the
joint probability matrix, i.e. the probability for all classes
as a new \code{tagm.mcmc.joint} feature variable.}

\item{probOutlier}{A \code{logical(1)} indicating whether to return the
probability of being an outlier as a new \code{tagm.mcmc.outlier}
feature variable. A high value indicates that the protein is
unlikely to belong to any annotated class (and is hence
considered an outlier).}
}
\value{
\code{tagmMcmcTrain} returns an instance of class
\code{MCMCParams}.

\code{tagmMcmcPredict} returns an instance of class
\code{\link[MSnbase:MSnSet-class]{MSnbase::MSnSet}} containing the localisation predictions as
a new \code{tagm.mcmc.allocation} feature variable. The allocation
probability is encoded as \code{tagm.mcmc.probability}
(corresponding to the mean of the distribution
probability). In addition, the upper and lower quantiles of
the allocation probability distribution are available as
\code{tagm.mcmc.probability.lowerquantile} and
\code{tagm.mcmc.probability.upperquantile} feature variables. The
Shannon entropy is available in the \code{tagm.mcmc.mean.shannon}
feature variable, measuring the uncertainty in the allocations
(a high value representing high uncertainty; the highest value
is the natural logarithm of the number of classes).

\code{tagmMcmcProcess} returns an instance of class
\code{MCMCParams} with its summary slot populated.
}
\description{
These functions implement the T augmented Gaussian mixture (TAGM)
model for mass spectrometry-based spatial proteomics datasets
using Markov-chain Monte-Carlo (MCMC) for inference.
}
\details{
The \code{tagmMcmcTrain} function generates the samples from the
posterior distributions (object of class \code{MCMCParams}) based on an
annotated quantitative spatial proteomics dataset (object of class
\code{\link[MSnbase:MSnSet-class]{MSnbase::MSnSet}}). Both are then passed to the \code{tagmPredict}
function to predict the sub-cellular localisation of proteins of
unknown localisation. See the \emph{pRoloc-bayesian} vignette for
details and examples. In this implementation, if numerical instability
is detected in the covariance matrix of the data, a small multiple of
the identity matrix is added. A message is printed if this conditioning
step is performed.
}
\references{
\emph{A Bayesian Mixture Modelling Approach For Spatial
Proteomics} Oliver M Crook, Claire M Mulvey, Paul D. W. Kirk,
Kathryn S Lilley, Laurent Gatto bioRxiv 282269; doi:
\url{https://doi.org/10.1101/282269}
}
\seealso{
The \code{\link[=plotEllipse]{plotEllipse()}} function can be used to visualise
TAGM models on PCA plots with ellipses.
}
