% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/parsePhyloProfile.R
\name{filterProfileData}
\alias{filterProfileData}
\title{Filter phylogenetic profiles}
\usage{
filterProfileData(DF, taxaCount, refTaxon = NULL,
    percentCO = c(0, 1), coorthoCOMax = 9999,
    var1CO  = c(0, 1), var2CO = c(0, 1), var1Rel = "protein",
    var2Rel = "protein", groupByCat = FALSE, catDt = NULL,
    var1AggregateBy = "max", var2AggregateBy = "max")
}
\arguments{
\item{DF}{a reduced dataframe containing info for all phylogenetic
profiles in the selected taxonomy rank.}

\item{taxaCount}{dataframe counting present taxa in each supertaxon}

\item{refTaxon}{selected reference taxon. NOTE: This taxon will not be
affected by the filtering. If you want to filter all taxa, set
refTaxon = NULL. Default = NULL.}

\item{percentCO}{min and max cutoffs for percentage of species present
in a supertaxon. Default = c(0, 1).}

\item{coorthoCOMax}{maximum number of co-orthologs allowed. Default =
9999.}

\item{var1CO}{min and max cutoffs for var1. Default = c(0, 1).}

\item{var2CO}{min and max cutoffs for var2. Default = c(0, 1).}

\item{var1Rel}{relation of var1 ("protein" for protein-protein or
"species" for protein-species). Default = "protein".}

\item{var2Rel}{relation of var2 ("protein" for protein-protein or
"species" for protein-species). Default = "protein".}

\item{groupByCat}{group genes by their categories (TRUE or FALSE). Default =
FALSE.}

\item{catDt}{dataframe containing gene categories
(optional, NULL if groupByCat = FALSE or no info provided). Default = NULL.}

\item{var1AggregateBy}{aggregate method for VAR1 (max, min, mean
or median), applied for calculating var1 of supertaxa. Default = "max".}

\item{var2AggregateBy}{aggregate method for VAR2 (max, min, mean
or median), applied for calculating var2 of supertaxa. Default = "max".}
}
\value{
A filtered dataframe for generating profile plot including seed gene
IDs (or orthologous group IDs), their ortholog IDs and the corresponding
(super)taxa, (super)taxon IDs, number of co-orthologs in each (super)taxon,
values for two additional variables var1, var2, percentage of species present in each
supertaxon, and the categories of seed genes (or ortholog groups).
}
\description{
Create the filtered data needed for plotting or clustering
phylogenetic profiles. NOTE: this function requires some intermediate steps
using the results from other functions. If you would like to get fully
processed data from the raw input, please use the function
fromInputToProfile() instead!
}
\examples{
# NOTE: filterProfileData() requires intermediate results produced by other
# functions. To get fully processed data directly from the raw input, use
# fromInputToProfile() instead!
library(dplyr)
data("fullProcessedProfile", package="PhyloProfile")

# Taxonomy rank and reference taxon passed to sortInputTaxa()
selectedRank <- "class"
referenceTaxon <- "Mammalia"

# Count the taxa present in each supertaxon
inputTaxonIDs <- levels(as.factor(fullProcessedProfile$ncbiID))
sortedTaxa <- sortInputTaxa(
    inputTaxonIDs, selectedRank, referenceTaxon, NULL, NULL
)
supertaxonCount <- sortedTaxa \%>\%
    dplyr::group_by(supertaxon) \%>\%
    dplyr::summarise(n = dplyr::n(), .groups = "drop")

# Filter the profiles; every setting is passed by its argument name
filterProfileData(
    DF = fullProcessedProfile,
    taxaCount = supertaxonCount,
    refTaxon = referenceTaxon,
    percentCO = c(0.0, 1.0),
    coorthoCOMax = 10,
    var1CO = c(0.75, 1.0),
    var2CO = c(0.5, 1.0),
    var1Rel = "protein",
    var2Rel = "species",
    groupByCat = FALSE,
    catDt = NULL,
    var1AggregateBy = "max",
    var2AggregateBy = "max"
)
}
\seealso{
\code{\link{parseInfoProfile}} and \code{\link{reduceProfile}}
for generating input dataframe, \code{\link{fullProcessedProfile}} for a
demo full processed profile dataframe, \code{\link{fromInputToProfile}} for
generating fully processed data from raw input.
}
\author{
Vinh Tran tran@bio.uni-frankfurt.de
}
