## CLASSIFICATION: V-FOLD CROSS VALIDATION WITH LOGITBOOST AND GENE SELECTION
## Entry point for v-fold cross validation.
##
## Arguments:
##   x        data with one row per observation; nrow(x) must equal length(y)
##   y        class labels; the number of distinct labels decides whether the
##            binary or the multiclass routine is called
##   v        number of folds, 2 <= v <= length(y) (default: length(y))
##   mfinal, presel, estimate, verbose
##            handed on unchanged to cv.binary() / cv.multic()
##
## Value: whatever cv.binary() (exactly two classes) or cv.multic() (more
## than two classes) returns.
crossval <- function(x, y, v=length(y), mfinal = 100, presel = 0,
                     estimate = 0, verbose = FALSE)
{
  ## How many distinct class labels are there?
  n_classes <- nlevels(as.factor(y))

  ## Input validation; the order of the checks decides which message wins
  if (nrow(x) != length(y)) {
    stop("x and y must have equally many observation")
  }
  if (v > length(y)) {
    stop("set v for v-fold c.v. appropriately!")
  }
  if (v < 2) {
    stop("set v for v-fold c.v. appropriately!")
  }
  if (n_classes < 2) {
    stop("y must have at least two different levels")
  }
  if (n_classes > 10) {
    warning("more than 10 classes; is `y' categorical?")
  }

  ## n_classes >= 2 is guaranteed here, so anything but 2 is multiclass
  if (n_classes == 2) {
    cv.binary(x, y, v, mfinal, presel, estimate, verbose)
  } else {
    cv.multic(x, y, v, mfinal, presel, estimate, verbose)
  }
}

## FOR BINARY PROBLEMS
## Cross validation for a two-class problem.
##
## Sample j is put into fold ((j - 1) %% v) + 1, i.e. fold i holds the
## samples i, i + v, i + 2v, ...  For every fold, logitboost() is called with
## the remaining samples as learning set and the fold as test set, and its
## results are written into the rows that belong to the fold.
##
## Arguments:
##   x        data with one row per sample
##   y        class labels, one per row of x; passed to logitboost() as is
##   v        number of folds; the caller must ensure 2 <= v <= length(y)
##   mfinal   number of columns of the result (one per column of the
##            `probs' component returned by logitboost())
##   presel, verbose
##            handed on unchanged to logitboost(); verbose additionally
##            switches on a progress message after every fold
##   estimate handed on to logitboost(); if > 0 the `loglikeli' component
##            of its result is collected as well
##
## Value: a list with component `probs' (length(y) x mfinal matrix) and, if
## estimate > 0, a component `loglikeli' of the same shape.
cv.binary <- function(x, y, v, mfinal, presel, estimate, verbose)
{
  nsamples     <- length(y)
  keep_likeli  <- estimate > 0

  ## Containers for the results, filled fold by fold
  ptest <- matrix(0, nsamples, mfinal)
  if (keep_likeli) {
    likeli <- matrix(0, nsamples, mfinal)
  }

  for (i in seq_len(v)) {
    ## Every v-th sample, starting at sample i, forms the test set
    test  <- seq(i, nsamples, by = v)
    learn <- setdiff(seq_len(nsamples), test)

    ## drop = FALSE keeps both subsets two-dimensional, also when only one
    ## row is selected or when x has a single column
    xlearn <- x[learn, , drop = FALSE]
    ylearn <- y[learn]
    xtest  <- x[test, , drop = FALSE]

    output <- logitboost(xlearn, ylearn, xtest, mfinal, presel,
                         estimate, verbose)

    ptest[test, ] <- output$probs
    if (keep_likeli) {
      likeli[test, ] <- output$loglikeli
    }
    if (verbose) {
      print(paste("This was run number", i, "of", v))
    }
  }

  ## Output
  out <- list(probs = ptest)
  if (keep_likeli) {
    out <- list(probs = ptest, loglikeli = likeli)
  }
  out
}


## FOR MULTICLASS PROBLEMS
## Cross validation for a problem with more than two classes.
##
## Sample j is put into fold ((j - 1) %% v) + 1, i.e. fold i holds the
## samples i, i + v, i + 2v, ...  Within every fold one binary problem per
## class is solved with logitboost(): for k = 0, ..., K-1 the response is 1
## for learning samples whose label equals k and 0 otherwise.  The labels in
## y are therefore expected to be coded 0, 1, ..., K-1, where K is the number
## of distinct labels; a label outside that range never matches.
##
## Arguments:
##   x        data with one row per sample
##   y        class labels, one per row of x, coded 0, ..., K-1
##   v        number of folds; the caller must ensure 2 <= v <= length(y)
##   mfinal   size of the second dimension of the result (one entry per
##            column of the `probs' component returned by logitboost())
##   presel, verbose
##            handed on unchanged to logitboost(); verbose additionally
##            switches on a progress message after every fold
##   estimate handed on to logitboost(); if > 0 the `loglikeli' component
##            of its result is collected as well
##
## Value: a list with component `probs', an array of dimension
## length(y) x mfinal x K whose slice [, , k + 1] belongs to label k, and,
## if estimate > 0, a component `loglikeli' of the same shape.
cv.multic <- function(x, y, v, mfinal, presel, estimate, verbose)
{
  nsamples     <- length(y)
  K            <- nlevels(as.factor(y))
  keep_likeli  <- estimate > 0

  ## Containers for the results, filled fold by fold and class by class
  ptest <- array(0, c(nsamples, mfinal, K))
  if (keep_likeli) {
    likeli <- array(0, c(nsamples, mfinal, K))
  }

  for (i in seq_len(v)) {
    ## Every v-th sample, starting at sample i, forms the test set
    test  <- seq(i, nsamples, by = v)
    learn <- setdiff(seq_len(nsamples), test)

    ## drop = FALSE keeps both subsets two-dimensional, also when only one
    ## row is selected or when x has a single column
    xlearn <- x[learn, , drop = FALSE]
    ylearn <- y[learn]
    xtest  <- x[test, , drop = FALSE]

    ## One-against-all: label k versus everything else
    for (k in 0:(K - 1)) {
      output <- logitboost(xlearn, (ylearn == k) * 1, xtest,
                           mfinal, presel, estimate, verbose)
      ptest[test, , k + 1] <- output$probs
      if (keep_likeli) {
        likeli[test, , k + 1] <- output$loglikeli
      }
    }
    if (verbose) {
      print(paste("This was run number", i, "of", v))
    }
  }

  ## Output
  out <- list(probs = ptest)
  if (keep_likeli) {
    out <- list(probs = ptest, loglikeli = likeli)
  }
  out
}
