/*
 * Copyright R. Gentleman, W. Huber 2003-2004, all rights reserved
 *
 */

#include <R.h>
#include <Rinternals.h>
#include "R_ext/Arith.h"
#include "R_ext/Error.h"
#include "R_ext/Applic.h" /* machar */

#include <string.h>
#include <stdlib.h>

/* error messages should not be longer than that! */
char errmess[256];

/*--------------------------*/
/*  the complementary base  */
/*--------------------------*/
char compbase(char c) {
  char bases[] = "TACGNtacgn";
  char compl[] = "ATGCNatgcn";
  char* p;

  p = strchr(bases, (int) c);
  if(p==NULL) {
    sprintf(errmess, "Character %c does not code for a nucleic acid.", c);
    error(errmess);
  }
  return(compl[p-bases]);
}

/*------------------------------*/
/*  the non-complementary base  */
/*  for SNPs at MM position     */
/*------------------------------*/
char ncompbase(char c) {
  char bases[] = "TACGNtacgn";
  char compl[] = "CGTANcgtan";
  char* p;

  p = strchr(bases, (int) c);
  if(p==NULL) {
    sprintf(errmess, "Character %c does not code for a nucleic acid.", c);
    error(errmess);
  }
  return(compl[p-bases]);
}

/*------------------------------------------*/
/* R interface                              */
/* reverse all elements of the input string */
/*------------------------------------------*/
SEXP MP_revstring(SEXP x)
{
  SEXP r;
  char *rev;
  int i, j, k, c, n;

  if( !isString(x) )
    error("argument must be a string");

  n = length(x);

  PROTECT(r = allocVector(STRSXP, n) );
  for(k=0; k<n; k++)
    SET_STRING_ELT(r, k, duplicate(STRING_ELT(x, k)));

  for(k=0; k<n; k++ ) {
    rev = CHAR(STRING_ELT(r, k));
    for(i = 0, j=strlen(rev)-1; i<j; i++, j--) {
	c = rev[i];
        rev[i] = rev[j];
        rev[j] = c;
    }
  }
  UNPROTECT(1);
  return(r);
} 

/*------------------------------------------------*/
/* replace an element with its complementary base */
/*------------------------------------------------*/
SEXP MP_complementSeq(SEXP x, SEXP start, SEXP stop)
{
  SEXP r;
  char *rev;
  int i, i0, i1, imax, k, n;

  /* Bureaucracy */
  if (!isString(x))
    error("'x' must be a string.");
  if (!isInteger(start) || length(start)!=1)
    error("'start' must be an integer variable of length 1.");
  if (!isInteger(stop) || length(stop)!=1)
    error("'stop' must be an integer variable of length 1.");
  i0 = *INTEGER(start);
  i1 = *INTEGER(stop);
  if (i0 < 1)
    error("'start' must be >=1.");
  if (i1 < 0)
    error("'stop' must be >=0.");

  n  = length(x);
  
  PROTECT(r = allocVector(STRSXP, n) );
  for(k=0; k<n; k++)
    SET_STRING_ELT(r, k, duplicate(STRING_ELT(x, k)));

  for(k=0; k<n; k++ ) {
    rev  = CHAR(STRING_ELT(r, k));
    imax = strlen(rev);
    if ((i1 > 0) & (i1 < imax))
      imax = i1;
    for(i = i0-1; i<imax; i++) {
      rev[i] = compbase(rev[i]);
    } 
  }
  UNPROTECT(1);
  return(r);
}

/*----------------------------------------------------*/
/* replace an element with its non-complementary base */
/* used for SNPs at 13th position                     */
/*----------------------------------------------------*/
SEXP MP_ncomplementSeq(SEXP x, SEXP start, SEXP stop)
{
  SEXP r;
  char *rev;
  int i, i0, i1, imax, k, n;

  /* Bureaucracy */
  if (!isString(x))
    error("'x' must be a string.");
  if (!isInteger(start) || length(start)!=1)
    error("'start' must be an integer variable of length 1.");
  if (!isInteger(stop) || length(stop)!=1)
    error("'stop' must be an integer variable of length 1.");
  i0 = *INTEGER(start);
  i1 = *INTEGER(stop);
  if (i0 < 1)
    error("'start' must be >=1.");
  if (i1 < 0)
    error("'stop' must be >=0.");

  n  = length(x);
  
  PROTECT(r = allocVector(STRSXP, n) );
  for(k=0; k<n; k++)
    SET_STRING_ELT(r, k, duplicate(STRING_ELT(x, k)));

  for(k=0; k<n; k++ ) {
    rev  = CHAR(STRING_ELT(r, k));
    imax = strlen(rev);
    if ((i1 > 0) & (i1 < imax))
      imax = i1;
    for(i = i0-1; i<imax; i++) {
      rev[i] = ncompbase(rev[i]);
    } 
  }
  UNPROTECT(1);
  return(r);
}

/*------------------------------------------------*/
/* get the CGAT content of a sequence             */
/*------------------------------------------------*/
SEXP MP_basecontent(SEXP x)
{
  SEXP rv, rownames, colnames, dimnames, dim;
  char *seq;
  int i, j, n, ia, ic, ig, it;

  if( !isString(x) )
    error("argument must be a string");

  n = length(x);
  PROTECT(rv = allocVector(INTSXP, n*4));

  for(i=0; i<n; i++) {
    seq = CHAR(STRING_ELT(x, i));
    ia = ic = ig = it = 0;

    for(j=0; j<strlen(seq); j++) {
      switch(seq[j]) {
      case 'a': 
      case 'A':
	ia++;
	break;
      case 't':
      case 'T':
	it++;
	break;
      case 'c':
      case 'C':
	ic++;
	break;
      case 'g':
      case 'G':
	ig++;
	break;
      default:
	sprintf(errmess, "Unknown base %c in row %d.", seq[j], i);
	error(errmess);
      }
    }
    INTEGER(rv)[i    ] = ia; 
    INTEGER(rv)[i+n  ] = it; 
    INTEGER(rv)[i+n*2] = ic; 
    INTEGER(rv)[i+n*3] = ig; 
  }

  /* dim */
  PROTECT(dim = allocVector(INTSXP, 2));
  INTEGER(dim)[0] = n;
  INTEGER(dim)[1] = 4;
  setAttrib(rv, R_DimSymbol, dim);

  /* dim names */
  PROTECT(colnames = allocVector(STRSXP, 4));
  SET_VECTOR_ELT(colnames, 0, mkChar("A"));
  SET_VECTOR_ELT(colnames, 1, mkChar("T"));
  SET_VECTOR_ELT(colnames, 2, mkChar("C"));
  SET_VECTOR_ELT(colnames, 3, mkChar("G"));

  /* dim names */
  PROTECT(rownames = allocVector(STRSXP, n));
  PROTECT(dimnames = allocVector(VECSXP, 2));
  SET_VECTOR_ELT(dimnames, 0, rownames);
  SET_VECTOR_ELT(dimnames, 1, colnames);
  setAttrib(rv, R_DimNamesSymbol, dimnames);

  UNPROTECT(5);
  return(rv);
}

/*------------------------------------------------*/
/* get longest consecutive stretch of consecutive 
   letters 
/*------------------------------------------------*/
SEXP MP_longestConsecutive(SEXP x, SEXP letter)
{
  int i, j, ncur, nmax;
  char *pc, *seq;
  char c;
  SEXP rv;

  /* Check and preprocess function arguments */
  if (!isString(x))
    error("'x' must be a string.");

  if (!isString(letter) || length(letter)!=1)
    error("'letter' must be a character variable of length 1.");
  pc = CHAR(STRING_ELT(letter, 0));
  if(strlen(pc)!=1) {
      sprintf(errmess, "'letter' must contain exactly one character but contains %d.", 
          strlen(pc));
      error(errmess);
  }
  c = *pc;

  PROTECT(rv = allocVector(INTSXP, length(x)));

  for(i=0; i<length(x); i++) {
    seq = CHAR(STRING_ELT(x, i));
    nmax = ncur = 0;
    for(j=0; j<strlen(seq); j++) {
      if(seq[j]==c) {
        ncur++;
        if(ncur>nmax)
	  nmax=ncur;
      } else {
	ncur=0;
      }
    }
    INTEGER(rv)[i] = nmax;
  }

  UNPROTECT(1);
  return(rv);
}
