/*
 * Rdefines.h is needed for the SEXP typedef, for the error(), INTEGER(),
 * GET_DIM(), LOGICAL(), NEW_INTEGER(), PROTECT() and UNPROTECT() macros,
 * and for the NULL_USER_OBJECT and NA_INTEGER constant symbols.
 */
#include <Rdefines.h>

/*
 * R_ext/Rdynload.h is needed for the R_CallMethodDef typedef and the
 * R_registerRoutines() prototype.
 */
#include <R_ext/Rdynload.h>

/*
 * Biostrings_interface.h is needed for the DNAencode(), get_XString_asRoSeq(),
 * init_match_reporting(), report_match() and reported_matches_asSEXP()
 * prototypes, and for the COUNT_MRMODE and START_MRMODE constant symbols.
 */
#include "Biostrings_interface.h"


/*
 * Table used for fast lookup between the A, C, G, T internal codes (as
 * returned by DNAencode() and used as an unsigned char index) and the
 * corresponding 0-based row index (the row offset) in the PWM:
 *   A internal code     ->  0
 *   C internal code     ->  1
 *   G internal code     ->  2
 *   T internal code     ->  3
 *   other internal code -> -1
 * The table is filled at run time by init_DNAcode2PWMrowoffset(). Until that
 * function has been called every entry is 0 (static storage), which a lookup
 * would misread as "row A", so it must be called before the first lookup.
 */
static int DNAcode2PWMrowoffset[256];

/*
 * Fill the DNAcode2PWMrowoffset lookup table: every entry is first set to
 * -1 ("no row in the PWM"), then the entries indexed by the internal codes
 * of A, C, G and T (obtained from DNAencode(), in that order) are set to
 * 0, 1, 2 and 3 respectively.
 */
static void init_DNAcode2PWMrowoffset()
{
	/* The position of a letter in this string is its row offset in the PWM. */
	static const char bases[] = "ACGT";
	int code, row;

	for (code = 255; code >= 0; code--)
		DNAcode2PWMrowoffset[code] = -1;
	for (row = 0; row < 4; row++)
		DNAcode2PWMrowoffset[(unsigned char) DNAencode(bases[row])] = row;
}

/*
 * Compute the score obtained when the first column of the PWM is aligned
 * with position 'pwm_shift' of the subject.
 *
 *   pwm:       the PWM values, laid out column after column with 4 values
 *              per column (rows A, C, G, T, in that order)
 *   pwm_ncol:  the number of columns in the PWM
 *   S:         the subject sequence (internal codes, one char per letter)
 *   nS:        the number of letters in 'S'
 *   pwm_shift: the 0-based offset in 'S' of the first letter to score
 *
 * Returns the sum, over the PWM columns, of the value found in the row of
 * the aligned subject letter. Letters that DNAcode2PWMrowoffset maps to -1
 * (anything other than A, C, G, T) contribute nothing to the score.
 *
 * Raises an R error if the PWM does not fit entirely within the subject
 * when placed at 'pwm_shift'.
 *
 * init_DNAcode2PWMrowoffset() must have been called beforehand.
 */
static int compute_score(const int *pwm, int pwm_ncol, const char *S, int nS, int pwm_shift)
{
	int score, i, rowoffset;

	/*
	 * Validate the position BEFORE doing any arithmetic with it: moving 'S'
	 * outside the sequence, or subtracting a very negative shift (such as
	 * R's NA_INTEGER) from 'nS', is undefined behaviour in C even if the
	 * result is never used. The 'pwm_shift > nS' test also guarantees that
	 * 'nS - pwm_shift' cannot overflow.
	 */
	if (pwm_shift < 0 || pwm_shift > nS || nS - pwm_shift < pwm_ncol)
		error("trying to compute the score from an invalid starting position");
	S += pwm_shift;
	score = 0;
	/* Walk the PWM columns (4 ints each) and the subject letters in parallel. */
	for (i = 0; i < pwm_ncol; i++, pwm += 4, S++) {
		rowoffset = DNAcode2PWMrowoffset[(unsigned char) *S];
		if (rowoffset == -1)
			continue;
		score += pwm[rowoffset];
	}
	return score;
}

/*
 * --- .Call ENTRY POINT ---
 * PWM_score() is meant to compute the score of the PWM at each requested
 * starting position in the subject.
 *
 * NOT IMPLEMENTED YET: the body below is a placeholder that unconditionally
 * raises an R error. The comments inside it outline the intended
 * implementation, step by step.
 *
 * PWM_score() arguments are assumed to be:
 *   pwm: the Position Weight Matrix (integer matrix with row names A, C, G and T)
 *   subject: a DNAString object containing the subject sequence
 *   start: an integer vector of arbitrary length (NAs accepted)
 *
 * Intended return value: the "answer object" holding one score per element
 * of 'start' (presumably an integer vector allocated with NEW_INTEGER() --
 * TODO confirm).
 */
SEXP PWM_score(SEXP pwm, SEXP subject, SEXP start)
{
	error("IMPLEMENT ME!"); // remove when the function is ready

	// Step 1: use GET_DIM() and INTEGER() to check that 'pwm' has 4 rows

	// Step 2: get and store its number of cols in a local variable

	// Step 3: use get_XString_asRoSeq() from the Biostrings C interface to
	// get an RoSeq struct (Read Only Sequence) "pointing" to 'subject':
	// this struct has 2 members (.elts and .nelt), the 1st one will point
	// to the first char in 'subject' and the 2nd will contain the length
	// of the sequence

	// Step 4: call init_DNAcode2PWMrowoffset() so that compute_score() can
	// translate the letters in 'subject' into PWM row offsets

	// Step 5: allocate the "answer object" (i.e. the SEXP that will be
	// returned to R): don't forget to PROTECT it now and to UNPROTECT it
	// later _right_ before you return it

	// Step 6: use a "for" loop to walk on each element in 'start': for each
	// element, call compute_score() (defined above in this file)
	// to compute the score obtained at this starting position and store
	// it in the "answer object".
	// NOTE(review): compute_score() takes a 0-based offset; if the values
	// in 'start' are 1-based positions (as the n1 + 1 reported by the loop
	// sketched in match_PWM() suggests) they must be shifted by 1 -- confirm.

	// Step 7: don't forget to handle NAs in the 'start' vector (presumably
	// by storing NA_INTEGER in the "answer object" without calling
	// compute_score() -- TODO confirm)

	return NULL_USER_OBJECT; // return the "answer object" instead
}

/*
 * --- .Call ENTRY POINT ---
 * match_PWM() is meant to find (or just count) the positions in the subject
 * where the PWM score is at least 'min_score'.
 *
 * NOT IMPLEMENTED YET: the body below is a placeholder that unconditionally
 * raises an R error, so the final return statement is never reached. The
 * comments inside the body outline the intended implementation.
 *
 * match_PWM() arguments are assumed to be:
 *   pwm: the Position Weight Matrix (integer matrix with row names A, C, G and T)
 *   subject: a DNAString object containing the subject sequence
 *   min_score: an integer vector of length 1 (not NA)
 *   count_only: a logical vector of length 1 (not NA)
 *
 * Intended return value: whatever reported_matches_asSEXP() builds from the
 * matches reported with report_match().
 */
SEXP match_PWM(SEXP pwm, SEXP subject, SEXP min_score, SEXP count_only)
{
	error("IMPLEMENT ME!"); // remove when the function is ready

	// use GET_DIM() and INTEGER() to check that 'pwm' has 4 rows

	// before you start the main loop, don't forget to:
	// - call init_DNAcode2PWMrowoffset() (like in PWM_score())
	// - call init_match_reporting() (see Biostrings_interface.h for some
	//   important note about the match reporting functions).
	//   NOTE(review): presumably the reporting mode (COUNT_MRMODE or
	//   START_MRMODE) is chosen from 'count_only' -- confirm against
	//   Biostrings_interface.h.

	// your main loop will more or less look like this (n1 is the 0-based
	// offset of the current window in the subject, n2 the offset right after
	// its last letter, nS the length of the subject, and minscore the value
	// extracted from 'min_score'):
/*
	for (n1 = 0, n2 = pwm_ncol; n2 <= nS; n1++, n2++) {
		if (compute_score(...) >= minscore) {
			// The second arg (end) is ignored in match reporting
			// modes COUNT_MRMODE and START_MRMODE
			report_match(n1 + 1, -1);
		}
	}
*/

	// The SEXP returned by reported_matches_asSEXP() is UNPROTECTED
	// but you don't have to PROTECT it here since you are returning it
	// right away.
	return reported_matches_asSEXP();
}

/*
 * --- REGISTRATION OF THE .Call ENTRY POINTS ---
 * Each entry gives the name of the routine, the address of the C function
 * that implements it, and its number of arguments (3 for PWM_score(), 4 for
 * match_PWM(): keep these in sync with the function signatures above).
 * The all-NULL entry marks the end of the table.
 */
static const R_CallMethodDef callMethods[] = {
	{"PWM_score", (DL_FUNC) &PWM_score, 3},
	{"match_PWM", (DL_FUNC) &match_PWM, 4},
	{NULL, NULL, 0}
};

// Initialization function: registers the .Call entry points listed in
// callMethods with R.
// NOTE(review): R is expected to call this function by name when it loads
// the package's shared library -- confirm that the name matches the library.
// Note that the package name contains a . but we must replace it by a _ here
// because . is not allowed in C identifiers.
void R_init_simpleMatchPWM_stub(DllInfo *info)
{
	// Only a table of .Call routines is supplied; the other 3 routine
	// tables accepted by R_registerRoutines() are left NULL.
	R_registerRoutines(info, NULL, callMethods, NULL, NULL);
}

