## Extend ShortRead

## an additional filter
chastityFilter <- function(.name="Illumina Chastity Filter") 
{
    srFilter(function(x) {
        if(any(rownames(varMetadata(alignData(x))) == "filtering")) {
            alignData(x)[["filtering"]] == "Y"
        } else {
            warning(paste("The '", .name, "' filter is only valid for Illumina reads.", sep=""))
            TRUE
        } 
    }, name = .name)
}

## de-multiplex multiplexed libs
setGeneric("demultiplex",
    function(obj, indexes=NULL, indexes.qty=12, index.length=6,
             edit.dist=2, type=c("independant","within"))
        standardGeneric("demultiplex")
)
setMethod("demultiplex", "AlignedRead",
    function(obj, indexes=NULL, indexes.qty=12, index.length=6, edit.dist=2,
             type=c("independant", "within"))
    {
        type <- match.arg(type)

        ## find top indexes.qty indexes if missing
        if (is.null(indexes)) {
            indexes <-
              switch(type,
                     independant =
                     head(names(sort(table(alignData(obj)[["multiplexIndex"]]),
                                     decreasing=TRUE)), indexes.qty),
                     within =
                     head(names(sort(oligonucleotideFrequency(narrow(sread(obj),
                                                                     start = 1L,
                                                                     width = index.length),
                                                              index.length,
                                                              simplify.as = "collapsed"),
                                     decreasing=TRUE)), indexes.qty))
        }

        ## get the barcodes, according to a certain size
        barcodes <-
          switch(type,
                 independant =
                 DNAStringSet(as.character(alignData(obj)[["multiplexIndex"]])),
                 within = narrow(sread(obj), start = 1L, width = index.length))

        ## just for illumina
        if (type == "independant" && any(nchar(barcodes) > index.length))
            barcodes <- narrow(barcodes, start = 1L, width = index.length)

        ## calculate the edit distance
        uniqueBarcodes <- unique(barcodes)

        negDist <-
          - t(do.call(rbind,
                      lapply(indexes, neditStartingAt, subject=uniqueBarcodes,
                             fixed=FALSE)))
        dimnames(negDist) <- list(as.character(uniqueBarcodes), indexes)

        ok <- which(apply(negDist, 1, max) >= - edit.dist)
        if (length(ok) == 0) {
            stop("no reads have barcodes within 'edit.dist'")
        } else if (length(ok) < length(obj)) {
            warning("dropping ", length(obj) - length(ok),
                    " reads with non-matching barcodes")
            uniqueBarcodes <- uniqueBarcodes[ok]
            negDist <- negDist[ok, , drop=FALSE]
        }

        firstMax <- max.col(negDist, ties.method="first")
        lastMax <- max.col(negDist, ties.method="last")
        ambiguous <- sum(firstMax != lastMax)
        if (ambiguous > 0) {
            warning("randomly placing ", ambiguous,
                    " reads that match more than one barcode equally")
            firstMax <- max.col(negDist, ties.method="random")
        }

        ## split AlignedRead by barcode
        mapping <- split(as.character(uniqueBarcodes), indexes[firstMax])
        objIdx <- split(seq_len(length(obj)), as.character(barcodes))
        select <-
          lapply(mapping, function(x)
                 obj[sort(unlist(objIdx[x], use.names=FALSE))])

        ## decompose reads
        list(reads =
             lapply(select, narrow, start = index.length + 1L),
             barcodes =
             lapply(select, function(x)
                    narrow(sread(x), start = 1L, width = index.length)))
    }
)
