## Switch to the data directory (the BAM file below is named relative
## to it). setwd() returns the previous working directory, which is
## kept in 'owd' so the final setwd(owd) can restore it.
owd <- setwd("/home/mtmorgan/proj/a/1000g/")
library(Rsamtools)
library(ShortRead)

## Build a ScanBamParam whose 'which' tiles one reference sequence
## into consecutive, non-overlapping ranges.
##
## file:       BAM file; passed to scanBamHeader().
## seqname:    name of the reference sequence to tile; must be one of
##             the names of the header's "targets" element.
## length.out: number of cut points placed between 1 and the sequence
##             length + 1; length.out - 1 ranges are produced.
## ...:        forwarded to ScanBamParam() (e.g., what=).
##
## Returns the ScanBamParam constructed from those ranges.
makeStreamBatches <-
    function(file, seqname, length.out, ...)
{
    targets <- scanBamHeader(file)[[1]][["targets"]]
    ## fail with a readable message instead of a bare subscript error
    if (!isTRUE(seqname %in% names(targets)))
        stop("'seqname' not found in BAM header targets: ",
             paste(seqname, collapse=", "))
    ## one past the last position, so the final range ends on the
    ## last position of the sequence
    len <- targets[[seqname]] + 1
    ## cut points are generally fractional; truncate them to integer
    ## positions explicitly before building ranges
    cuts <- as.integer(seq.int(1, len, length.out=length.out))
    start <- cuts[-length(cuts)]
    end <- cuts[-1] - 1L
    which <- GRanges(seqname,
                     IRanges(start, end),
                     strand="*")
    ScanBamParam(which=which, ...)
}

## Apply MAP to a BAM file one range at a time, then combine the
## per-range results with REDUCE.
##
## file, index: BAM file and its index; passed to scanBam().
## ...:         additional arguments passed to scanBam().
## MAP:         function called on the scanBam() result of each batch.
## REDUCE:      binary function handed to Reduce() to combine the
##              MAP results (default: append).
## param:       ScanBamParam. Each range in the first element of
##              bamWhich(param) is scanned as its own batch. When
##              bamWhich(param) is empty, scanBam() is called once
##              with 'param' unchanged.
##
## Returns Reduce(REDUCE, <list of MAP results>), which is NULL when
## there are no batches.
streamBam <-
    function(file, index = file, ...,
             MAP, REDUCE=append,
             param=ScanBamParam())
{
    ## FIXME reads overlapping batches are visited twice

    ## no ranges requested (e.g., the default 'param'): there is
    ## nothing to split, so process the file as a single batch
    if (length(bamWhich(param)) == 0L) {
        block <- scanBam(file, index, ...,
                         param=param)[[1]]
        return(Reduce(REDUCE, list(MAP(block))))
    }

    ranges <- bamWhich(param)[[1]]
    len <- length(ranges)
    if (len == 0L)
        return(Reduce(REDUCE, list()))

    ## start the bar at 0 so that a single batch is valid
    ## (txtProgressBar() requires max > min); always close it
    progress <- txtProgressBar(0, len)
    on.exit(close(progress), add=TRUE)

    mapped <- lapply(seq_len(len), function(i) {
        ## restrict a copy of 'param' to the i-th range only
        batch <- param
        bamWhich(batch)[[1]] <- ranges[i]
        block <- scanBam(file, index, ...,
                         param=batch)[[1]]
        result <- MAP(block)
        setTxtProgressBar(progress, i)
        result
    })
    Reduce(REDUCE, mapped)
}

## BAM file to stream, named relative to the working directory.
file <- "NA19240.chrom6.SLX.maq.SRP000032.2009_07.bam"

## Batches over sequence "6", requesting the "seq" and "qual" fields.
param <- makeStreamBatches(
    file, seqname="6", length.out=1000, what=c("seq", "qual")
)
## Keep only the first ten ranges.
bamWhich(param)[[1]] <- bamWhich(param)[[1]][seq_len(10)]

## Select the reads of one batch whose G+C fraction exceeds 0.8.
##
## x: list with elements "seq" and "qual", as produced by scanBam()
##    when those fields are requested.
##
## Returns a ShortReadQ holding the selected reads and qualities.
gcEnriched <-
    function(x)
{
    sr <- ShortReadQ(x[["seq"]],
                     FastqQuality(as(x[["qual"]], "BStringSet")))
    alph <- alphabetFrequency(sread(sr))
    ## drop=FALSE keeps a matrix when the batch has a single read;
    ## otherwise the subset collapses to a vector and rowSums() fails
    gcCount <- rowSums(alph[, c("G", "C"), drop=FALSE])
    gc <- gcCount / rowSums(alph)
    ## reads with no letters give 0/0 = NaN; treat them as not
    ## enriched instead of passing NA into the subscript
    sr[ !is.na(gc) & gc > .8 ]
}

## Stream the selected batches, applying gcEnriched() to each one.
srq <- streamBam(file, MAP=gcEnriched, param=param)
srq

## Distribution of read widths.
table(width(srq))

## Per-cycle letter counts for the reads of width 36; plot the
## first four rows of the resulting matrix, one line per row.
srq36 <- srq[width(srq) == 36]
abc <- alphabetByCycle(sread(srq36), collapse=TRUE)
matplot(t(abc[seq_len(4), ]))

## Restore the working directory saved in 'owd'.
setwd(owd)
