## BEGIN getTabSeq.R
## Reads an Affymetrix tab-delimited probe file: probe set name,
## probe X/Y, interrogation position, probe sequence and strandedness.
## Modified by Benilton based on Wolfgang's code @ matchprobes


## BC: should the name of this function change?
## BC: it's the same name as the one at matchprobes
## BC: On May 26, I changed the name from
##     getProbeDataAffy to getTabSeq

## getTabSeq: read a tab-delimited probe sequence file.
##
## Arguments:
##   datafile  name of the tab-delimited file, handed to scan(). Its first
##             line must hold exactly these six column names, in this order:
##             "Probe Set Name", "Probe X", "Probe Y",
##             "Probe Interrogation Position", "Probe Sequence",
##             "Target Strandedness".
##
## Value:
##   A data.frame sorted by X, then Y, with columns
##     X, Y              integer (file columns 2 and 3)
##     feature_set_name  character, wrapped in I() (file column 1)
##     sequence          character, wrapped in I() (file column 5)
##     genomic_location  integer (file column 4)
##   The sixth file column is read but not returned.
##
## Errors:
##   stops if 'datafile' is missing, if the header line differs from the
##   expected names, or if a value in file columns 2-4 cannot be converted
##   to an integer.
getTabSeq <- function(datafile)
{
  cat("Reading Sequence Info\n")
  if (missing(datafile)) stop("No sequence file given.\n")

  expected <- c("Probe Set Name", "Probe X", "Probe Y",
                "Probe Interrogation Position", "Probe Sequence",
                "Target Strandedness")
  numericCols <- 2:4

  ## Validate the header before touching the data. identical() also rejects
  ## an empty header or one of the wrong length, which an element-wise
  ## comparison would silently recycle.
  head <- scan(datafile, sep = "\t", quiet = TRUE, multi.line = FALSE,
               nlines = 1, what = "character")
  if (!identical(as.character(head), expected)) {
    mess <- paste("The data file", datafile,
                  "does not have the expected column names",
                  "in its header line. Please make sure it is the right data file.\n")
    stop(mess)
  }

  ## scan() derives the column type from the *type* of each 'what' element,
  ## so every column is read as text here on purpose: that keeps the raw
  ## value available for the error message below.
  dat <- scan(datafile, sep = "\t", quiet = TRUE, multi.line = FALSE,
              skip = 1, what = rep(list(""), length(expected)))

  ## Convert one numeric column, stopping on the first value that does not
  ## yield an integer (non-numeric text, empty field, NA, out of range).
  toInteger <- function(i) {
    raw <- dat[[i]]
    val <- suppressWarnings(as.integer(raw))
    z <- which(is.na(val))
    if (length(z) > 0)
      stop(paste0("Corrupted data file: found non-number in data row ", z[1],
                  " of column ", expected[i], ": ", raw[z[1]]))
    val
  }
  ints <- lapply(numericCols, toInteger)

  ## data frame with the probe data
  pt <- data.frame(X = ints[[1]],
                   Y = ints[[2]],
                   feature_set_name = I(dat[[1]]),
                   sequence = I(dat[[5]]),
                   genomic_location = ints[[3]])
  pt <- pt[order(pt$X, pt$Y), ]
  return(pt)
}
## END getTabSeq.R
