## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() library(knitr) opts_chunk$set(cache=TRUE, tidy=FALSE) ## ----preliminaries,message=FALSE----------------------------------------- library(Biostrings) library(ShortRead) ## ----help1--------------------------------------------------------------- showMethods(complement) ## ----Intro--------------------------------------------------------------- b <- BString("I store any set of characters!" ) d <- DNAString("GCATAT-TAC") # Creates DNAString object. r <- RNAString("GCAUAU-UAC") # Creates RNAString object. r <- RNAString(d) # Converts d into RNAString object. p <- AAString("HCWYHH") ## ----fileaccess---------------------------------------------------------- fls <- list.files(system.file("extdata", package="BiocIntro"), pattern =".txt",full=TRUE) fls ## ----data---------------------------------------------------------------- #Approach- 1 # read in the FATSA file seq <- readFasta(fls) ##Lets create a DNAStringSet which is a container for storing a set of DNAString dna <- sread(seq) #Approach-2 dna <- readDNAStringSet(fls) # let us look at the first DNAString, brca1 stored at [1] # This [[]] operation converts a DNAStringSet to DNAString brca1 <- dna[[1]] brca2 <- dna[[2]] # making the output more understandable. # A sequence in FASTA format is represented as a series of lines, each of which # usually do not exceed 80 characters. successiveViews(brca1, width=rep(50,length(dna[[1]])/50+1)) ## ----successiveViewall, eval=FALSE--------------------------------------- ## options(showHeadLines=Inf) ## successiveViews(brca1, width=rep(50,length(dna[[1]])/50+1)) ## ----reverse------------------------------------------------------------- reverse(brca1) complement(brca1) reverseComplement(brca1) ## ------------------------------------------------------------------------ translate(brca1) ## ----predef-------------------------------------------------------------- DNA_BASES DNA_ALPHABET IUPAC_CODE_MAP ## ----frequeny------------------------------------------------------------ # what are the unique letter ? uniqueLetters(brca2) alphabetFrequency(brca2) alphabetFrequency(brca2, baseOnly=TRUE) dinucleotideFrequency(brca2) trinucleotideFrequency(brca2) ## ----sol-gc-------------------------------------------------------------- gcContent <- function(x) { alf <- alphabetFrequency(x, as.prob=TRUE) sum(alf[c("G","C")]) } gcContent(brca1) gcContent(brca2) ## ----homosap-ex, message=FALSE------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) ## ----homosap-gc-ex, message=FALSE---------------------------------------- gcContent(Hsapiens[["chr17"]]) gcContent(Hsapiens[["chr13"]]) ## ----hist-gc-ex---------------------------------------------------------- chrs <- paste0("chr", c(1:22,"X","Y")) data <- sapply(chrs, function(x) gcContent(Hsapiens[[x]])) names(data) <- chrs plot(data, xlab="chromosomes",ylab="gc Frequencies", xlim=c(1,24), col="blue") abline(h=gcContent(Hsapiens[["chr17"]]),col="red") abline(h=gcContent(Hsapiens[["chr13"]]),col="orange") title(main="gc Frequecies across Human Chromosomes", col.main="blue", font.main=4) legend("topleft",c("chromsomes","brca1","brca2"), cex=0.8, col=c("blue","red","orange"), pch=21:22, lty=1:2) ## ----sessionInfo--------------------------------------------------------- sessionInfo()