## ----echo=FALSE, results="hide", warning=FALSE-------------------------------- suppressPackageStartupMessages({ library('variants') }) ## ----eval=FALSE--------------------------------------------------------------- # library(VariantAnnotation) # library(org.Hs.eg.db) # library(TxDb.Hsapiens.UCSC.hg19.knownGene) # library(BSgenome.Hsapiens.UCSC.hg19) # library(PolyPhen.Hsapiens.dbSNP131) ## ----eval=FALSE--------------------------------------------------------------- # if (!"BiocManager" %in% rownames(installed.packages())) # install.packages("BiocManager") # BiocManager::install("mypackage") ## ----------------------------------------------------------------------------- file <- system.file("vcf", "NA06985_17.vcf.gz", package = "variants") ## ----------------------------------------------------------------------------- hdr <- scanVcfHeader(file) info(hdr) geno(hdr) ## ----------------------------------------------------------------------------- meta(hdr) ## ----------------------------------------------------------------------------- ## get entrez ids from gene symbols genesym <- c("TRPV1", "TRPV2", "TRPV3") geneid <- select( org.Hs.eg.db, keys=genesym, keytype="SYMBOL", columns="ENTREZID" ) geneid ## ----------------------------------------------------------------------------- txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene txdb ## ----------------------------------------------------------------------------- txdb <- keepSeqlevels(txdb, "chr17") ## ----------------------------------------------------------------------------- txbygene <- transcriptsBy(txdb, "gene") ## ----------------------------------------------------------------------------- gnrng <- unlist(range(txbygene[geneid$ENTREZID]), use.names=FALSE) names(gnrng) <- geneid$SYMBOL ## ----------------------------------------------------------------------------- seqinfo(gnrng) seqlevels(gnrng) <- sub("chr", "", seqlevels(gnrng)) genome(gnrng) <- "B37" seqinfo(gnrng) ## seqlevelsStyle(gnrng) <- "NCBI" param <- ScanVcfParam(which = gnrng, info = "DP", geno = c("GT", "cPd")) param ## Extract the TRPV ranges from the VCF file vcf <- readVcf(file, param = param) ## Inspect the VCF object with the 'fixed', 'info' and 'geno' accessors vcf head(fixed(vcf)) geno(vcf) ## ----------------------------------------------------------------------------- seqinfo(vcf) genome(vcf) <- "hg19" seqlevels(vcf) <- sub("([[:digit:]]+)", "chr\\1", seqlevels(vcf)) seqinfo(vcf) ## seqlevelsStyle(vcf) <- "UCSC" vcf <- keepSeqlevels(vcf, "chr17") ## ----eval=FALSE--------------------------------------------------------------- # ## Use the 'region' argument to define the region # ## of interest. See ?locateVariants for details. # cds <- locateVariants(vcf, txdb, CodingVariants()) # five <- locateVariants(vcf, txdb, FiveUTRVariants()) # splice <- locateVariants(vcf, txdb, SpliceSiteVariants()) # intron <- locateVariants(vcf, txdb, IntronVariants()) ## ----------------------------------------------------------------------------- all <- locateVariants(vcf, txdb, AllVariants()) ## ----------------------------------------------------------------------------- ## Did any variants match more than one gene? geneByQuery <- sapply(split(all$GENEID, all$QUERYID), unique) table(lengths(geneByQuery) > 1) ## Summarize the number of variants by gene: queryByGene <- sapply(split(all$QUERYID, all$GENEID), unique) table(lengths(queryByGene) > 1) sapply(queryByGene, length) ## Summarize variant location by gene: sapply(names(queryByGene), function(nm) { d <- all[all$GENEID %in% nm, c("QUERYID", "LOCATION")] table(d$LOCATION[duplicated(d) == FALSE]) }) ## ----------------------------------------------------------------------------- aa <- predictCoding(vcf, txdb, Hsapiens) ## ----------------------------------------------------------------------------- ## Did any variants match more than one gene? aageneByQuery <- split(aa$GENEID, aa$QUERYID) table(lengths(sapply(aageneByQuery, unique)) > 1) ## Summarize the number of variants by gene: aaaqueryByGene <- split(aa$QUERYID, aa$GENEID, drop=TRUE) sapply(aaaqueryByGene, length) ## Summarize variant consequence by gene: sapply(names(aaaqueryByGene), function(nm) { d <- aa[aa$GENEID %in% nm, c("QUERYID","CONSEQUENCE")] table(d$CONSEQUENCE[duplicated(d) == FALSE]) }) ## ----eval=FALSE--------------------------------------------------------------- # browseVignettes(package="VariantAnnotation") ## ----eval=FALSE--------------------------------------------------------------- # help.start() ## ----------------------------------------------------------------------------- sessionInfo()