## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("fastreeR") ## ----eval=TRUE, message=FALSE------------------------------------------------- options(java.parameters="-Xmx1G") unloadNamespace("fastreeR") library(fastreeR) library(utils) library(ape) library(stats) library(grid) library(BiocFileCache) ## ----eval=TRUE---------------------------------------------------------------- bfc <- BiocFileCache::BiocFileCache(ask = FALSE) tempVcfUrl <- paste0("https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/", "1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/", "supporting/related_samples/", "ALL.chrX.shapeit2_integrated_snvindels_v2a_related_samples_27022019.", "GRCh38.phased.vcf.gz") tempVcf <- BiocFileCache::bfcquery(bfc,field = "rname", "tempVcf")$rpath[1] if(is.na(tempVcf)) { tryCatch( { tempVcf <- BiocFileCache::bfcadd(bfc,"tempVcf",fpath=tempVcfUrl)[[1]] }, error=function(cond) { tempVcf <- system.file("extdata", "samples.vcf.gz", package="fastreeR") }, warning=function(cond) { tempVcf <- system.file("extdata", "samples.vcf.gz", package="fastreeR") } ) } if(file.size(tempVcf) == 0L) { tempVcf <- system.file("extdata", "samples.vcf.gz", package="fastreeR") } ## ----eval=TRUE---------------------------------------------------------------- tempFastasUrls <- c( #Mycobacterium liflandii paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/", "Mycobacterium_liflandii/latest_assembly_versions/", "GCF_000026445.2_ASM2644v2/GCF_000026445.2_ASM2644v2_genomic.fna.gz"), #Pelobacter propionicus paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/", "Pelobacter_propionicus/latest_assembly_versions/", "GCF_000015045.1_ASM1504v1/GCF_000015045.1_ASM1504v1_genomic.fna.gz"), #Rickettsia prowazekii paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/", "Rickettsia_prowazekii/latest_assembly_versions/", "GCF_000022785.1_ASM2278v1/GCF_000022785.1_ASM2278v1_genomic.fna.gz"), #Salmonella enterica paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/", "Salmonella_enterica/reference/", "GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz"), #Staphylococcus aureus paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/", "Staphylococcus_aureus/reference/", "GCF_000013425.1_ASM1342v1/GCF_000013425.1_ASM1342v1_genomic.fna.gz") ) tempFastas <- list() for (i in seq(1,5)) { tempFastas[[i]] <- BiocFileCache::bfcquery(bfc,field = "rname", paste0("temp_fasta",i))$rpath[1] if(is.na(tempFastas[[i]])) { tryCatch( { tempFastas[[i]] <- BiocFileCache::bfcadd(bfc, paste0("temp_fasta",i), fpath=tempFastasUrls[i])[[1]] }, error=function(cond) { tempFastas <- system.file("extdata", "samples.fasta.gz", package="fastreeR") break }, warning=function(cond) { tempFastas <- system.file("extdata", "samples.fasta.gz", package="fastreeR") break } ) } if(file.size(tempFastas[[i]]) == 0L) { tempFastas <- system.file("extdata", "samples.fasta.gz", package="fastreeR") break } } ## ----echo=TRUE, fig.cap="Sample statistics from vcf file", fig.wide=TRUE------ myVcfIstats <- fastreeR::vcf2istats(inputFile = tempVcf) plot(myVcfIstats[,7:9]) ## ----eval=TRUE---------------------------------------------------------------- myVcfDist <- fastreeR::vcf2dist(inputFile = tempVcf, threads = 2) ## ----echo=TRUE, fig.cap="Histogram of distances from vcf file", fig.wide=TRUE---- graphics::hist(myVcfDist, breaks = 100, main=NULL, xlab = "Distance", xlim = c(0,max(myVcfDist))) ## ----echo=TRUE, fig.cap="Tree from vcf with fastreeR", fig.wide=TRUE---------- myVcfTree <- fastreeR::dist2tree(inputDist = myVcfDist) plot(ape::read.tree(text = myVcfTree), direction = "down", cex = 0.3) ape::add.scale.bar() ape::axisPhylo(side = 2) ## ----echo=TRUE, fig.cap="Tree from vcf with fastreeR", fig.wide=TRUE---------- myVcfTree <- fastreeR::vcf2tree(inputFile = tempVcf, threads = 2) plot(ape::read.tree(text = myVcfTree), direction = "down", cex = 0.3) ape::add.scale.bar() ape::axisPhylo(side = 2) ## ----echo=TRUE, fig.cap="Tree from vcf with stats::hclust", fig.wide=TRUE----- myVcfTreeStats <- stats::hclust(myVcfDist) plot(myVcfTreeStats, ann = FALSE, cex = 0.3) ## ----eval=TRUE---------------------------------------------------------------- myVcfClust <- fastreeR::dist2clusters(inputDist = myVcfDist, cutHeight = 0.067) if (length(myVcfClust) > 1) { tree <- myVcfClust[[1]] clusters <- myVcfClust[[2]] tree clusters } ## ----eval=TRUE---------------------------------------------------------------- myFastaDist <- fastreeR::fasta2dist(tempFastas, kmer = 6) ## ----eval=FALSE--------------------------------------------------------------- # myFastaDist <- fastreeR::fasta2dist( # system.file("extdata", "samples.fasta.gz", package="fastreeR"), kmer = 6) ## ----echo=TRUE, fig.cap="Histogram of distances from fasta file",fig.wide=TRUE---- graphics::hist(myFastaDist, breaks = 100, main=NULL, xlab="Distance", xlim = c(0,max(myFastaDist))) ## ----echo=TRUE, fig.cap="Tree from fasta with fastreeR", fig.wide=TRUE-------- myFastaTree <- fastreeR::dist2tree(inputDist = myFastaDist) plot(ape::read.tree(text = myFastaTree), direction = "down", cex = 0.3) ape::add.scale.bar() ape::axisPhylo(side = 2) ## ----echo=TRUE, fig.cap="Tree from fasta with stats::hclust", fig.wide=TRUE---- myFastaTreeStats <- stats::hclust(myFastaDist) plot(myFastaTreeStats, ann = FALSE, cex = 0.3) ## ----setup-------------------------------------------------------------------- utils::sessionInfo()