## ----setup-------------------------------------------------------------------- library(MungeSumstats) ## ----eval=FALSE, message=TRUE------------------------------------------------- # eduAttainOkbayPth <- system.file("extdata","eduAttainOkbay.txt", # package="MungeSumstats") # reformatted <- # MungeSumstats::format_sumstats(path=eduAttainOkbayPth, # ref_genome="GRCh37") ## ----echo=FALSE--------------------------------------------------------------- #don't run time intensive checks eduAttainOkbayPth <- system.file("extdata","eduAttainOkbay.txt", package="MungeSumstats") reformatted <- MungeSumstats::format_sumstats(path=eduAttainOkbayPth, on_ref_genome = FALSE, strand_ambig_filter = FALSE, bi_allelic_filter = FALSE, allele_flip_check = FALSE, ref_genome="GRCh37") ## ----message=TRUE------------------------------------------------------------- #save ALS GWAS from the ieu open GWAS project to a temp directory ALSvcfPth <- system.file("extdata","ALSvcf.vcf", package="MungeSumstats") ## ----eval=FALSE--------------------------------------------------------------- # reformatted_vcf <- # MungeSumstats::format_sumstats(path=ALSvcfPth, # ref_genome="GRCh37") ## ----eval=FALSE, message=FALSE------------------------------------------------ # #set # reformatted_vcf_2 <- # MungeSumstats::format_sumstats(path=ALSvcfPth, # ref_genome="GRCh37", # log_folder_ind=TRUE, # imputation_ind=TRUE, # log_mungesumstats_msgs=TRUE) ## ----echo=FALSE,message=FALSE------------------------------------------------- #don't run time intensive checks reformatted_vcf_2 <- MungeSumstats::format_sumstats(path=ALSvcfPth, ref_genome="GRCh37", log_folder_ind=TRUE, imputation_ind=TRUE, log_mungesumstats_msgs=TRUE, on_ref_genome = FALSE, strand_ambig_filter = FALSE, bi_allelic_filter = FALSE, allele_flip_check = FALSE) ## ----message=TRUE------------------------------------------------------------- names(reformatted_vcf_2) ## ----message=TRUE------------------------------------------------------------- print(reformatted_vcf_2$log_files$snp_bi_allelic) ## ----message=FALSE,eval=FALSE------------------------------------------------- # #set # reformatted_vcf_2 <- # MungeSumstats::format_sumstats(path=ALSvcfPth, # ref_genome="GRCh37", # log_folder_ind=TRUE, # imputation_ind=TRUE, # log_mungesumstats_msgs=TRUE, # return_data=TRUE, # return_format="GRanges") ## ----message=FALSE,eval=FALSE------------------------------------------------- # #set # reformatted_vcf_2 <- # MungeSumstats::format_sumstats(path=ALSvcfPth, # ref_genome="GRCh37", # write_vcf=TRUE, # save_format ="openGWAS") ## ----message=FALSE,eval=FALSE------------------------------------------------- # # Pass path to Educational Attainment Okbay sumstat file to a temp directory # eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt", # package = "MungeSumstats") # ALSvcfPth <- system.file("extdata","ALSvcf.vcf", package="MungeSumstats") # sumstats_list <- list(ss1 = eduAttainOkbayPth, ss2 = ALSvcfPth) # # ref_genomes <- MungeSumstats::get_genome_builds(sumstats_list = sumstats_list) # ## ----------------------------------------------------------------------------- sumstats_dt <- MungeSumstats::formatted_example() sumstats_dt_hg38 <- MungeSumstats::liftover(sumstats_dt = sumstats_dt, ref_genome = "hg19", convert_ref_genome = "hg38") knitr::kable(head(sumstats_dt_hg38)) ## ----------------------------------------------------------------------------- eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt", package = "MungeSumstats") formatted_path <- tempfile(fileext = "eduAttainOkbay_standardised.tsv.gz") #### 1. Read in the data and standardise header names #### dat <- MungeSumstats::read_sumstats(path = eduAttainOkbayPth, standardise_headers = TRUE) knitr::kable(head(dat)) #### 2. Write to disk as a compressed, tab-delimited, tabix-indexed file #### formatted_path <- MungeSumstats::write_sumstats(sumstats_dt = dat, save_path = formatted_path, tabix_index = TRUE, write_vcf = FALSE, return_path = TRUE) ## ----------------------------------------------------------------------------- #### Mess up some column names #### dat_raw <- data.table::copy(dat) data.table::setnames(dat_raw, c("SNP","CHR"), c("rsID","Seqnames")) #### Add a non-standard column that I want to keep the casing for #### dat_raw$Support <- runif(nrow(dat_raw)) dat2 <- MungeSumstats::standardise_header(sumstats_dt = dat_raw, uppercase_unmapped = FALSE, return_list = FALSE ) knitr::kable(head(dat2)) ## ----message=TRUE, echo=FALSE------------------------------------------------- utils::sessionInfo()