## ----echo = FALSE, message = FALSE-------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval = FALSE------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("mslp") # BiocManager::install("data.table") ## ----eval = FALSE------------------------------------------------------------- # #- Preprocessing the data. # #- Path to input files. # library(mslp) # P_mut <- "data_mutations_extended.txt" # P_cna <- "data_CNA.txt" # P_expr <- "data_RNA_Seq_v2_expression_median.txt" # P_z <- "data_RNA_Seq_v2_mRNA_median_Zscores.txt" # # res <- pp_tcga(P_mut, P_cna, P_expr, P_z) # # saveRDS(res$mut_data, "mut_data.rds") # saveRDS(res$expr_data, "expr_data.rds") # saveRDS(res$zscore_data, "zscore_data.rds") ## ----------------------------------------------------------------------------- require(future) require(doFuture) library(magrittr) library(mslp) library(data.table) #- mutation from TCGA datasets to computate SLP via comp_slp data(example_comp_mut) #- mutation from TCGA datasets to computate SLP via corr_slp. data(example_corr_mut) data(example_expr) data(example_z) ## ----------------------------------------------------------------------------- plan(multisession, workers = 2) res_comp <- comp_slp(example_z, example_comp_mut) saveRDS(res_comp, file = "compSLP_res.rds") plan(sequential) ## ----------------------------------------------------------------------------- plan(multisession, workers = 2) res_corr <- corr_slp(example_expr, example_corr_mut) saveRDS(res_corr, "corrSLP_res.rds") #- Filter res by importance threshold to reduce false positives. im_thresh <- 0.0016 res_f <- res_corr[im >= im_thresh] plan(sequential) ## ----------------------------------------------------------------------------- plan(multisession, workers = 2) #- Random mutations and runs mutgene <- sample(intersect(example_corr_mut$mut_entrez, rownames(example_expr)), 5) nperm <- 3 res_random <- lapply(seq_len(nperm), function(x) corr_slp(example_expr, example_corr_mut, mutgene = mutgene)) im_res <- est_im(res_random) res_f_2 <- res_corr[im >= mean(im_res$roc_thresh)] plan(sequential) ## ----eval = FALSE------------------------------------------------------------- # library(readxl) # # #- nature11003-s3.xls is available in the supplementary data of CCLE publication. # ccle <- readxl::read_excel("nature11003-s3.xls", sheet = "Table S1", skip = 2) %>% # as.data.frame %>% # set_names(gsub(" ", "_", names(.))) %>% # as.data.table %>% # .[, CCLE_name := toupper(CCLE_name)] %>% # unique # # #- Only use the Nonsynonymous Mutations. CCLE_DepMap_18Q1_maf_20180207.txt can be downloaded from the CCLE website. # #- Only need the columns of cell_line and mut_entrez. # mut_type <- c("Missense_Mutation", "Nonsense_Mutation", "Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del", "In_Frame_Ins", "Nonstop_Mutation") # ccle_mut <- fread("CCLE_DepMap_18Q1_maf_20180207.txt") %>% # .[Variant_Classification %in% mut_type] %>% # .[, Tumor_Sample_Barcode := toupper(Tumor_Sample_Barcode)] %>% # .[, Entrez_Gene_Id := as.character(Entrez_Gene_Id)] %>% # .[, .(Tumor_Sample_Barcode, Entrez_Gene_Id)] %>% # unique %>% # setnames(c("cell_line", "mut_entrez")) # # #- Select brca cell lines # brca_ccle_mut <- ccle_mut[cell_line %in% unique(ccle[CCLE_tumor_type == "breast"])] ## ----------------------------------------------------------------------------- plan(multisession, workers = 2) #- Merge data. #- Load previous results. res_comp <- readRDS("compSLP_res.rds") res_corr <- readRDS("corrSLP_res.rds") merged_res <- merge_slp(res_comp, res_corr) #- Toy hits data. screen_1 <- merged_res[, .(slp_entrez, slp_symbol)] %>% unique %>% .[sample(nrow(.), round(.8 * nrow(.)))] %>% setnames(c(1, 2), c("screen_entrez", "screen_symbol")) %>% .[, cell_line := "cell_1"] screen_2 <- merged_res[, .(slp_entrez, slp_symbol)] %>% unique %>% .[sample(nrow(.), round(.8 * nrow(.)))] %>% setnames(c(1, 2), c("screen_entrez", "screen_symbol")) %>% .[, cell_line := "cell_2"] screen_hit <- rbind(screen_1, screen_2) #- Toy mutation data. mut_1 <- merged_res[, .(mut_entrez)] %>% unique %>% .[sample(nrow(.), round(.8 * nrow(.)))] %>% .[, cell_line := "cell_1"] mut_2 <- merged_res[, .(mut_entrez)] %>% unique %>% .[sample(nrow(.), round(.8 * nrow(.)))] %>% .[, cell_line := "cell_2"] mut_info <- rbind(mut_1, mut_2) #- Hits that are identified as SLPs. scr_res <- lapply(c("cell_1", "cell_2"), scr_slp, screen_hit, mut_info, merged_res) scr_res[lengths(scr_res) == 0] <- NULL scr_res <- rbindlist(scr_res) k_res <- cons_slp(scr_res, merged_res) #- Filter results, e.g., by kappa_value and padj. k_res_f <- k_res[kappa_value >= 0.6 & padj <= 0.1] plan(sequential) sessionInfo()