## ----LoadFunctions, echo=FALSE, message=FALSE, warning=FALSE, results='hide'---- library(knitr) opts_chunk$set(error = FALSE) library(EventPointer) library(dplyr) library(kableExtra) ## ----style, echo = FALSE, results = 'asis'------------------------------------ ##BiocStyle::markdown() ## ----eval=FALSE--------------------------------------------------------------- # # library(BiocManager) # # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # # BiocManager::install("EventPointer") ## ----CDFGTF, eval=FALSE, warning=FALSE, collapse=TRUE------------------------- # # # Set input variables # PathFiles<-system.file("extdata",package="EventPointer") # DONSON_GTF<-paste(PathFiles,"/DONSON.gtf",sep="") # PSRProbes<-paste(PathFiles,"/PSR_Probes.txt",sep="") # JunctionProbes<-paste(PathFiles,"/Junction_Probes.txt",sep="") # Directory<-tempdir() # array<-"HTA-2_0" # # # Run the function # # CDFfromGTF(input="AffyGTF",inputFile=DONSON_GTF, # PSR=PSRProbes,Junc=JunctionProbes, # PathCDF=Directory,microarray=array) # ## ----aroma, eval=FALSE-------------------------------------------------------- # # # Simple example of Aroma.Affymetrix Preprocessing Pipeline # # verbose <- Arguments$getVerbose(-8); # timestampOn(verbose); # projectName <- "Experiment" # cdfGFile <- "EP_HTA-2_0,r" # cdfG <- AffymetrixCdfFile$byChipType(cdfGFile) # cs <- AffymetrixCelSet$byName(projectName, cdf=cdfG) # bc <- NormExpBackgroundCorrection(cs, method="mle", tag=c("*","r11")); # csBC <- process(bc,verbose=verbose,ram=20); # qn <- QuantileNormalization(csBC, typesToUpdate="pm"); # csN <- process(qn,verbose=verbose,ram=20); # plmEx <- ExonRmaPlm(csN, mergeGroups=FALSE) # fit(plmEx, verbose=verbose) # cesEx <- getChipEffectSet(plmEx) # ExFit <- extractDataFrame(cesEx, addNames = TRUE) ## ----EP_arrays, eval=TRUE----------------------------------------------------- data(ArraysData) Dmatrix<-matrix(c(1,1,1,1,0,0,1,1),nrow=4,ncol=2,byrow=FALSE) Cmatrix<-t(t(c(0,1))) EventsFound<-paste(system.file("extdata",package="EventPointer"),"/EventsFound.txt",sep="") Events<-EventPointer(Design=Dmatrix, Contrast=Cmatrix, ExFit=ArraysData, Eventstxt=EventsFound, Filter=FALSE, Qn=0.25, Statistic="LogFC", PSI=TRUE) ## ----EP_Arrays_Res_Table, echo=FALSE------------------------------------------ kable(Events[1:5,],digits=5,row.names=TRUE,align="c",caption = "Table 1: EventPointer Arrays results") ## ----Arrays_IGV, eval=FALSE, collapse=TRUE------------------------------------ # # # Set Input Variables # # DONSON_GTF<-paste(PathFiles,"/DONSON.gtf",sep="") # PSRProbes<-paste(PathFiles,"/PSR_Probes.txt",sep="") # JunctionProbes<-paste(PathFiles,"/Junction_Probes.txt",sep="") # Directory<-tempdir() # EventsFound<-paste(system.file("extdata",package="EventPointer"),"/EventsFound.txt",sep="") # array<-"HTA-2_0" # # # # Generate Visualization files # # EventPointer_IGV(Events[1,,drop=FALSE],"AffyGTF",DONSON_GTF,PSRProbes,JunctionProbes,Directory,EventsFound,array) # # ## ----PrepareBam, eval=FALSE, collapse=TRUE------------------------------------ # # Obtain the samples and directory for .bam files # # # the object si contains example sample information from the SGSeq R package # # use ?si to see the corresponding documentation # # BamInfo<-si # Samples<-BamInfo[,2] # PathToSamples <- system.file("extdata/bams", package = "SGSeq") # PathToGTF<-paste(system.file("extdata",package="EventPointer"),"/FBXO31.gtf",sep="") # # # Run PrepareBam function # SG_RNASeq<-PrepareBam_EP(Samples=Samples, # SamplePath=PathToSamples, # Ref_Transc="GTF", # fileTransc=PathToGTF, # cores=1) ## ----EventDetection, eval=TRUE------------------------------------------------ # Run EventDetection function data(SG_RNASeq) TxtPath<-tempdir() AllEvents_RNASeq<-EventDetection(SG_RNASeq,cores=1,Path=TxtPath) ## ----ListofLists, eval=FALSE-------------------------------------------------- # Events[[i]][[j]] ## ----EP_RNASeq, eval=TRUE----------------------------------------------------- Dmatrix<-matrix(c(1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1),ncol=2,byrow=FALSE) Cmatrix<-t(t(c(0,1))) Events <- EventPointer_RNASeq(AllEvents_RNASeq,Dmatrix,Cmatrix,Statistic="LogFC",PSI=TRUE) ## ----EP_RNASeq_Res_Table, echo=FALSE------------------------------------------ kable(Events[1:5,],digits=5,row.names=TRUE,align="c",caption = "Table 2: EventPointer RNASeq results") ## ----RNAS_IGV, eval=TRUE, collapse=TRUE--------------------------------------- # IGV Visualization EventsTxt<-paste(system.file("extdata",package="EventPointer"),"/EventsFound_RNASeq.txt",sep="") PathGTF<-tempdir() EventPointer_RNASeq_IGV(Events,SG_RNASeq,EventsTxt,PathGTF) ## ----GTFfGTF, eval=TRUE, warning=FALSE, collapse=TRUE------------------------- # Set input variables PathFiles<-system.file("extdata",package="EventPointer") inputFile <- paste(PathFiles,"/gencode.v24.ann_2genes.gtf",sep="") Transcriptome <- "Gencode24_2genes" Pathtxt <- tempdir() ## ----GTFfGTF_2, eval=FALSE, warning=FALSE, collapse=TRUE---------------------- # # Run the function # inputFile <- "/home/external_HDD/HDD_1/EventPointer/inst/extdata/gencode.v24.ann_2genes_prueba.gtf" # EventXtrans <- EventDetection_transcriptome(inputFile = inputFile, # Transcriptome = Transcriptome, # Pathtxt=Pathtxt, # cores=1) # ## ----GTFfGTFnames, eval=FALSE, warning=FALSE, collapse=TRUE------------------- # # names(EventXtrans) # ## ----getbootstrpdata, eval=TRUE, warning=FALSE, collapse=TRUE----------------- PathSamples<-system.file("extdata",package="EventPointer") PathSamples <- paste0(PathSamples,"/output") PathSamples <- dir(PathSamples,full.names = TRUE) data_exp <- getbootstrapdata(PathSamples = PathSamples,type = "kallisto") ## ----getdata_nb, eval=TRUE, warning=FALSE, collapse=TRUE---------------------- # this code chunk is an example of how to load the data from kallisto output. # the expression of the isoforms are counts PathFiles <- system.file("extdata",package="EventPointer") filesnames <- dir(paste0(PathFiles,"/output")) PathFiles <- dir(paste0(PathFiles,"/output"),full.names = TRUE) dirtoload <- paste0(PathFiles,"/","abundance.tsv") RNASeq <- read.delim(dirtoload[1],sep = "\t", colClasses = c(NA,"NULL","NULL",NA,"NULL")) for (n in 2:length(dirtoload)){ RNASeq[,n+1] <- read.delim(dirtoload[n],sep = '\t', colClasses = c('NULL','NULL','NULL',NA,'NULL')) } rownames(RNASeq)<-RNASeq[,1] RNASeq<-RNASeq[,-1] colnames(RNASeq) <- filesnames ## ----check_ann_nb, eval=TRUE, warning=FALSE, collapse=TRUE-------------------- rownames(RNASeq)[1:5] EventXtrans$transcritnames[1:5] ## ----PSI_Statistic, eval=TRUE, warning=FALSE, collapse=TRUE------------------- #change rownames of RNASeq variable rownames(RNASeq) <- sapply(strsplit(rownames(RNASeq),"\\|"),function(X) return(X[1])) RNASeq<-as.matrix(RNASeq) #must be a matrix variable PSIss_nb <- GetPSI_FromTranRef(PathsxTranscript = EventXtrans, Samples = RNASeq, Bootstrap = FALSE, Filter = FALSE) PSI <- PSIss_nb$PSI Expression <- PSIss_nb$ExpEvs ## ----check_ann, eval=TRUE, warning=FALSE, collapse=TRUE----------------------- rownames(data_exp[[1]])[1:5] EventXtrans$transcritnames[1:5] ## ----psi_with_bootstrap, eval=TRUE, warning=FALSE, collapse=TRUE-------------- #change rownames of the first element of teh list data_exp rownames(data_exp[[1]]) <- sapply(strsplit(rownames(data_exp[[1]]),"\\|"),function(X) return(X[1])) PSIss <- GetPSI_FromTranRef(PathsxTranscript = EventXtrans, Samples = data_exp, Bootstrap = TRUE, Filter = FALSE) PSI <- PSIss$PSI Expression <- PSIss$ExpEvs ## ----PSI_Statistic2, eval=TRUE, warning=FALSE, collapse=TRUE------------------ # Design and contrast matrix: Design <- matrix(c(1,1,1,1,0,0,1,1),nrow=4) Contrast <- matrix(c(0,1),nrow=2) # Statistical analysis: Fit <- EventPointer_RNASeq_TranRef(Count_Matrix = Expression,Statistic = "LogFC",Design = Design, Contrast = Contrast) ## ----EP_TranRef_Res_Table, echo=FALSE----------------------------------------- kable(Fit,digits=5,row.names=FALSE,align="c",caption = "Table 3: PSI_Statistic results") ## ----ep_bootstrp_statistic, eval=TRUE, warning=FALSE, collapse=TRUE----------- Dmatrix <- cbind(1,rep(c(0,1),each=2)) Cmatrix <- matrix(c(0,1),nrow=2) Fit <- EventPointer_Bootstraps(PSI = PSI, Design = Dmatrix, Contrast = Cmatrix, cores = 1, ram = 1, nBootstraps = 10, UsePseudoAligBootstrap = TRUE) ## ----ResultTable, eval=TRUE, warning=FALSE, collapse=TRUE--------------------- ResulTable(EP_Result = Fit,coef = 1,number = 5) ## ----ep_tranref_igv, eval=TRUE, warning=FALSE, collapse=TRUE------------------ SG_List <- EventXtrans$SG_List PathEventsTxt<-system.file('extdata',package='EventPointer') PathEventsTxt <- paste0(PathEventsTxt,"/EventsFound_Gencode24_2genes.txt") PathGTF <- tempdir() EventPointer_RNASeq_TranRef_IGV(SG_List = SG_List,pathtoeventstable = PathEventsTxt,PathGTF = PathGTF) ## ----biomart, eval=FALSE, warning=FALSE, collapse=TRUE------------------------ # # library(biomaRt) # mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "mar2016.archive.ensembl.org") # mart<-useDataset("hsapiens_gene_ensembl",mart) # # mistranscritos <- EventXtrans$transcritnames # head(mistranscritos) # # ## we need to remove the ".x": # mistranscritos <- gsub("\\..*","",mistranscritos) # # Dominios <- getBM(attributes = c("ensembl_transcript_id","interpro","interpro_description"), # filters = "ensembl_transcript_id", # values = mistranscritos, # mart=mart) # # #we build the isoform x protein domain matrix # library(Matrix) # ii <- match(Dominios$ensembl_transcript_id,mistranscritos) # misDominios <- unique(Dominios[,2]) # jj <- match(Dominios[,2],misDominios) # # TxD <- sparseMatrix(i=ii,j = jj,dims = c(length(mistranscritos),length(misDominios)),x = 1) # rownames(TxD) <- mistranscritos # colnames(TxD) <- misDominios # ## ----pd_enrichmetn, eval=FALSE, warning=FALSE, collapse=TRUE------------------ # # data("TxD") # # #check same annotation for transcripts: # EventXtrans$transcritnames[1] # # [1] "ENST00000611540.4" # rownames(TxD)[1] # # [1] "ENST00000611540" # # # ## as is not the same, we change EventXtrans$transcritnames annotation # transcriptnames <- EventXtrans$transcritnames # transcriptnames <- gsub("\\..*","",transcriptnames) # EventXtrans$transcritnames <- transcriptnames # # Result_PDEA <- Protein_Domain_Enrichment(PathsxTranscript = EventXtrans, # TxD = TxD, # Diff_PSI = Fit$deltaPSI) # # ## ----pathprimer3, eval=FALSE, warning=FALSE, collapse=TRUE-------------------- # # Primer3Path <- Sys.which("primer3_core") # ## ----FindPrimers, eval=FALSE, warning=FALSE, collapse=TRUE-------------------- # # data("EventXtrans") # #From the output of EventsGTFfromTranscriptomeGTF we take the splicing graph information # SG_list <- EventXtrans$SG_List # #SG_list contains the information of the splicing graphs for each gene # #Let's supone we want to design primers for the event 1 of the gene ENSG00000254709.7 # # #We take the splicing graph information of the required gene # SG <- SG_list$ENSG00000254709.7 # # #We point the event number # EventNum <- 1 # # #Define rest of variables: # Primer3Path <- Sys.which("primer3_core") # Dir <- "C:\\PROGRA~2\\primer3\\" # # # MyPrimers_taqman <- FindPrimers(SG = SG, # EventNum = EventNum, # Primer3Path = Primer3Path, # Dir = Dir, # mygenomesequence = BSgenome.Hsapiens.UCSC.hg38::Hsapiens, # taqman = 1, # nProbes=1, # nPrimerstwo=4, # ncommonForward=4, # ncommonReverse=4, # nExons=10, # nPrimers =5, # maxLength = 1200) # ## ----EP_DesiCP_Res_Table, eval=TRUE, warning=FALSE, collapse=TRUE,echo=FALSE---- data("MyPrimers") kable(MyPrimers[1:5,],digits=5,row.names=FALSE,align="c",caption = "Table 4: Data.frame output of FindPrimers for conventional PCR") %>% kable_styling() %>% scroll_box(width ="660px") ## ----EP_DesiTP_Res_Table, eval=TRUE, warning=FALSE, collapse=TRUE,echo=FALSE---- data("MyPrimers_taqman") kable(MyPrimers_taqman[1:5,],digits=5,row.names=FALSE,align="c",caption = "Table 5: Data.frame output of FindPrimers for conventional PCR") %>% kable_styling() %>% scroll_box(width ="660px") ## ----CDFGTF_MP, eval=FALSE, warning=FALSE, collapse=TRUE---------------------- # # # Set input variables # PathFiles<-system.file("extdata",package="EventPointer") # DONSON_GTF<-paste(PathFiles,"/DONSON.gtf",sep="") # PSRProbes<-paste(PathFiles,"/PSR_Probes.txt",sep="") # JunctionProbes<-paste(PathFiles,"/Junction_Probes.txt",sep="") # Directory<-tempdir() # array<-"HTA-2_0" # # # Run the function # # CDFfromGTF_Multipath(input="AffyGTF",inputFile=DONSON_GTF, # PSR=PSRProbes,Junc=JunctionProbes, # PathCDF=Directory,microarray=array,paths=3) # ## ----EventDetection_MP, eval=FALSE-------------------------------------------- # # Run EventDetection function # data(SG_RNASeq) # TxtPath<-tempdir() # AllEvents_RNASeq_MP<-EventDetectionMultipath(SG_RNASeq,cores=1,Path=TxtPath,paths=3) ## ----ListofLists_MP, eval=FALSE----------------------------------------------- # Events[[i]][[j]] ## ----reclassification, eval=TRUE---------------------------------------------- #load splicing graph data("SG_reclassify") #load table with info of the events PathFiles<-system.file("extdata",package="EventPointer") inputFile <- paste(PathFiles,"/Events_found_class.txt",sep="") EventTable <- read.delim(file=inputFile) #this table has the information of 5 complex events. EventTable_new <- Events_ReClassification(EventTable = EventTable, SplicingGraph = SG_reclassify) ## ----PSI_ADV, eval=TRUE, collapse=TRUE---------------------------------------- # Microarrays (two paths) data(ArraysData) PSI_Arrays_list<-EventPointer:::getPSI(ArraysData) PSI_Arrays <- PSI_Arrays_list$PSI Residuals_Arrays <- PSI_Arrays_list$Residuals # Microarrays (Multi-Path) data(ArrayDatamultipath) PSI_MP_Arrays_list <- EventPointer:::getPSImultipath(ArrayDatamultipath) PSI_multipath_Arrays <- PSI_MP_Arrays_list$PSI Residuals_multipath_Arrays <- PSI_MP_Arrays_list$Residuals # RNASeq (two paths) data(AllEvents_RNASeq) PSI_RNASeq_list<-EventPointer:::getPSI_RNASeq(AllEvents_RNASeq) PSI_RNASeq <- PSI_RNASeq_list$PSI Residuals_RNASeq <- PSI_RNASeq_list$Residuals # RNASeq (Multi-Path) data(AllEvents_RNASeq_MP) PSI_MP_RNASeq_list <- EventPointer:::getPSI_RNASeq_MultiPath(AllEvents_RNASeq_MP) PSI_multipath_RNASeq <- PSI_MP_RNASeq_list$PSI Residuals_multipath_RNASeq <- PSI_MP_RNASeq_list$Residuals ## ----PSI_ADV2, eval=TRUE, collapse=TRUE--------------------------------------- Dmatrix<-matrix(c(1,1,1,1,0,0,1,1),nrow=4,ncol=2,byrow=FALSE) Cmatrix<-t(c(0,1)) table <- PSI_Statistic(PSI = PSI_Arrays,Design = Dmatrix,Contrast = Cmatrix,nboot = 20) ## ----PSI_ADV3, eval=TRUE, collapse=TRUE--------------------------------------- Dmatrix<-matrix(c(1,1,1,1,0,0,1,1),nrow=4,ncol=2,byrow=FALSE) Cmatrix<-t(t(c(0,1))) Ress <- vector("list", length = ncol(Cmatrix)) fitresiduals <- limma::lmFit(Residuals_Arrays,design = Dmatrix) fitresiduals2 <- limma::contrasts.fit(fitresiduals, Cmatrix) fitresiduals2 <- limma::eBayes(fitresiduals2) # repeated if there is more than one contrast for (jj in 1:ncol(Cmatrix)) { TopPSI <- limma::topTable(fitresiduals2, coef = jj, number = Inf)[, 1, drop = FALSE] colnames(TopPSI)<-"Residuals" Ress[[jj]] <- TopPSI } ## ----------------------------------------------------------------------------- sessionInfo()