## ----eval=TRUE,echo=TRUE,message=FALSE----------------------------------------
# Load the package and its bundled hypoxia example data, then run both example
# inputs through preprocessInputData(). The second call overwrites
# `hypoxia_table`, so the table built from `hypoxia` is the one used below
# (its `Genes` and `log2FoldChange` columns are read in the GSEA section).
library(TFEA.ChIP)
data( "hypoxia_DESeq", "hypoxia", package="TFEA.ChIP" ) # load example datasets
hypoxia_table <- preprocessInputData( hypoxia_DESeq )
head( hypoxia_table )
head( hypoxia )
hypoxia_table <- preprocessInputData( hypoxia )
head( hypoxia_table )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# extract vector with names of upregulated genes
Genes.Upreg <- Select_genes( hypoxia_table, min_LFC = 1 )
# extract vector with names of non-responsive genes
Genes.Control <- Select_genes(
    hypoxia_table,
    min_pval = 0.5, max_pval = 1,
    min_LFC = -0.25, max_LFC = 0.25 )

## ----eval=TRUE,echo=TRUE,message=FALSE----------------------------------------
# Conversion of hgnc to ENTREZ IDs
GeneID2entrez( gene.IDs = c("EGLN3","NFYA","ALS2","MYC","ARNT" ) )
# To translate from mouse IDs:
# To get the equivalent human gene IDs
# GeneID2entrez( gene.IDs = c( "Hmmr", "Tlx3", "Cpeb4" ), mode = "m2h" )
# To get mouse ENTREZ gene IDs
# GeneID2entrez( gene.IDs = c( "Hmmr", "Tlx3", "Cpeb4" ), mode = "m2m" )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# generates list of contingency tables, one per dataset
CM_list_UP <- contingency_matrix( Genes.Upreg, Genes.Control )
# generates list of p-values and OR from association test
pval_mat_UP <- getCMstats( CM_list_UP )
head( pval_mat_UP )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# restrict the analysis to datasets assaying these factors
chip_index <- get_chip_index( TFfilter = c( "HIF1A","EPAS1","ARNT" ) )
# Or select ENCODE datasets only. This assignment overwrites the TF-filtered
# index above, so the ENCODE-only index is the one used in the calls below.
chip_index <- get_chip_index( encodeFilter = TRUE )
# generates list of contingency tables
CM_list_UPe <- contingency_matrix( Genes.Upreg, Genes.Control, chip_index )
# generates list of p-values and ORs
pval_mat_UPe <- getCMstats( CM_list_UPe, chip_index )
head( pval_mat_UPe )

## ----eval=TRUE, echo=TRUE, fig.width=8, fig.height=4--------------------------
# Rank TFs from the association results; the returned list is read by name
# for the plot ("TFranking_plot") and the ranking table ("TF_ranking").
TF_ranking <- rankTFs( pval_mat_UP, rankMethod = "gsea", makePlot = TRUE )
TF_ranking[[ "TFranking_plot" ]]
head( TF_ranking[[ "TF_ranking" ]] )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# plot_CM( pval_mat_UP ) #plot p-values against ORs

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# HIFs <- c( "EPAS1","HIF1A","ARNT" )
# names(HIFs) <- c( "EPAS1","HIF1A","ARNT" )
# col <- c( "red","blue","green" )
# # plot p-values against ORs highlighting indicated TFs
# plot_CM( pval_mat_UP, specialTF = HIFs, TF_colors = col )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# restrict the analysis to datasets assaying these factors
# (re-assigned here because chip_index was overwritten with the ENCODE-only
# index earlier in the script)
chip_index <- get_chip_index( TFfilter = c( "HIF1A","EPAS1","ARNT" ) )

## ----eval=TRUE,echo=TRUE,results='hide'---------------------------------------
# run GSEA analysis
GSEA.result <- GSEA_run(
    hypoxia_table$Genes, hypoxia_table$log2FoldChange,
    chip_index, get.RES = TRUE )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# Inspect the three result components read from GSEA.result; "RES" and
# "indicators" are indexed by dataset accession.
head(GSEA.result[["Enrichment.table"]])
head(GSEA.result[["RES"]][["GSM2390642"]])
head(GSEA.result[["indicators"]][["GSM2390642"]])

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# TF.highlight <- c( "EPAS1","ARNT","HIF1A" )
# names( TF.highlight ) <- c( "EPAS1","ARNT","HIF1A" )
# col <- c( "red","blue","green" )
# plot_ES(
#     GSEA.result, LFC = hypoxia_table$log2FoldChange,
#     specialTF = TF.highlight, TF_colors = col )

## ----eval=FALSE, echo=TRUE----------------------------------------------------
# plot_RES(
#     GSEA_result = GSEA.result, LFC = hypoxia_table$log2FoldChange,
#     TF = c( "ARNT", "EPAS1" ), Accession = c(
#         "GSE89836.ARNT.HUVEC-C",
#         "GSE89836.EPAS1.HUVEC-C" ) )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# # Template for converting a folder of peak files into a list of GRanges.
# # `myMetaData` is a user-supplied table with a `Name` column holding the
# # file names; `...` stands for the read.table() options of your files.
# folder <- "~/peak.files.folder"
# File.list <- dir( folder )
# # NOTE(review): the runnable txt2GR() example below uses "macs1.4";
# # confirm that "macs" is an accepted format name.
# format <- "macs"
#
# gr.list <- lapply(
#     seq_along( File.list ),
#     function( i, File.list, myMetaData, format ){
#
#         # dir() returns bare file names, so prepend the folder to read them
#         tmp <- read.table(
#             file.path( folder, File.list[i] ), ...,
#             stringsAsFactors = FALSE )
#
#         file.metadata <- myMetaData[ myMetaData$Name == File.list[i], ]
#
#         ChIP.dataset.gr <- txt2GR( tmp, format, file.metadata )
#
#         return( ChIP.dataset.gr )
#     },
#     File.list = File.list,
#     myMetaData = myMetaData,
#     format = format
# )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# As an example of the output
# Loading example datasets for this function
data( "ARNT.peaks.bed","ARNT.metadata", package = "TFEA.ChIP" )
ARNT.gr <- txt2GR( ARNT.peaks.bed, "macs1.4", ARNT.metadata )
head( ARNT.gr, n=2 )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# dnaseClusters <- read.table(
#     file="~/path.to.file.txt",
#     header = TRUE, sep="\t", stringsAsFactors = FALSE )
# dnaseClusters <- makeGRangesFromDataFrame(
#     dnaseClusters, ignore.strand=TRUE,
#     seqnames.field="chrom", start.field="chromStart",
#     end.field="chromEnd" )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# library( TxDb.Hsapiens.UCSC.hg19.knownGene, quietly = TRUE )
# txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
# Genes <- genes( txdb )
#
# near.gene <- findOverlaps( dnaseClusters, Genes, maxgap = 1000 )
#
# dnase.sites.list <- queryHits( near.gene )
# near.gene <- subjectHits( near.gene )
#
# gene_ids <- Genes[ near.gene ]$gene_id
# DHS.database <- dnaseClusters[ dnase.sites.list ]
# mcols(DHS.database)$gene_id <- gene_ids
#

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# Loading example datasets for this function
data( "DnaseHS_db", "gr.list", package="TFEA.ChIP" )
TF.gene.binding.db <- makeChIPGeneDB( DnaseHS_db, gr.list )
str( TF.gene.binding.db )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# Same call as above, but with the TxDb gene ranges as the reference and
# distanceMargin = 0. This overwrites TF.gene.binding.db from the previous
# chunk.
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# Loading example datasets for this function
data( "gr.list", package="TFEA.ChIP")
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
Genes <- genes( txdb )
TF.gene.binding.db <- makeChIPGeneDB( Genes, gr.list, distanceMargin = 0 )
str( TF.gene.binding.db )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# set_user_data( binary_matrix = myTFBSmatrix, metadata = myMetaData )