## ----setup, include=FALSE-----------------------------------------------------
# Chunk options applied to every chunk of this document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  error = FALSE,
  crop = NULL
)

## ----library------------------------------------------------------------------
# Packages are attached in this order on purpose: attach order decides which
# package wins when two of them export the same name.
library(BiocStyle)
library(HPAanalyze)
library(tibble)
library(dplyr)
library(ggplot2)

## ----echo=FALSE, fig.cap="HPAanalyze workflow.", out.width = '100%'-----------
knitr::include_graphics("figures/workflow.png")

## ----downloadedData, eval=FALSE-----------------------------------------------
# # This should give you the latest of everything, but unless you have a lot of
# # RAM and processing power, I would not recommend it.
# downloadedData <- hpaDownload(downloadList='all')
# summary(downloadedData)
#
# #>                      Length Class  Mode
# #> normal_tissue         6     tbl_df list
# #> pathology            11     tbl_df list
# #> subcellular_location 14     tbl_df list
# #> ...

## ----histology----------------------------------------------------------------
# version = "example" will load the built-in dataset. That's sufficient for
# normal usage, and saves you some time.
downloadedData <- hpaDownload(downloadList = "histology", version = "example")
## ----normal_tissue------------------------------------------------------------
# Column-wise preview of the `normal_tissue` element of downloadedData.
tibble::glimpse(downloadedData[["normal_tissue"]], give.attr = FALSE)

## ----pathology----------------------------------------------------------------
# Same preview for the `pathology` element.
tibble::glimpse(downloadedData[["pathology"]], give.attr = FALSE)

## ----subcellular_location-----------------------------------------------------
# Same preview for the `subcellular_location` element.
tibble::glimpse(downloadedData[["subcellular_location"]], give.attr = FALSE)

## ----rna-help, eval = FALSE---------------------------------------------------
# ?hpaDownload

## ----rna1, warning=FALSE, message=FALSE, eval = FALSE-------------------------
# downloadedData <- hpaDownload(downloadList='rna tissue')
#
# tibble::glimpse(downloadedData, give.attr=FALSE)
#
# #> List of 4
# #>  $ rna_tissue_consensus: tibble [1,180,464 x 4] (S3: tbl_df/tbl/data.frame)
# #>   ..$ ensembl: chr [1:1180464] "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" ...
# #>   ..$ gene   : chr [1:1180464] "TSPAN6" "TSPAN6" "TSPAN6" "TSPAN6" ...
# #>   ..$ tissue : chr [1:1180464] "adipose tissue" "adrenal gland" "amygdala" "appendix" ...
# #>   ..$ nx     : chr [1:1180464] "27.0" "9.8" "7.0" "4.4" ...
# #>  $ rna_tissue_hpa      : tibble [845,810 x 6] (S3: tbl_df/tbl/data.frame)
# #>   ..$ ensembl: chr [1:845810] "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" ...
# #>   ..$ gene   : chr [1:845810] "TSPAN6" "TSPAN6" "TSPAN6" "TSPAN6" ...
# #>   ..$ tissue : chr [1:845810] "adipose tissue" "adrenal gland" "appendix" "B-cells" ...
# #>   ..$ tpm    : chr [1:845810] "31.5" "26.4" "9.2" "0.1" ...
# #>   ..$ ptpm   : chr [1:845810] "37.7" "32.7" "14.5" "0.2" ...
# #>   ..$ nx     : chr [1:845810] "9.8" "7.6" "2.1" "0.3" ...
# #>  $ rna_tissue_gtex     : tibble [639,744 x 6] (S3: tbl_df/tbl/data.frame)
# #>   ..$ ensembl: chr [1:639744] "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" ...
# #>   ..$ gene   : chr [1:639744] "TSPAN6" "TSPAN6" "TSPAN6" "TSPAN6" ...
# #>   ..$ tissue : chr [1:639744] "adipose tissue" "adrenal gland" "amygdala" "basal ganglia" ...
# #>   ..$ tpm    : chr [1:639744] "27.4" "15.5" "7.3" "7.7" ...
# #>   ..$ ptpm   : chr [1:639744] "34.2" "18.8" "9.0" "9.4" ...
# #>   ..$ nx     : chr [1:639744] "13.4" "9.8" "7.0" "6.6" ...
# #>  $ rna_tissue_fantom   : tibble [797,265 x 6] (S3: tbl_df/tbl/data.frame)
# #>   ..$ ensembl                : chr [1:797265] "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" "ENSG00000000003" ...
# #>   ..$ gene                   : chr [1:797265] "TSPAN6" "TSPAN6" "TSPAN6" "TSPAN6" ...
# #>   ..$ tissue                 : chr [1:797265] "adipose tissue" "amygdala" "appendix" "basal ganglia" ...
# #>   ..$ tags_per_million       : chr [1:797265] "60.1" "10.7" "13.5" "16.2" ...
# #>   ..$ scaled_tags_per_million: chr [1:797265] "87.4" "12.4" "17.8" "18.0" ...
# #>   ..$ nx                     : chr [1:797265] "27.0" "3.5" "4.4" "5.5" ...

## ----list_param, eval=FALSE---------------------------------------------------
# ## If you use the output from hpaDownload()
# downloadedData <- hpaDownload(downloadList=c("Normal tissue", "Pathology", "RNA HPA tissue", "RNA HPA cell line"))
# str(hpaListParam(downloadedData))
#
# # List of 4
# #  $ normal_tissue :List of 2
# #   ..$ tissue   : chr [1:63] "adipose tissue" "adrenal gland" "appendix" "bone marrow" ...
# #   ..$ cell_type: chr [1:120] "adipocytes" "glandular cells" "lymphoid tissue" "hematopoietic cells" ...
# #  $ pathology     :List of 1
# #   ..$ cancer: chr [1:20] "breast cancer" "carcinoid" "cervical cancer" "colorectal cancer" ...
# #  $ rna_tissue_hpa:List of 1
# #   ..$ tissue: chr [1:43] "adipose tissue" "adrenal gland" "appendix" "B-cells" ...
# #  $ rna_celline   :List of 1
# #   ..$ cell_line: chr [1:69] "A-431" "A549" "AF22" "AN3-CA" ...
## ----subset1, message=FALSE, warning=FALSE------------------------------------
# Reload the example data and show the row count of every element.
downloadedData <- hpaDownload(downloadList = "histology", version = "example")
sapply(downloadedData, nrow)

## ----subset2, message=FALSE, warning=FALSE------------------------------------
# Targets used to narrow the data down.
geneList <- c("TP53", "EGFR", "CD44", "PTEN", "IDH1", "IDH2", "CYCS")
tissueList <- c("breast", "cerebellum", "skin 1")
cancerList <- c("breast cancer", "glioma", "melanoma")
cellLineList <- c("A-431", "A549", "AF22", "AN3-CA")

subsetData <- hpaSubset(
  data = downloadedData,
  targetGene = geneList,
  targetTissue = tissueList,
  targetCancer = cancerList,
  targetCellLine = cellLineList
)
# Row counts again, for comparison with the unfiltered counts above.
sapply(subsetData, nrow)

## ----eval=FALSE---------------------------------------------------------------
# hpaExport(subsetData, fileName='subset.xlsx', fileType='xlsx')

## ----visData, echo=FALSE, warning=FALSE, message=FALSE------------------------
downloadedData <- hpaDownload("histology", "example")

## ----hpaVis_eg----------------------------------------------------------------
hpaVis(
  downloadedData,
  targetGene = c("GCH1", "PTS", "SPR", "DHFR"),
  targetTissue = c("cerebellum", "cerebral cortex", "hippocampus"),
  targetCancer = c("glioma")
)

## ----visTissue----------------------------------------------------------------
geneList <- c("TP53", "EGFR", "CD44", "PTEN", "IDH1", "IDH2", "CYCS")
tissueList <- c("breast", "cerebellum", "skin 1")

hpaVisTissue(
  downloadedData,
  targetGene = geneList,
  targetTissue = tissueList
)

## ----visPatho-----------------------------------------------------------------
geneList <- c("TP53", "EGFR", "CD44", "PTEN", "IDH1", "IDH2", "CYCS")
cancerList <- c("breast cancer", "glioma", "lymphoma", "prostate cancer")
colorGray <- c("slategray1", "slategray2", "slategray3", "slategray4")

hpaVisPatho(
  downloadedData,
  targetGene = geneList,
  targetCancer = cancerList,
  color = colorGray
)

## ----visSubcell---------------------------------------------------------------
geneList <- c("TP53", "EGFR", "CD44", "PTEN", "IDH1", "IDH2", "CYCS")

# customTheme = TRUE is passed so the ggplot2 layers added below supply the
# styling. The two theme() tweaks are given in a single call.
hpaVisSubcell(downloadedData, targetGene = geneList, customTheme = TRUE) +
  ggplot2::theme_minimal() +
  ggplot2::ylab("Subcellular locations") +
  ggplot2::xlab("Protein") +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  ggplot2::coord_equal()

## ----eval=FALSE---------------------------------------------------------------
# EGFR <- hpaXml(inputXml='ENSG00000146648')
# names(EGFR)
#
# #> [1] "ProtClass" "TissueExprSum" "Antibody" "TissueExpr"

## ----XmlGet, eval=FALSE-------------------------------------------------------
# EGFRxml <- hpaXmlGet('ENSG00000146648')

## ----XmlProtClass, eval=FALSE-------------------------------------------------
# NOTE(review): the printed tibble below appears to have lost its column-type
# row and some cells (rows have fewer values than columns) - confirm against a
# fresh run before relying on it.
# hpaXmlProtClass(EGFRxml)
#
# #> # A tibble: 40 x 4
# #> id name parent_id source
# #>
# #> 1 Ez Enzymes
# #> 2 Ec ENZYME proteins Ez ENZYME
# #> 3 Et Transferases Ec ENZYME
# #> 4 Ki Kinases Ez UniProt
# #> 5 Kt Tyr protein kinases Ki UniProt
# #> 6 Ma Predicted membrane proteins MDM
# #> 7 Md Membrane proteins predicted by MDM MDM
# #> 8 Me MEMSAT3 predicted membrane proteins MEMSAT3
# #> 9 Mf MEMSAT-SVM predicted membrane proteins MEMSAT-SVM
# #> 10 Mg Phobius predicted membrane proteins Phobius
# #> # ... with 30 more rows

## ----XmlTissueExprSum, eval=FALSE---------------------------------------------
# hpaXmlTissueExprSum(EGFRxml)
#
# #> $summary
# #> [1] "Cytoplasmic and membranous expression in several tissues, most abundant in placenta."
# #>
# #> $img
# #>            tissue
# #> 1 cerebral cortex
# #> 2      lymph node
# #> 3           liver
# #> 4           colon
# #> 5          kidney
# #> 6          testis
# #> 7        placenta
# #>                                                                imageUrl
# #> 1 http://v18.proteinatlas.org/images/18530/41191_B_7_5_rna_selected.jpg
# #> 2 http://v18.proteinatlas.org/images/18530/41191_A_7_8_rna_selected.jpg
# #> 3 http://v18.proteinatlas.org/images/18530/41191_A_7_4_rna_selected.jpg
# #> 4 http://v18.proteinatlas.org/images/18530/41191_A_9_3_rna_selected.jpg
# #> 5 http://v18.proteinatlas.org/images/18530/41191_A_9_5_rna_selected.jpg
# #> 6 http://v18.proteinatlas.org/images/18530/41191_A_6_6_rna_selected.jpg
# #> 7 http://v18.proteinatlas.org/images/18530/41191_A_1_7_rna_selected.jpg

## ----XmlAntibody, eval=FALSE--------------------------------------------------
# NOTE(review): in the printed tibbles below the column-type row is absent and
# some cells are blank; they look stripped from the original output - confirm
# against a fresh run.
# hpaXmlAntibody(EGFRxml)
#
# #> # A tibble: 5 x 4
# #>   id        releaseDate releaseVersion RRID
# #>
# #> 1 CAB000035 2006-03-13  1.2
# #> 2 HPA001200 2008-02-15  3.1            AB_1078723
# #> 3 HPA018530 2008-12-03  4.1            AB_1848044
# #> 4 CAB068186 2014-11-06  13             AB_2665679
# #> 5 CAB073534 2015-10-16  14

## ----XmlTissueExpr1, eval = FALSE---------------------------------------------
# tissueExpression <- hpaXmlTissueExpr(EGFRxml)
# summary(tissueExpression)
#
# #>      Length Class  Mode
# #> [1,] 18     tbl_df list
# #> [2,] 18     tbl_df list
# #> [3,] 18     tbl_df list
# #> [4,] 18     tbl_df list
# #> [5,] 18     tbl_df list

## ----XmlTissueExpr2, eval = FALSE---------------------------------------------
# tissueExpression[[1]]
#
# #> # A tibble: 327 x 18
# #>    patientId age   sex   staining intensity quantity location imageUrl
# #>
# #>  1 1653      53    Male                                       http://~
# #>  2 1721      60    Fema~                                      http://~
# #>  3 1725      57    Male                                       http://~
# #>  4 4         25    Male                                       http://~
# #>  5 512       34    Fema~                                      http://~
# #>  6 2664      74    Fema~                                      http://~
# #>  7 2665      88    Fema~                                      http://~
# #>  8 1391      54    Fema~                                      http://~
# #>  9 1447      45    Fema~                                      http://~
# #> 10 1452      44    Fema~                                      http://~
# #> # ... with 317 more rows, and 10 more variables: snomedCode1,
# #> #   snomedCode2, snomedCode3, snomedCode4,
# #> #   snomedCode5, tissueDescription1, tissueDescription2,
# #> #   tissueDescription3, tissueDescription4,
# #> #   tissueDescription5