TileDBArray 1.19.0
TileDB implements a framework for local and remote storage of dense and sparse arrays.
We can use this as a DelayedArray backend to provide an array-level abstraction,
thus allowing the data to be used in many places where an ordinary array or matrix might be used.
The TileDBArray package implements the necessary wrappers around TileDB-R
to support read/write operations on TileDB arrays within the DelayedArray framework.
TileDBArray
Creating a TileDBArray is as easy as:
X <- matrix(rnorm(1000), ncol=10)
library(TileDBArray)
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.56841173 0.90373485 0.31319912 . -0.47205602 -0.24551230
## [2,] 0.46405949 -0.55625563 -0.04175475 . -0.38601155 -1.60226256
## [3,] -1.43934781 1.66307384 0.13149947 . -1.94643348 0.01794626
## [4,] 0.09782233 1.21397211 -0.07058132 . -0.18789537 -0.33410157
## [5,] -1.10403847 -0.73324146 -0.60207193 . 3.11485049 0.89101365
## ... . . . . . .
## [96,] 0.1240103 1.3770297 0.3872702 . 0.5424488 1.1703854
## [97,] -2.5174387 1.4571269 1.2350138 . -0.2422422 -1.4544582
## [98,] -0.8651828 0.1612703 0.7084816 . 0.1014356 1.1942882
## [99,] 0.1945557 -1.4136624 -1.6494652 . -1.2700046 -0.4159237
## [100,] 0.4193387 -0.0517595 0.1554385 . -1.6995357 -1.4129708
Alternatively, we can use coercion methods:
as(X, "TileDBArray")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.56841173 0.90373485 0.31319912 . -0.47205602 -0.24551230
## [2,] 0.46405949 -0.55625563 -0.04175475 . -0.38601155 -1.60226256
## [3,] -1.43934781 1.66307384 0.13149947 . -1.94643348 0.01794626
## [4,] 0.09782233 1.21397211 -0.07058132 . -0.18789537 -0.33410157
## [5,] -1.10403847 -0.73324146 -0.60207193 . 3.11485049 0.89101365
## ... . . . . . .
## [96,] 0.1240103 1.3770297 0.3872702 . 0.5424488 1.1703854
## [97,] -2.5174387 1.4571269 1.2350138 . -0.2422422 -1.4544582
## [98,] -0.8651828 0.1612703 0.7084816 . 0.1014356 1.1942882
## [99,] 0.1945557 -1.4136624 -1.6494652 . -1.2700046 -0.4159237
## [100,] 0.4193387 -0.0517595 0.1554385 . -1.6995357 -1.4129708
This process also works for sparse matrices:
Y <- Matrix::rsparsematrix(1000, 1000, density=0.01)
writeTileDBArray(Y)
## <1000 x 1000> sparse TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] 0 0 0 . 0 0
## [2,] 0 0 0 . 0 0
## [3,] 0 0 0 . 0 0
## [4,] 0 0 0 . 0 0
## [5,] 0 0 0 . 0 0
## ... . . . . . .
## [996,] 0 0 0 . 0 0
## [997,] 0 0 0 . 0 0
## [998,] 0 0 0 . 0 0
## [999,] 0 0 0 . 0 0
## [1000,] 0 0 0 . 0 0
Logical and integer matrices are supported:
writeTileDBArray(Y > 0)
## <1000 x 1000> sparse TileDBMatrix object of type "logical":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] FALSE FALSE FALSE . FALSE FALSE
## [2,] FALSE FALSE FALSE . FALSE FALSE
## [3,] FALSE FALSE FALSE . FALSE FALSE
## [4,] FALSE FALSE FALSE . FALSE FALSE
## [5,] FALSE FALSE FALSE . FALSE FALSE
## ... . . . . . .
## [996,] FALSE FALSE FALSE . FALSE FALSE
## [997,] FALSE FALSE FALSE . FALSE FALSE
## [998,] FALSE FALSE FALSE . FALSE FALSE
## [999,] FALSE FALSE FALSE . FALSE FALSE
## [1000,] FALSE FALSE FALSE . FALSE FALSE
As are matrices with dimension names:
rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X)))
colnames(X) <- sprintf("SAMP_%i", seq_len(ncol(X)))
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 -0.56841173 0.90373485 0.31319912 . -0.47205602 -0.24551230
## GENE_2 0.46405949 -0.55625563 -0.04175475 . -0.38601155 -1.60226256
## GENE_3 -1.43934781 1.66307384 0.13149947 . -1.94643348 0.01794626
## GENE_4 0.09782233 1.21397211 -0.07058132 . -0.18789537 -0.33410157
## GENE_5 -1.10403847 -0.73324146 -0.60207193 . 3.11485049 0.89101365
## ... . . . . . .
## GENE_96 0.1240103 1.3770297 0.3872702 . 0.5424488 1.1703854
## GENE_97 -2.5174387 1.4571269 1.2350138 . -0.2422422 -1.4544582
## GENE_98 -0.8651828 0.1612703 0.7084816 . 0.1014356 1.1942882
## GENE_99 0.1945557 -1.4136624 -1.6494652 . -1.2700046 -0.4159237
## GENE_100 0.4193387 -0.0517595 0.1554385 . -1.6995357 -1.4129708
TileDBArrays
TileDBArrays are simply DelayedArray objects and can be manipulated as such.
The usual conventions for extracting data from matrix-like objects work as expected:
out <- as(X, "TileDBArray")
dim(out)
## [1] 100 10
head(rownames(out))
## [1] "GENE_1" "GENE_2" "GENE_3" "GENE_4" "GENE_5" "GENE_6"
head(out[,1])
## GENE_1 GENE_2 GENE_3 GENE_4 GENE_5 GENE_6
## -0.56841173 0.46405949 -1.43934781 0.09782233 -1.10403847 0.48476871
We can also perform manipulations like subsetting and arithmetic.
Note that these operations do not affect the data in the TileDB backend;
rather, they are delayed until the values are explicitly required,
hence the creation of the DelayedMatrix object.
out[1:5,1:5]
## <5 x 5> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5
## GENE_1 -0.56841173 0.90373485 0.31319912 0.29458176 -0.03614175
## GENE_2 0.46405949 -0.55625563 -0.04175475 0.62123323 1.63674653
## GENE_3 -1.43934781 1.66307384 0.13149947 0.11442335 1.04262391
## GENE_4 0.09782233 1.21397211 -0.07058132 -0.18705316 -0.18184696
## GENE_5 -1.10403847 -0.73324146 -0.60207193 0.19791303 0.98566536
out * 2
## <100 x 10> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 -1.1368235 1.8074697 0.6263982 . -0.94411204 -0.49102460
## GENE_2 0.9281190 -1.1125113 -0.0835095 . -0.77202311 -3.20452511
## GENE_3 -2.8786956 3.3261477 0.2629989 . -3.89286697 0.03589253
## GENE_4 0.1956447 2.4279442 -0.1411626 . -0.37579074 -0.66820314
## GENE_5 -2.2080769 -1.4664829 -1.2041439 . 6.22970098 1.78202730
## ... . . . . . .
## GENE_96 0.2480206 2.7540594 0.7745403 . 1.0848977 2.3407708
## GENE_97 -5.0348773 2.9142539 2.4700275 . -0.4844845 -2.9089164
## GENE_98 -1.7303656 0.3225406 1.4169632 . 0.2028713 2.3885763
## GENE_99 0.3891115 -2.8273249 -3.2989304 . -2.5400092 -0.8318474
## GENE_100 0.8386774 -0.1035190 0.3108770 . -3.3990714 -2.8259415
We can also do more complex matrix operations that are supported by DelayedArray:
colSums(out)
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5 SAMP_6
## 1.2541024 4.5049959 18.2158227 21.4432173 6.8384325 0.8108843
## SAMP_7 SAMP_8 SAMP_9 SAMP_10
## 14.2254174 -10.8712333 -6.3361647 1.1806673
out %*% runif(ncol(out))
## [,1]
## GENE_1 0.795173320
## GENE_2 -1.139872705
## GENE_3 -0.764629404
## GENE_4 2.439377127
## GENE_5 1.423608865
## GENE_6 3.305559765
## GENE_7 1.936159308
## GENE_8 0.369164143
## GENE_9 0.133123988
## GENE_10 -2.183470921
## GENE_11 -0.481491261
## GENE_12 -3.358310722
## GENE_13 -3.261378304
## GENE_14 0.127588296
## GENE_15 -0.835336172
## GENE_16 1.159865374
## GENE_17 0.648338130
## GENE_18 0.185195075
## GENE_19 3.859939491
## GENE_20 -2.404543107
## GENE_21 -2.835983178
## GENE_22 -0.118705958
## GENE_23 -0.903961511
## GENE_24 -2.043919823
## GENE_25 -0.456454046
## GENE_26 0.779336252
## GENE_27 1.984206296
## GENE_28 1.494395338
## GENE_29 -0.861409935
## GENE_30 2.078688771
## GENE_31 -2.744288889
## GENE_32 -3.186738307
## GENE_33 -0.001454226
## GENE_34 -4.199318573
## GENE_35 -1.145475055
## GENE_36 -2.187538933
## GENE_37 -0.920087265
## GENE_38 0.301943679
## GENE_39 0.657402233
## GENE_40 -0.600595741
## GENE_41 -0.456857815
## GENE_42 -4.408910098
## GENE_43 0.325223279
## GENE_44 3.073226524
## GENE_45 -1.919786091
## GENE_46 0.549985333
## GENE_47 -1.211320850
## GENE_48 0.434475179
## GENE_49 0.260224861
## GENE_50 2.473018711
## GENE_51 3.889433764
## GENE_52 3.664407442
## GENE_53 0.674127040
## GENE_54 -0.560552972
## GENE_55 0.812107744
## GENE_56 3.955231076
## GENE_57 3.509741940
## GENE_58 2.310791372
## GENE_59 2.762854469
## GENE_60 0.830308272
## GENE_61 0.374334572
## GENE_62 -0.550251726
## GENE_63 4.973842565
## GENE_64 -5.940989910
## GENE_65 4.337941634
## GENE_66 -1.835503115
## GENE_67 3.438190785
## GENE_68 -2.089494965
## GENE_69 0.069190826
## GENE_70 2.766786193
## GENE_71 2.217819434
## GENE_72 -2.212950994
## GENE_73 0.921728956
## GENE_74 0.900311176
## GENE_75 0.611686669
## GENE_76 1.889844415
## GENE_77 1.110682747
## GENE_78 0.329808569
## GENE_79 0.756991075
## GENE_80 1.195769259
## GENE_81 0.525332088
## GENE_82 -5.055624610
## GENE_83 2.130716475
## GENE_84 1.016185178
## GENE_85 -0.130689167
## GENE_86 -2.094283536
## GENE_87 -1.081598957
## GENE_88 0.504961019
## GENE_89 5.819333189
## GENE_90 3.862933359
## GENE_91 2.387216205
## GENE_92 2.103485229
## GENE_93 -0.247007311
## GENE_94 -1.190477755
## GENE_95 -2.001196212
## GENE_96 0.856300017
## GENE_97 0.640660625
## GENE_98 -0.932978252
## GENE_99 -5.036713404
## GENE_100 0.020277710
We can adjust some parameters for creating the backend with appropriate arguments to writeTileDBArray().
For example, the code below allows us to control the path to the backend
as well as the name of the attribute containing the data.
X <- matrix(rnorm(1000), ncol=10)
path <- tempfile()
writeTileDBArray(X, path=path, attr="WHEE")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -1.2197027 -0.7533243 0.8553519 . -1.43772407 0.83734626
## [2,] -0.2147053 0.5387369 1.9215906 . -0.07366986 0.96545748
## [3,] 0.8839770 -1.7060841 -1.5438596 . -0.28711177 -0.70287555
## [4,] 2.0865829 -0.7264361 0.5727891 . -0.21609107 -1.96786666
## [5,] 0.3209921 0.5021140 1.5028450 . -0.96658994 -0.47899230
## ... . . . . . .
## [96,] -1.01493684 -0.18831152 -1.26851968 . 0.24696945 1.61326848
## [97,] 0.01289775 0.44131708 -1.78408692 . 0.26143287 1.03880056
## [98,] -0.48359996 1.32633811 -0.69049872 . 0.74092928 0.40491056
## [99,] -1.55834863 0.70950555 -1.66998294 . 0.02809277 0.19390347
## [100,] -0.37910768 -0.11545088 -0.11258432 . 1.22803023 -1.00553565
As these arguments cannot be passed during coercion, we instead provide global variables that can be set or unset to affect the outcome.
path2 <- tempfile()
setTileDBPath(path2)
as(X, "TileDBArray") # uses path2 to store the backend.
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -1.2197027 -0.7533243 0.8553519 . -1.43772407 0.83734626
## [2,] -0.2147053 0.5387369 1.9215906 . -0.07366986 0.96545748
## [3,] 0.8839770 -1.7060841 -1.5438596 . -0.28711177 -0.70287555
## [4,] 2.0865829 -0.7264361 0.5727891 . -0.21609107 -1.96786666
## [5,] 0.3209921 0.5021140 1.5028450 . -0.96658994 -0.47899230
## ... . . . . . .
## [96,] -1.01493684 -0.18831152 -1.26851968 . 0.24696945 1.61326848
## [97,] 0.01289775 0.44131708 -1.78408692 . 0.26143287 1.03880056
## [98,] -0.48359996 1.32633811 -0.69049872 . 0.74092928 0.40491056
## [99,] -1.55834863 0.70950555 -1.66998294 . 0.02809277 0.19390347
## [100,] -0.37910768 -0.11545088 -0.11258432 . 1.22803023 -1.00553565
sessionInfo()
## R version 4.5.0 Patched (2025-04-21 r88169)
## Platform: x86_64-apple-darwin20
## Running under: macOS Monterey 12.7.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RcppSpdlog_0.0.21 TileDBArray_1.19.0 DelayedArray_0.35.1
## [4] SparseArray_1.9.0 S4Arrays_1.9.0 IRanges_2.43.0
## [7] abind_1.4-8 S4Vectors_0.47.0 MatrixGenerics_1.21.0
## [10] matrixStats_1.5.0 BiocGenerics_0.55.0 generics_0.1.3
## [13] Matrix_1.7-3 BiocStyle_2.37.0
##
## loaded via a namespace (and not attached):
## [1] bit_4.6.0 jsonlite_2.0.0 compiler_4.5.0
## [4] BiocManager_1.30.25 crayon_1.5.3 Rcpp_1.0.14
## [7] nanoarrow_0.6.0-1 jquerylib_0.1.4 yaml_2.3.10
## [10] fastmap_1.2.0 lattice_0.22-7 R6_2.6.1
## [13] RcppCCTZ_0.2.13 XVector_0.49.0 tiledb_0.30.2
## [16] knitr_1.50 bookdown_0.43 bslib_0.9.0
## [19] rlang_1.1.6 cachem_1.1.0 xfun_0.52
## [22] sass_0.4.10 bit64_4.6.0-1 cli_3.6.5
## [25] spdl_0.0.5 digest_0.6.37 grid_4.5.0
## [28] lifecycle_1.0.4 data.table_1.17.0 evaluate_1.0.3
## [31] nanotime_0.3.12 zoo_1.8-14 rmarkdown_2.29
## [34] tools_4.5.0 htmltools_0.5.8.1