Skip to content

Commit

Permalink
Added simplified saving methods for DelayedArrays.
Browse files Browse the repository at this point in the history
Currently these just divert to the dense/sparse savers; support for
preservation of delayed operations via chihaya will be added later.

Also unlocked the sparse tests so that they check the block processing.
This also involved some bugfixes.
  • Loading branch information
LTLA committed Nov 28, 2023
1 parent d9e324c commit 013b5b1
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 13 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ importFrom(DelayedArray,currentViewport)
importFrom(DelayedArray,extract_array)
importFrom(DelayedArray,getAutoBlockLength)
importFrom(DelayedArray,getAutoBlockSize)
importFrom(DelayedArray,isPristine)
importFrom(DelayedArray,is_sparse)
importFrom(DelayedArray,matrixClass)
importFrom(DelayedArray,new_DelayedArray)
Expand All @@ -65,6 +66,7 @@ importFrom(DelayedArray,nzindex)
importFrom(DelayedArray,path)
importFrom(DelayedArray,read_sparse_block)
importFrom(DelayedArray,rowAutoGrid)
importFrom(DelayedArray,seed)
importFrom(DelayedArray,type)
importFrom(HDF5Array,H5SparseMatrixSeed)
importFrom(HDF5Array,HDF5Array)
Expand Down
49 changes: 49 additions & 0 deletions R/saveDelayedArray.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#' Save DelayedArrays to disk
#'
#' Save \link{DelayedArray} objects to their on-disk representation.
#'
#' @param x A \linkS4class{DelayedArray} object.
#' @param path String containing a path to a directory in which to save \code{x}.
#' @param delayedarray.preserve.ops Logical scalar indicating whether delayed operations should be preserved on-disk.
#' @param ... Further arguments, ignored.
#'
#' @return
#' \code{x} is saved to \code{path} and \code{NULL} is invisibly returned.
#'
#' @author Aaron Lun
#' @examples
#' mat <- Matrix::rsparsematrix(100, 200, density=0.2)
#' rownames(mat) <- paste0("GENE_", seq_len(nrow(mat)))
#' dmat <- DelayedArray::DelayedArray(mat) * 1
#'
#' dir <- tempfile()
#' saveObject(dmat, dir)
#' list.files(dir)
#'
#' @name saveDelayedArray
NULL

#' @export
#' @rdname saveDelayedArray
#' @importFrom DelayedArray isPristine seed
setMethod("saveObject", "DelayedArray", function(x, path, delayedarray.preserve.ops=FALSE, ...) {
if (isPristine(x)) {
s <- seed(x)
fun <- selectMethod("saveObject", class(s), optional=TRUE)
if (!is.null(fun)) {
return(fun(s, path, ...))
}
}

if (!delayedarray.preserve.ops) {
if (is_sparse(x)) {
.save_compressed_sparse_matrix(x, path, ...)
} else {
.save_array(x, path, ...)
}
} else {
stop("preservation of delayed operations is not currently supported")
}

invisible(NULL)
})
8 changes: 6 additions & 2 deletions R/saveSparseMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ setMethod("h5_write_sparse_matrix", "ANY", function(x, handle, details, ...) {
on.exit(H5Dclose(dhandle), add=TRUE, after=FALSE)

ihandle <- h5_create_vector(handle, "indices", N, type=.choose_itype(ilimit), chunks=chunks)
on.exit(H5Dclose(dhandle), add=TRUE, after=FALSE)
on.exit(H5Dclose(ihandle), add=TRUE, after=FALSE)

shandle <- H5Screate_simple(N)
on.exit(H5Sclose(shandle), add=TRUE, after=FALSE)
Expand All @@ -218,9 +218,13 @@ setMethod("h5_write_sparse_matrix", "ANY", function(x, handle, details, ...) {
secondary <- secondary[o]
v <- v[o]

if (!is.null(details$placeholder)) {
v[is.missing(v)] <- details$placeholder
}

H5Sselect_hyperslab(shandle, "H5S_SELECT_SET", start=start, count=length(v))
H5Dwrite(dhandle, v, h5spaceFile=shandle)
H5Dwrite(ihandle, secondary, h5spaceFile=shandle)
H5Dwrite(ihandle, secondary - 1L, h5spaceFile=shandle)
pointers <- c(pointers, list(tabulate(primary, ndim)))
start <- start + length(v)
}
Expand Down
37 changes: 37 additions & 0 deletions man/saveDelayedArray.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/testthat/setup.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
expect_identical_without_names <- function(x, y) {
if (!is.null(dimnames(x))) {
if (identical(dimnames(x), vector("list", length(dim(x))))) {
dimnames(x) <- NULL
}
}
if (!is.null(dimnames(y))) {
if (identical(dimnames(y), vector("list", length(dim(x))))) {
dimnames(y) <- NULL
}
}
expect_identical(x, y)
}
39 changes: 28 additions & 11 deletions tests/testthat/test-SparseMatrix.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# This tests the sparse readers/writers.
# library(testthat); library(alabaster.matrix); source("test-SparseMatrix.R")
# library(testthat); library(alabaster.matrix); source("setup.R"); source("test-SparseMatrix.R")

library(Matrix)
library(SparseArray)
library(DelayedArray)

test_that("writing to a sparse matrix works as expected for numeric data", {
for (i in 1:3) {
for (i in 1:4) {
for (miss in c(TRUE, FALSE)) {
x <- rsparsematrix(100, 20, 0.5)
if (miss) {
Expand All @@ -23,13 +24,13 @@ test_that("writing to a sparse matrix works as expected for numeric data", {
tmp <- tempfile(fileext=".h5")
saveObject(x, tmp)
roundtrip <- readObject(tmp)
expect_identical(as.matrix(roundtrip), as.matrix(x))
expect_identical_without_names(as.matrix(roundtrip), as.matrix(x))
}
}
})

test_that("writing to a sparse matrix works as expected for logical data", {
for (i in 1:3) {
for (i in 1:4) {
for (miss in c(TRUE, FALSE)) {
x <- rsparsematrix(100, 20, 0.5) > 0
if (miss) {
Expand All @@ -47,13 +48,13 @@ test_that("writing to a sparse matrix works as expected for logical data", {
tmp <- tempfile(fileext=".h5")
saveObject(x, tmp)
roundtrip <- readObject(tmp)
expect_identical(as.matrix(roundtrip), as.matrix(x))
expect_identical_without_names(as.matrix(roundtrip), as.matrix(x))
}
}
})

test_that("writing to a sparse matrix works as expected for integer data", {
for (i in 1:1) {
for (i in 1:2) {
for (miss in c(TRUE, FALSE)) {
x <- round(rsparsematrix(100, 20, 0.5) * 10)
if (miss) {
Expand All @@ -62,15 +63,15 @@ test_that("writing to a sparse matrix works as expected for integer data", {

if (i == 1) {
x <- as(x, "SVT_SparseMatrix")
type(x) <- "integer"
} else if (i == 2) {
x <- DelayedArray(x) * 1L # force use of the block method.
}
type(x) <- "integer"

tmp <- tempfile(fileext=".h5")
saveObject(x, tmp)
roundtrip <- readObject(tmp)
expect_identical(as.matrix(roundtrip), as.matrix(x))
expect_identical_without_names(as.matrix(roundtrip), as.matrix(x))
}
}
})
Expand All @@ -86,7 +87,7 @@ test_that("depositing a large sparseMatrix vector works correctly", {
roundtrip <- readObject(tmp)
expect_identical(as(roundtrip, 'dgCMatrix'), x)

# Now injecting an NA.
# Now injecting an NA, which should force it to use chunk-wise processing.
x@x[1] <- NA

tmp <- tempfile(fileext=".h5")
Expand All @@ -95,8 +96,24 @@ test_that("depositing a large sparseMatrix vector works correctly", {
# expect_identical(as(roundtrip, 'dgCMatrix'), x) # TODO: bug in HDF5Array
})

test_that("depositing small chunks works correctly", {
x <- rsparsematrix(1000, 500, 0.2)
y <- DelayedArray(x) * 1 # force block processing.

tmp <- tempfile(fileext=".h5")
local({
old <- getAutoBlockSize()
setAutoBlockSize(20000)
on.exit(setAutoBlockSize(old))
saveObject(x, tmp)
})

roundtrip <- readObject(tmp)
expect_identical(as(roundtrip, 'dgCMatrix'), x)
})

test_that("fallback to large integer types for indices works correctly", {
for (i in 1:3) {
for (i in 1:4) {
x <- rsparsematrix(100000, 20, 0.001)
x[100000,20] <- 99 # making sure there's a value at the bottom-right so that we check the index correctly.

Expand All @@ -112,7 +129,7 @@ test_that("fallback to large integer types for indices works correctly", {
tmp <- tempfile(fileext=".h5")
saveObject(x, tmp)
roundtrip <- readObject(tmp)
expect_identical(as.matrix(roundtrip), as.matrix(x))
expect_identical_without_names(as.matrix(roundtrip), as.matrix(x))
}
})

Expand Down

0 comments on commit 013b5b1

Please sign in to comment.