From f1f5134b51c8d029951fd053a53619d940f2fc04 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 11:02:48 +0100 Subject: [PATCH 01/16] migrate obsm varm tests, skip if known issue --- NAMESPACE | 1 + R/anndataR-package.R | 2 +- R/known_issues.R | 65 +++++++ inst/known_issues.yaml | 20 +++ man/anndataR-package.Rd | 3 +- tests/testthat/helper-expect_py_df_equal.R | 12 ++ .../testthat/helper-expect_py_matrix_equal.R | 28 +++ tests/testthat/test-InMemoryAnnData.R | 2 +- tests/testthat/test-roundtrip-obspvarp.R | 159 ++++++++++-------- tests/testthat/test-roundtrip-obsvar.R | 82 +++++---- 10 files changed, 265 insertions(+), 109 deletions(-) create mode 100644 R/known_issues.R create mode 100644 inst/known_issues.yaml create mode 100644 tests/testthat/helper-expect_py_df_equal.R create mode 100644 tests/testthat/helper-expect_py_matrix_equal.R diff --git a/NAMESPACE b/NAMESPACE index 72fce24f..6ac4485a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,5 +16,6 @@ importFrom(cli,cli_inform) importFrom(cli,cli_warn) importFrom(methods,as) importFrom(methods,new) +importFrom(purrr,map_dfr) importFrom(purrr,map_lgl) importFrom(rlang,caller_env) diff --git a/R/anndataR-package.R b/R/anndataR-package.R index 04762c83..44ae4943 100644 --- a/R/anndataR-package.R +++ b/R/anndataR-package.R @@ -61,7 +61,7 @@ ## usethis namespace: start #' @importFrom cli cli_abort cli_warn cli_inform -#' @importFrom purrr map_lgl +#' @importFrom purrr map_lgl map_dfr #' @importFrom methods as new ## usethis namespace: end NULL diff --git a/R/known_issues.R b/R/known_issues.R new file mode 100644 index 00000000..ad0a34a9 --- /dev/null +++ b/R/known_issues.R @@ -0,0 +1,65 @@ +# This file contains the known issues that are currently present in the package. +# It can be used to generate documentation, but also throw warnings instead of errors +# in tests. +#' @importFrom assertthat are_equal +read_known_issues <- function() { + # file is at inst/known_issues.yaml + data <- yaml::read_yaml(system.file("known_issues.yaml", package = "anndataR")) + + map_dfr( + data$known_issues, + function(row) { + assertthat::are_equal(row, c( + "backend", "slot", "dtype", "process", "error_message", + "description", "proposed_solution", "to_investigate", + "to_fix" + )) + + expand.grid(row) + } + ) +} + +is_known <- function(backend, slot, dtype, process, known_issues = NULL) { + if (is.null(known_issues)) { + known_issues <- read_known_issues() + } + + filt <- rep(TRUE, nrow(known_issues)) + + if (!is.null(backend)) { + filt <- filt & known_issues$backend %in% backend + } + if (!is.null(slot)) { + filt <- filt & known_issues$slot %in% slot + } + if (!is.null(dtype)) { + filt <- filt & known_issues$dtype %in% dtype + } + if (!is.null(process)) { + filt <- filt & known_issues$process %in% process + } + + filt +} + +message_if_known <- function(backend, slot, dtype, process, known_issues = NULL) { + if (is.null(known_issues)) { + known_issues <- read_known_issues() + } + + filt <- is_known(backend, slot, dtype, process, known_issues) + + if (any(filt)) { + # take first + row <- known_issues[which(filt)[[1]], ] + + paste0( + "Known issue for backend '", row$backend, "', slot '", row$slot, + "', dtype '", row$dtype, "', process '", row$process, "': ", + row$description + ) + } else { + NULL + } +} diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml new file mode 100644 index 00000000..f98f1d65 --- /dev/null +++ b/inst/known_issues.yaml @@ -0,0 +1,20 @@ +known_issues: + - backend: HDF5AnnData + slot: + - obsp + - varp + dtype: + - integer_csparse + - integer_matrix + - integer_rsparse + process: [read] + error_message: | + Failure (test-roundtrip-obspvarp.R:111:5): Writing an AnnData with obs and var 'integer_csparse' works + a$dtype (`actual`) not equal to b$dtype (`expected`). + + `class(actual)`: "numpy.dtypes.Float64DType" "numpy.dtype" "python.builtin.object" + `class(expected)`: "numpy.dtypes.Int64DType" "numpy.dtype" "python.builtin.object" + description: Integers are being converted to floats. + proposed_solution: Debug + to_investigate: True + to_fix: True diff --git a/man/anndataR-package.Rd b/man/anndataR-package.Rd index 47093117..671099fc 100644 --- a/man/anndataR-package.Rd +++ b/man/anndataR-package.Rd @@ -92,7 +92,6 @@ Authors: \item Luke Zappia \email{luke@lazappi.id.au} (\href{https://orcid.org/0000-0001-7744-8565}{ORCID}) (lazappi) \item Martin Morgan \email{mtmorgan.bioc@gmail.com} (\href{https://orcid.org/0000-0002-5874-8148}{ORCID}) (mtmorgan) \item Louise Deconinck \email{louise.deconinck@gmail.com} (\href{https://orcid.org/0000-0001-8100-6823}{ORCID}) (LouiseDck) - \item Data Intuitive \email{info@data-intuitive.com} } Other contributors: @@ -101,6 +100,8 @@ Other contributors: \item Isaac Virshup (\href{https://orcid.org/0000-0002-1710-8945}{ORCID}) (ivirshup) [contributor] \item Brian Schilder \email{brian_schilder@alumni.brown.edu} (\href{https://orcid.org/0000-0001-5949-2191}{ORCID}) (bschilder) [contributor] \item Chananchida Sang-aram (\href{https://orcid.org/0000-0002-0922-0822}{ORCID}) (csangara) [contributor] + \item Data Intuitive \email{info@data-intuitive.com} [funder, copyright holder] + \item Chan Zuckerberg Initiative [funder] } } diff --git a/tests/testthat/helper-expect_py_df_equal.R b/tests/testthat/helper-expect_py_df_equal.R new file mode 100644 index 00000000..6e0122d3 --- /dev/null +++ b/tests/testthat/helper-expect_py_df_equal.R @@ -0,0 +1,12 @@ +expect_py_df_equal <- function(a, b) { + pd <- reticulate::import("pandas") + + expect_null( + pd$testing$assert_frame_equal( + a, + b, + check_dtype = FALSE, + check_exact = FALSE + ) + ) +} diff --git a/tests/testthat/helper-expect_py_matrix_equal.R b/tests/testthat/helper-expect_py_matrix_equal.R new file mode 100644 index 00000000..b7c08787 --- /dev/null +++ b/tests/testthat/helper-expect_py_matrix_equal.R @@ -0,0 +1,28 @@ +expect_py_matrix_equal <- function(a, b, rtol = 1e-6, atol = 1e-6) { + bi <- reticulate::import_builtins() + np <- reticulate::import("numpy") + scipy <- reticulate::import("scipy") + + expect_equal(bi$type(a), bi$type(b)) # does this always work? + + expect_equal(a$dtype, b$dtype) + + expect_equal(reticulate::py_to_r(a$shape), reticulate::py_to_r(b$shape)) + + a_dense <- + if (scipy$sparse$issparse(a)) { + a$toarray() + } else { + a + } + b_dense <- + if (scipy$sparse$issparse(b)) { + b$toarray() + } else { + b + } + + expect_null( + np$testing$assert_allclose(a_dense, b_dense, rtol = rtol, atol = atol) + ) +} diff --git a/tests/testthat/test-InMemoryAnnData.R b/tests/testthat/test-InMemoryAnnData.R index 186d5edd..23bc0e5b 100644 --- a/tests/testthat/test-InMemoryAnnData.R +++ b/tests/testthat/test-InMemoryAnnData.R @@ -40,7 +40,7 @@ test_that("with empty var", { expect_identical(ad$shape(), c(10L, 0L)) }) -test_that("with only X, no obs or var", function() { +test_that("with only X, no obs or var", { X <- dummy$X dimnames(X) <- list( rownames(dummy$obs), diff --git a/tests/testthat/test-roundtrip-obspvarp.R b/tests/testthat/test-roundtrip-obspvarp.R index 00cb7c17..5326f22e 100644 --- a/tests/testthat/test-roundtrip-obspvarp.R +++ b/tests/testthat/test-roundtrip-obspvarp.R @@ -1,106 +1,121 @@ skip_if_no_anndata() -skip_if_not_installed("hdf5r") +skip_if_not_installed("reticulate") -data <- generate_dataset(10L, 20L) +library(reticulate) +testthat::skip_if_not( + reticulate::py_module_available("dummy_anndata"), + message = "Python dummy_anndata module not available for testing" +) -test_names <- names(data$obsp) +ad <- reticulate::import("anndata", convert = FALSE) +da <- reticulate::import("dummy_anndata", convert = FALSE) +bi <- reticulate::import_builtins() -# TODO: re-enable test -test_names <- c() +known_issues <- read_known_issues() + +test_names <- names(da$matrix_generators) for (name in test_names) { - test_that(paste0("roundtrip with obsp and varp '", name, "'"), { - # create anndata - ad <- AnnData( - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE], - obsp = data$obsp[name], - varp = data$varp[name] - ) + # first generate a python h5ad + adata_py <- da$generate_dataset( + x_type = NULL, + obs_types = list(), + var_types = list(), + layer_types = list(), + obsm_types = list(), + varm_types = list(), + obsp_types = list(name), + varp_types = list(name), + uns_types = list(), + nested_uns_types = list() + ) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - write_h5ad(ad, filename) + # create a couple of paths + file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) + file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) - # read from file - ad_new <- read_h5ad(filename, to = "HDF5AnnData") + # write to file + adata_py$write_h5ad(file_py) - # expect slots are unchanged + test_that(paste0("Reading an AnnData with obsp and varp '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsp", "varp"), + dtype = name, + process = "read", + known_issues = known_issues + ) + skip_if(!is.null(msg), message = msg) + + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") expect_equal( - ad_new$obsp[[name]], - data$obsp[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_r$shape(), + unlist(reticulate::py_to_r(adata_py$shape)) ) expect_equal( - ad_new$varp[[name]], - data$varp[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_r$obsp_keys(), + py_to_r(adata_py$obsp$keys()) + ) + expect_equal( + adata_r$varp_keys(), + py_to_r(adata_py$varp$keys()) ) + + # check that the print output is the same + str_r <- capture.output(print(adata_r)) + str_py <- capture.output(print(adata_py)) + expect_equal(str_r, str_py) }) -} -for (name in test_names) { - test_that(paste0("reticulate->hdf5 with obsp and varp '", name, "'"), { - # create anndata - ad <- anndata::AnnData( - obs = data.frame(row.names = data$obs_names), - var = data.frame(row.names = data$var_names), - obsp = data$obsp[name], - varp = data$varp[name] + # maybe this test simply shouldn't be run if there is a known issue with reticulate + test_that(paste0("Comparing an anndata with obsp and varp '", name, "' with reticulate works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsp", "varp"), + dtype = name, + process = c("read", "reticulate"), + known_issues = known_issues ) + skip_if(!is.null(msg), message = msg) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - ad$write_h5ad(filename) - - # read from file - ad_new <- HDF5AnnData$new(filename) + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") - # expect slots are unchanged expect_equal( - ad_new$obsp[[name]], - data$obsp[[name]], + adata_r$obsp[[name]], + py_to_r(py_get_item(adata_py$obsp, name)), tolerance = 1e-6 ) expect_equal( - ad_new$varp[[name]], - data$varp[[name]], + adata_r$varp[[name]], + py_to_r(py_get_item(adata_py$varp, name)), tolerance = 1e-6 ) }) -} -for (name in test_names) { - test_that(paste0("hdf5->reticulate with obsp and varp '", name, "'"), { - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - - # create anndata - ad <- AnnData( - obsp = data$obsp[name], - varp = data$varp[name], - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE] + test_that(paste0("Writing an AnnData with obsp and varp '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsp", "varp"), + dtype = name, + process = c("read", "write"), + known_issues = known_issues ) - write_h5ad(ad, filename) + skip_if(!is.null(msg), message = msg) + + adata_r <- read_h5ad(file_py, to = "InMemoryAnnData") + write_h5ad(adata_r, file_r) # read from file - ad_new <- anndata::read_h5ad(filename) + adata_py2 <- ad$read_h5ad(file_r) - # expect slots are unchanged - expect_equal( - ad_new$obsp[[name]], - data$obsp[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + # expect that the objects are the same + expect_py_matrix_equal( + py_get_item(adata_py2$obsp, name), + py_get_item(adata_py$obsp, name) ) - expect_equal( - ad_new$varp[[name]], - data$varp[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + expect_py_matrix_equal( + py_get_item(adata_py2$varp, name), + py_get_item(adata_py$varp, name) ) }) } diff --git a/tests/testthat/test-roundtrip-obsvar.R b/tests/testthat/test-roundtrip-obsvar.R index 0479845f..88a68be0 100644 --- a/tests/testthat/test-roundtrip-obsvar.R +++ b/tests/testthat/test-roundtrip-obsvar.R @@ -9,8 +9,8 @@ testthat::skip_if_not( ad <- reticulate::import("anndata", convert = FALSE) da <- reticulate::import("dummy_anndata", convert = FALSE) -pd <- reticulate::import("pandas", convert = FALSE) -bi <- reticulate::import_builtins() + +known_issues <- read_known_issues() test_names <- names(da$vector_generators) @@ -25,11 +25,9 @@ for (name in test_names) { varm_types = list(), obsp_types = list(), varp_types = list(), - uns_types = list() + uns_types = list(), + nested_uns_types = list() ) - # remove uns - workaround for https://github.com/data-intuitive/dummy-anndata/issues/2 - adata_py$uns <- bi$dict() - # TODO: remove X # create a couple of paths file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) @@ -39,6 +37,15 @@ for (name in test_names) { adata_py$write_h5ad(file_py) test_that(paste0("reading an AnnData with obs and var '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsp", "varp"), + dtype = name, + process = "read", + known_issues = known_issues + ) + skip_if(!is.null(msg), message = msg) + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") expect_equal( adata_r$shape(), @@ -57,24 +64,43 @@ for (name in test_names) { str_r <- capture.output(print(adata_r)) str_py <- capture.output(print(adata_py)) expect_equal(str_r, str_py) + }) + + # maybe this test simply shouldn't be run if there is a known issue with reticulate + test_that(paste0("Comparing an anndata with obs and var '", name, "' with reticulate works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obs", "var"), + dtype = name, + process = c("read", "reticulate"), + known_issues = known_issues + ) + skip_if(!is.null(msg), message = msg) + + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") - # if we would test the objects at this stage, - # we're also testing reticulate's conversion - # nolint start - # expect_equal( - # adata_r$obs[[name]], - # py_to_r(adata_py$obs[[name]]), - # tolerance = 1e-6 - # ) - # expect_equal( - # adata_r$var[[name]], - # py_to_r(adata_py$var[[name]]), - # tolerance = 1e-6 - # ) - # nolint end + expect_equal( + adata_r$obs[[name]], + py_to_r(adata_py$obs)[[name]], + tolerance = 1e-6 + ) + expect_equal( + adata_r$var[[name]], + py_to_r(adata_py$var)[[name]], + tolerance = 1e-6 + ) }) test_that(paste0("Writing an AnnData with obs and var '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsp", "varp"), + dtype = name, + process = c("read", "write"), + known_issues = known_issues + ) + skip_if(!is.null(msg), message = msg) + adata_r <- read_h5ad(file_py, to = "InMemoryAnnData") write_h5ad(adata_r, file_r) @@ -82,19 +108,7 @@ for (name in test_names) { adata_py2 <- ad$read_h5ad(file_r) # expect that the objects are the same - zz <- pd$testing$assert_frame_equal( - adata_py2$obs, - adata_py$obs, - check_dtype = FALSE, - check_exact = FALSE - ) - expect_null(reticulate::py_to_r(zz)) - zz <- pd$testing$assert_frame_equal( - adata_py2$var, - adata_py$var, - check_dtype = FALSE, - check_exact = FALSE - ) - expect_null(reticulate::py_to_r(zz)) + expect_py_df_equal(adata_py2$obs, adata_py$obs) + expect_py_df_equal(adata_py2$var, adata_py$var) }) } From ade4b99b86acc23bd37cde29d64f60155ae36a8d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 11:12:08 +0100 Subject: [PATCH 02/16] update known issues --- R/known_issues.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/R/known_issues.R b/R/known_issues.R index ad0a34a9..36935b42 100644 --- a/R/known_issues.R +++ b/R/known_issues.R @@ -1,19 +1,25 @@ # This file contains the known issues that are currently present in the package. # It can be used to generate documentation, but also throw warnings instead of errors # in tests. -#' @importFrom assertthat are_equal read_known_issues <- function() { - # file is at inst/known_issues.yaml + check_requires("Reading known issues", "yaml") + data <- yaml::read_yaml(system.file("known_issues.yaml", package = "anndataR")) map_dfr( data$known_issues, function(row) { - assertthat::are_equal(row, c( + expected_names <- c( "backend", "slot", "dtype", "process", "error_message", "description", "proposed_solution", "to_investigate", "to_fix" - )) + ) + if (!all(expected_names %in% names(row))) { + stop( + "Expected columns ", paste0("'", expected_names, "'", collapse = ", "), + " in known_issues.yaml, but got ", paste0("'", names(row), "'", collapse = ", ") + ) + } expand.grid(row) } From 0e65cbd62bc9f9493ea6a04aeaabc75424877257 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 11:16:06 +0100 Subject: [PATCH 03/16] fix linting issues --- tests/testthat/helper-expect_py_df_equal.R | 8 +++++++- tests/testthat/helper-expect_py_matrix_equal.R | 11 +++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/testthat/helper-expect_py_df_equal.R b/tests/testthat/helper-expect_py_df_equal.R index 6e0122d3..5b1e4ef9 100644 --- a/tests/testthat/helper-expect_py_df_equal.R +++ b/tests/testthat/helper-expect_py_df_equal.R @@ -1,7 +1,13 @@ expect_py_df_equal <- function(a, b) { + requireNamespace("testthat") + requireNamespace("reticulate") + + bi <- reticulate::import_builtins() pd <- reticulate::import("pandas") - expect_null( + testthat::expect_equal(bi$type(a), bi$type(b)) # does this always work? + + testthat::expect_null( pd$testing$assert_frame_equal( a, b, diff --git a/tests/testthat/helper-expect_py_matrix_equal.R b/tests/testthat/helper-expect_py_matrix_equal.R index b7c08787..3220c710 100644 --- a/tests/testthat/helper-expect_py_matrix_equal.R +++ b/tests/testthat/helper-expect_py_matrix_equal.R @@ -1,13 +1,16 @@ expect_py_matrix_equal <- function(a, b, rtol = 1e-6, atol = 1e-6) { + requireNamespace("testthat") + requireNamespace("reticulate") + bi <- reticulate::import_builtins() np <- reticulate::import("numpy") scipy <- reticulate::import("scipy") - expect_equal(bi$type(a), bi$type(b)) # does this always work? + testthat::expect_equal(bi$type(a), bi$type(b)) # does this always work? - expect_equal(a$dtype, b$dtype) + testthat::expect_equal(a$dtype, b$dtype) - expect_equal(reticulate::py_to_r(a$shape), reticulate::py_to_r(b$shape)) + testthat::expect_equal(reticulate::py_to_r(a$shape), reticulate::py_to_r(b$shape)) a_dense <- if (scipy$sparse$issparse(a)) { @@ -22,7 +25,7 @@ expect_py_matrix_equal <- function(a, b, rtol = 1e-6, atol = 1e-6) { b } - expect_null( + testthat::expect_null( np$testing$assert_allclose(a_dense, b_dense, rtol = rtol, atol = atol) ) } From 6a6b71d5112ed398b06e15701c9a1df810a80028 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 11:21:57 +0100 Subject: [PATCH 04/16] add yaml to suggests --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index cf2131d6..fb6a1862 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,8 @@ Suggests: SummarizedExperiment, testthat (>= 3.0.0), vctrs, - withr + withr, + yaml VignetteBuilder: knitr Config/Needs/website: pkgdown, tibble, knitr, rprojroot, stringr, readr, From c4ddeca74aa3bde5e894943ab19ca805d3c58782 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 11:49:07 +0100 Subject: [PATCH 05/16] fix error message --- inst/known_issues.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml index f98f1d65..90574608 100644 --- a/inst/known_issues.yaml +++ b/inst/known_issues.yaml @@ -9,12 +9,12 @@ known_issues: - integer_rsparse process: [read] error_message: | - Failure (test-roundtrip-obspvarp.R:111:5): Writing an AnnData with obs and var 'integer_csparse' works + Failure (test-roundtrip-obspvarp.R:111:5): Writing an AnnData with obsp and varp 'integer_csparse' works a$dtype (`actual`) not equal to b$dtype (`expected`). `class(actual)`: "numpy.dtypes.Float64DType" "numpy.dtype" "python.builtin.object" `class(expected)`: "numpy.dtypes.Int64DType" "numpy.dtype" "python.builtin.object" description: Integers are being converted to floats. - proposed_solution: Debug + proposed_solution: Debug and fix to_investigate: True to_fix: True From 400beea569bc6d3888ed8b36f241c7985e324ca5 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 11:57:13 +0100 Subject: [PATCH 06/16] add obsm varm as well --- inst/known_issues.yaml | 2 + tests/testthat/test-roundtrip-obsmvarm.R | 167 ++++++++++++----------- 2 files changed, 89 insertions(+), 80 deletions(-) diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml index 90574608..25e941a1 100644 --- a/inst/known_issues.yaml +++ b/inst/known_issues.yaml @@ -3,6 +3,8 @@ known_issues: slot: - obsp - varp + - obsm + - varm dtype: - integer_csparse - integer_matrix diff --git a/tests/testthat/test-roundtrip-obsmvarm.R b/tests/testthat/test-roundtrip-obsmvarm.R index d5a3277f..8e6cac4e 100644 --- a/tests/testthat/test-roundtrip-obsmvarm.R +++ b/tests/testthat/test-roundtrip-obsmvarm.R @@ -1,114 +1,121 @@ skip_if_no_anndata() -skip_if_not_installed("hdf5r") +skip_if_not_installed("reticulate") -data <- generate_dataset(10L, 20L) +library(reticulate) +testthat::skip_if_not( + reticulate::py_module_available("dummy_anndata"), + message = "Python dummy_anndata module not available for testing" +) -test_names <- names(data$obsm) +ad <- reticulate::import("anndata", convert = FALSE) +da <- reticulate::import("dummy_anndata", convert = FALSE) +bi <- reticulate::import_builtins() -# TODO: re-enable this -test_names <- test_names[test_names != "character_with_nas"] +known_issues <- read_known_issues() -# TODO: Add denseMatrix support to anndata and anndataR -test_names <- test_names[!grepl("_dense", test_names)] +test_names <- names(da$matrix_generators) for (name in test_names) { - test_that(paste0("roundtrip with obsm and varm '", name, "'"), { - # create anndata - ad <- AnnData( - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE], - obsm = data$obsm[name], - varm = data$varm[name] + # first generate a python h5ad + adata_py <- da$generate_dataset( + x_type = NULL, + obs_types = list(), + var_types = list(), + layer_types = list(), + obsm_types = list(name), + varm_types = list(name), + obsp_types = list(), + varp_types = list(), + uns_types = list(), + nested_uns_types = list() + ) + + # create a couple of paths + file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) + file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + + # write to file + adata_py$write_h5ad(file_py) + + test_that(paste0("Reading an AnnData with obsm and varm '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsm", "varm"), + dtype = name, + process = "read", + known_issues = known_issues ) + skip_if(!is.null(msg), message = msg) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - write_h5ad(ad, filename) - - # read from file - ad_new <- read_h5ad(filename, to = "HDF5AnnData") - - # expect slots are unchanged + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") expect_equal( - ad_new$obsm[[name]], - data$obsm[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_r$shape(), + unlist(reticulate::py_to_r(adata_py$shape)) ) expect_equal( - ad_new$varm[[name]], - data$varm[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_r$obsm_keys(), + py_to_r(adata_py$obsm$keys()) + ) + expect_equal( + adata_r$varm_keys(), + py_to_r(adata_py$varm$keys()) ) - }) -} -# TODO: re-enable these tests -# it seemed like there is a difference in the dimnames of the -# obsm and varm between anndata and anndataR -test_names <- c() + # check that the print output is the same + str_r <- capture.output(print(adata_r)) + str_py <- capture.output(print(adata_py)) + expect_equal(str_r, str_py) + }) -for (name in test_names) { - test_that(paste0("reticulate->hdf5 with obsm and varm '", name, "'"), { - # create anndata - ad <- anndata::AnnData( - obs = data.frame(row.names = data$obs_names), - var = data.frame(row.names = data$var_names), - obsm = data$obsm[name], - varm = data$varm[name] + # maybe this test simply shouldn't be run if there is a known issue with reticulate + test_that(paste0("Comparing an anndata with obsm and varm '", name, "' with reticulate works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsm", "varm"), + dtype = name, + process = c("read", "reticulate"), + known_issues = known_issues ) + skip_if(!is.null(msg), message = msg) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - ad$write_h5ad(filename) - - # read from file - ad_new <- HDF5AnnData$new(filename) + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") - # expect slots are unchanged expect_equal( - ad_new$obsm[[name]], - data$obsm[[name]], + adata_r$obsm[[name]], + py_to_r(py_get_item(adata_py$obsm, name)), tolerance = 1e-6 ) expect_equal( - ad_new$varm[[name]], - data$varm[[name]], + adata_r$varm[[name]], + py_to_r(py_get_item(adata_py$varm, name)), tolerance = 1e-6 ) }) -} -for (name in test_names) { - test_that(paste0("hdf5->reticulate with obsm and varm '", name, "'"), { - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - - # create anndata - ad <- AnnData( - obsm = data$obsm[name], - varm = data$varm[name], - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE] + test_that(paste0("Writing an AnnData with obsm and varm '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsm", "varm"), + dtype = name, + process = c("read", "write"), + known_issues = known_issues ) - write_h5ad(ad, filename) + skip_if(!is.null(msg), message = msg) + + adata_r <- read_h5ad(file_py, to = "InMemoryAnnData") + write_h5ad(adata_r, file_r) # read from file - ad_new <- anndata::read_h5ad(filename) + adata_py2 <- ad$read_h5ad(file_r) - # expect slots are unchanged - expect_equal( - ad_new$obsm[[name]], - data$obsm[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + # expect that the objects are the same + expect_py_matrix_equal( + py_get_item(adata_py2$obsm, name), + py_get_item(adata_py$obsm, name) ) - expect_equal( - ad_new$varm[[name]], - data$varm[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + expect_py_matrix_equal( + py_get_item(adata_py2$varm, name), + py_get_item(adata_py$varm, name) ) }) } From 9528eeec91ddbb4c9b28df71b0293e6632b76994 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 12:01:48 +0100 Subject: [PATCH 07/16] port layers --- inst/known_issues.yaml | 3 +- tests/testthat/test-roundtrip-layers.R | 159 +++++++++++++------------ 2 files changed, 84 insertions(+), 78 deletions(-) diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml index 25e941a1..10dfcbfd 100644 --- a/inst/known_issues.yaml +++ b/inst/known_issues.yaml @@ -5,10 +5,11 @@ known_issues: - varp - obsm - varm + - layers dtype: - integer_csparse - - integer_matrix - integer_rsparse + - integer_matrix process: [read] error_message: | Failure (test-roundtrip-obspvarp.R:111:5): Writing an AnnData with obsp and varp 'integer_csparse' works diff --git a/tests/testthat/test-roundtrip-layers.R b/tests/testthat/test-roundtrip-layers.R index 0e72961f..07824bb1 100644 --- a/tests/testthat/test-roundtrip-layers.R +++ b/tests/testthat/test-roundtrip-layers.R @@ -1,103 +1,108 @@ skip_if_no_anndata() -skip_if_not_installed("hdf5r") +skip_if_not_installed("reticulate") -data <- generate_dataset(10L, 20L) +library(reticulate) +testthat::skip_if_not( + reticulate::py_module_available("dummy_anndata"), + message = "Python dummy_anndata module not available for testing" +) -test_names <- names(data$layers) +ad <- reticulate::import("anndata", convert = FALSE) +da <- reticulate::import("dummy_anndata", convert = FALSE) +bi <- reticulate::import_builtins() -# TODO: Add denseMatrix support to anndata and anndataR -test_names <- test_names[!grepl("_dense", test_names)] +known_issues <- read_known_issues() + +test_names <- names(da$matrix_generators) for (name in test_names) { - test_that(paste0("roundtrip with layer '", name, "'"), { - # create anndata - ad <- AnnData( - layers = data$layers[name], - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE] + # first generate a python h5ad + adata_py <- da$generate_dataset( + x_type = NULL, + obs_types = list(), + var_types = list(), + layer_types = list(name), + obsm_types = list(), + varm_types = list(), + obsp_types = list(), + varp_types = list(), + uns_types = list(), + nested_uns_types = list() + ) + + # create a couple of paths + file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) + file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + + # write to file + adata_py$write_h5ad(file_py) + + test_that(paste0("Reading an AnnData with layer '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("layers"), + dtype = name, + process = "read", + known_issues = known_issues ) + skip_if(!is.null(msg), message = msg) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - write_h5ad(ad, filename) - - # read from file - ad_new <- read_h5ad(filename, to = "HDF5AnnData") - - # expect slots are unchanged + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") expect_equal( - ad_new$layers[[name]], - data$layers[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_r$shape(), + unlist(reticulate::py_to_r(adata_py$shape)) ) + expect_equal( + adata_r$layers_keys(), + py_to_r(adata_py$layers$keys()) + ) + + # check that the print output is the same + str_r <- capture.output(print(adata_r)) + str_py <- capture.output(print(adata_py)) + expect_equal(str_r, str_py) }) -} -for (name in test_names) { - test_that(paste0("reticulate->hdf5 with layer '", name, "'"), { - # add rownames - layers <- data$layers[name] - obs <- data.frame(row.names = rownames(data$obs)) - var <- data.frame(row.names = rownames(data$var)) - - # create anndata - ad <- anndata::AnnData( - layers = layers, - shape = dim(data$X), - obs = obs, - var = var + # maybe this test simply shouldn't be run if there is a known issue with reticulate + test_that(paste0("Comparing an anndata with layer '", name, "' with reticulate works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("layers"), + dtype = name, + process = c("read", "reticulate"), + known_issues = known_issues ) + skip_if(!is.null(msg), message = msg) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - ad$write_h5ad(filename) + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") - # read from file - ad_new <- HDF5AnnData$new(filename) - - # expect slots are unchanged expect_equal( - ad_new$layers[[name]], - data$layers[[name]], + adata_r$layers[[name]], + py_to_r(py_get_item(adata_py$layers, name)), tolerance = 1e-6 ) }) -} - -r2py_names <- test_names -# TODO: rsparse gets converted to csparse by anndata -r2py_names <- r2py_names[!grepl("rsparse", r2py_names)] - -for (name in r2py_names) { - test_that(paste0("hdf5->reticulate with layer '", name, "'"), { - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - # make anndata - ad <- AnnData( - layers = data$layers[name], - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE] + test_that(paste0("Writing an AnnData with layer '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("layers"), + dtype = name, + process = c("read", "write"), + known_issues = known_issues ) - write_h5ad(ad, filename) + skip_if(!is.null(msg), message = msg) + + adata_r <- read_h5ad(file_py, to = "InMemoryAnnData") + write_h5ad(adata_r, file_r) # read from file - ad_new <- anndata::read_h5ad(filename) - - # expect slots are unchanged - layer_ <- ad_new$layers[[name]] - # anndata returns these layers as CsparseMatrix - if (grepl("rsparse", name)) { - layer_ <- as(layer_, "RsparseMatrix") - } - # strip rownames - dimnames(layer_) <- list(NULL, NULL) - expect_equal( - layer_, - data$layers[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_py2 <- ad$read_h5ad(file_r) + + # expect that the objects are the same + expect_py_matrix_equal( + py_get_item(adata_py2$layers, name), + py_get_item(adata_py$layers, name) ) }) } From 9bc7aa7720d9769d8035323e347519aba99ee2ea Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 12:04:16 +0100 Subject: [PATCH 08/16] port X --- inst/known_issues.yaml | 3 +- tests/testthat/test-roundtrip-X.R | 161 +++++++++++++++--------------- 2 files changed, 80 insertions(+), 84 deletions(-) diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml index 10dfcbfd..6834edfa 100644 --- a/inst/known_issues.yaml +++ b/inst/known_issues.yaml @@ -1,11 +1,12 @@ known_issues: - backend: HDF5AnnData slot: + - X + - layers - obsp - varp - obsm - varm - - layers dtype: - integer_csparse - integer_rsparse diff --git a/tests/testthat/test-roundtrip-X.R b/tests/testthat/test-roundtrip-X.R index ca491b87..77f06b7c 100644 --- a/tests/testthat/test-roundtrip-X.R +++ b/tests/testthat/test-roundtrip-X.R @@ -1,109 +1,104 @@ skip_if_no_anndata() -skip_if_not_installed("hdf5r") +skip_if_not_installed("reticulate") -data <- generate_dataset(10L, 20L) +library(reticulate) +testthat::skip_if_not( + reticulate::py_module_available("dummy_anndata"), + message = "Python dummy_anndata module not available for testing" +) -test_names <- names(data$layers) +ad <- reticulate::import("anndata", convert = FALSE) +da <- reticulate::import("dummy_anndata", convert = FALSE) +bi <- reticulate::import_builtins() -# TODO: Add denseMatrix support to anndata and anndataR -test_names <- test_names[!grepl("_dense", test_names)] +known_issues <- read_known_issues() + +test_names <- names(da$matrix_generators) for (name in test_names) { - test_that(paste0("roundtrip with X '", name, "'"), { - # create anndata - ad <- AnnData( - X = data$layers[[name]], - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE] + # first generate a python h5ad + adata_py <- da$generate_dataset( + x_type = name, + obs_types = list(), + var_types = list(), + layer_types = list(), + obsm_types = list(), + varm_types = list(), + obsp_types = list(), + varp_types = list(), + uns_types = list(), + nested_uns_types = list() + ) + + # create a couple of paths + file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) + file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + + # write to file + adata_py$write_h5ad(file_py) + + test_that(paste0("Reading an AnnData with X '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("X"), + dtype = name, + process = "read", + known_issues = known_issues ) + skip_if(!is.null(msg), message = msg) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - write_h5ad(ad, filename) - - # read from file - ad_new <- read_h5ad(filename, to = "HDF5AnnData") - - # expect slots are unchanged + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") expect_equal( - ad_new$X, - data$layers[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_r$shape(), + unlist(reticulate::py_to_r(adata_py$shape)) ) + + # check that the print output is the same + str_r <- capture.output(print(adata_r)) + str_py <- capture.output(print(adata_py)) + expect_equal(str_r, str_py) }) -} -for (name in test_names) { - test_that(paste0("reticulate->hdf5 with X '", name, "'"), { - # add rownames - X <- data$layers[[name]] - obs <- data.frame(row.names = rownames(data$obs)) - var <- data.frame(row.names = rownames(data$var)) - - # TODO: remove this? - if (is.matrix(X) && any(is.na(X))) { - na_indices <- is.na(X) - X[na_indices] <- NaN - } - - # create anndata - ad <- anndata::AnnData( - X = X, - obs = obs, - var = var + # maybe this test simply shouldn't be run if there is a known issue with reticulate + test_that(paste0("Comparing an anndata with X '", name, "' with reticulate works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("X"), + dtype = name, + process = c("read", "reticulate"), + known_issues = known_issues ) + skip_if(!is.null(msg), message = msg) - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - ad$write_h5ad(filename) - - # read from file - ad_new <- HDF5AnnData$new(filename) + adata_r <- read_h5ad(file_py, to = "HDF5AnnData") - # expect slots are unchanged expect_equal( - ad_new$X, - data$layers[[name]], + adata_r$X, + py_to_r(adata_py$X), tolerance = 1e-6 ) }) -} - -r2py_names <- test_names -# TODO: re-enable -- rsparse gets converted to csparse by anndata -r2py_names <- r2py_names[!grepl("rsparse", r2py_names)] - -for (name in r2py_names) { - test_that(paste0("hdf5->reticulate with X '", name, "'"), { - # write to file - filename <- withr::local_file(tempfile(fileext = ".h5ad")) - - # make anndata - ad <- AnnData( - X = data$layers[[name]], - obs = data$obs[, c(), drop = FALSE], - var = data$var[, c(), drop = FALSE] + test_that(paste0("Writing an AnnData with X '", name, "' works"), { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("X"), + dtype = name, + process = c("read", "write"), + known_issues = known_issues ) - write_h5ad(ad, filename) + skip_if(!is.null(msg), message = msg) + + adata_r <- read_h5ad(file_py, to = "InMemoryAnnData") + write_h5ad(adata_r, file_r) # read from file - ad_new <- anndata::read_h5ad(filename) - - # expect slots are unchanged - layer_ <- ad_new$X - # anndata returns these layers as CsparseMatrix - if (grepl("rsparse", name)) { - layer_ <- as(layer_, "RsparseMatrix") - } - # strip rownames - dimnames(layer_) <- list(NULL, NULL) - expect_equal( - layer_, - data$layers[[name]], - ignore_attr = TRUE, - tolerance = 1e-6 + adata_py2 <- ad$read_h5ad(file_r) + + # expect that the objects are the same + expect_py_matrix_equal( + adata_py2$X, + adata_py$X ) }) } From 9dc494f7da3427af5717ff13f910787158f9102a Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 12:04:21 +0100 Subject: [PATCH 09/16] add note --- tests/testthat/test-roundtrip-obsmvarm.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-roundtrip-obsmvarm.R b/tests/testthat/test-roundtrip-obsmvarm.R index 8e6cac4e..4376463d 100644 --- a/tests/testthat/test-roundtrip-obsmvarm.R +++ b/tests/testthat/test-roundtrip-obsmvarm.R @@ -7,6 +7,9 @@ testthat::skip_if_not( message = "Python dummy_anndata module not available for testing" ) +# TODO: make sure data frames in obsm / varm are also tested? +# TODO: create a github issue for this + ad <- reticulate::import("anndata", convert = FALSE) da <- reticulate::import("dummy_anndata", convert = FALSE) bi <- reticulate::import_builtins() From e08932cf391e0be47b9f4e63febea13c365d606e Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 13:10:41 +0100 Subject: [PATCH 10/16] also include arrays in obsmvarm --- inst/known_issues.yaml | 50 ++++++++++++++++ tests/testthat/helper-expect_equal_py.R | 57 +++++++++++++++++++ tests/testthat/helper-expect_py_df_equal.R | 18 ------ .../testthat/helper-expect_py_matrix_equal.R | 31 ---------- tests/testthat/test-roundtrip-X.R | 2 +- tests/testthat/test-roundtrip-layers.R | 4 +- tests/testthat/test-roundtrip-obsmvarm.R | 16 +++--- tests/testthat/test-roundtrip-obspvarp.R | 8 +-- tests/testthat/test-roundtrip-obsvar.R | 8 +-- 9 files changed, 126 insertions(+), 68 deletions(-) create mode 100644 tests/testthat/helper-expect_equal_py.R delete mode 100644 tests/testthat/helper-expect_py_df_equal.R delete mode 100644 tests/testthat/helper-expect_py_matrix_equal.R diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml index 6834edfa..0d7c6e8a 100644 --- a/inst/known_issues.yaml +++ b/inst/known_issues.yaml @@ -22,3 +22,53 @@ known_issues: proposed_solution: Debug and fix to_investigate: True to_fix: True + - backend: HDF5AnnData + slot: + - obsm + - varm + dtype: + - boolean_array + - categorical + - categorical_missing_values + - categorical_ordered + - categorical_ordered_missing_values + - dense_array + - integer_array + - nullable_boolean_array + - nullable_integer_array + - string_array + process: [reticulate] + error_message: | + adata_r$varm[[name]] (`actual`) not equal to py_to_r(py_get_item(adata_py$varm, name)) (`expected`). + + `dim(actual)` is absent + `dim(expected)` is an integer vector (20) + description: Python nd.arrays have a dimension while R vectors do not. + proposed_solution: Debug and fix + to_investigate: True + to_fix: True + - backend: HDF5AnnData + slot: + - obsm + - varm + dtype: + - boolean_array + - categorical + - categorical_missing_values + - categorical_ordered + - categorical_ordered_missing_values + - dense_array + - integer_array + - nullable_boolean_array + - nullable_integer_array + - string_array + process: [write] + error_message: | + Error in `if (found_dim != expected_dim) { + stop("dim(", label, ")[", i, "] should have shape: ", expected_dim, + ", found: ", found_dim, ".") + }`: argument is of length zero + description: R vectors don't have a dimension. + proposed_solution: The input checking function for obsm and varm should allow the object to be a vector of the correct length instead of only a matrix or a data frame. + to_investigate: True + to_fix: True diff --git a/tests/testthat/helper-expect_equal_py.R b/tests/testthat/helper-expect_equal_py.R new file mode 100644 index 00000000..5bd6234d --- /dev/null +++ b/tests/testthat/helper-expect_equal_py.R @@ -0,0 +1,57 @@ +expect_equal_py <- function(a, b) { + requireNamespace("testthat") + requireNamespace("reticulate") + + bi <- reticulate::import_builtins() + + testthat::expect_equal(bi$type(a), bi$type(b)) # does this always work? + + if (inherits(a, "pandas.core.frame.DataFrame")) { + pd <- reticulate::import("pandas") + testthat::expect_null( + pd$testing$assert_frame_equal( + a, + b, + check_dtype = FALSE, + check_exact = FALSE + ) + ) + } else if (inherits(a, "np.ndarray") || inherits(a, "scipy.sparse.base.spmatrix")) { + scipy <- reticulate::import("scipy") + np <- reticulate::import("numpy") + + testthat::expect_equal(a$dtype, b$dtype) + + testthat::expect_equal( + py_to_r_ifneedbe(a$shape), + py_to_r_ifneedbe(b$shape) + ) + + a_dense <- + if (scipy$sparse$issparse(a)) { + a$toarray() + } else { + a + } + b_dense <- + if (scipy$sparse$issparse(b)) { + b$toarray() + } else { + b + } + + testthat::expect_null( + np$testing$assert_allclose(a_dense, b_dense) + ) + + } +} + +py_to_r_ifneedbe <- function(x) { + if (inherits(x, "python.builtin.object")) { + requireNamespace("reticulate") + reticulate::py_to_r(x) + } else { + x + } +} \ No newline at end of file diff --git a/tests/testthat/helper-expect_py_df_equal.R b/tests/testthat/helper-expect_py_df_equal.R deleted file mode 100644 index 5b1e4ef9..00000000 --- a/tests/testthat/helper-expect_py_df_equal.R +++ /dev/null @@ -1,18 +0,0 @@ -expect_py_df_equal <- function(a, b) { - requireNamespace("testthat") - requireNamespace("reticulate") - - bi <- reticulate::import_builtins() - pd <- reticulate::import("pandas") - - testthat::expect_equal(bi$type(a), bi$type(b)) # does this always work? - - testthat::expect_null( - pd$testing$assert_frame_equal( - a, - b, - check_dtype = FALSE, - check_exact = FALSE - ) - ) -} diff --git a/tests/testthat/helper-expect_py_matrix_equal.R b/tests/testthat/helper-expect_py_matrix_equal.R deleted file mode 100644 index 3220c710..00000000 --- a/tests/testthat/helper-expect_py_matrix_equal.R +++ /dev/null @@ -1,31 +0,0 @@ -expect_py_matrix_equal <- function(a, b, rtol = 1e-6, atol = 1e-6) { - requireNamespace("testthat") - requireNamespace("reticulate") - - bi <- reticulate::import_builtins() - np <- reticulate::import("numpy") - scipy <- reticulate::import("scipy") - - testthat::expect_equal(bi$type(a), bi$type(b)) # does this always work? - - testthat::expect_equal(a$dtype, b$dtype) - - testthat::expect_equal(reticulate::py_to_r(a$shape), reticulate::py_to_r(b$shape)) - - a_dense <- - if (scipy$sparse$issparse(a)) { - a$toarray() - } else { - a - } - b_dense <- - if (scipy$sparse$issparse(b)) { - b$toarray() - } else { - b - } - - testthat::expect_null( - np$testing$assert_allclose(a_dense, b_dense, rtol = rtol, atol = atol) - ) -} diff --git a/tests/testthat/test-roundtrip-X.R b/tests/testthat/test-roundtrip-X.R index 77f06b7c..8aeb420b 100644 --- a/tests/testthat/test-roundtrip-X.R +++ b/tests/testthat/test-roundtrip-X.R @@ -96,7 +96,7 @@ for (name in test_names) { adata_py2 <- ad$read_h5ad(file_r) # expect that the objects are the same - expect_py_matrix_equal( + expect_equal_py( adata_py2$X, adata_py$X ) diff --git a/tests/testthat/test-roundtrip-layers.R b/tests/testthat/test-roundtrip-layers.R index 07824bb1..9ba0aa25 100644 --- a/tests/testthat/test-roundtrip-layers.R +++ b/tests/testthat/test-roundtrip-layers.R @@ -54,7 +54,7 @@ for (name in test_names) { ) expect_equal( adata_r$layers_keys(), - py_to_r(adata_py$layers$keys()) + bi$list(adata_py$layers$keys()) ) # check that the print output is the same @@ -100,7 +100,7 @@ for (name in test_names) { adata_py2 <- ad$read_h5ad(file_r) # expect that the objects are the same - expect_py_matrix_equal( + expect_equal_py( py_get_item(adata_py2$layers, name), py_get_item(adata_py$layers, name) ) diff --git a/tests/testthat/test-roundtrip-obsmvarm.R b/tests/testthat/test-roundtrip-obsmvarm.R index 4376463d..5172cf8d 100644 --- a/tests/testthat/test-roundtrip-obsmvarm.R +++ b/tests/testthat/test-roundtrip-obsmvarm.R @@ -7,16 +7,16 @@ testthat::skip_if_not( message = "Python dummy_anndata module not available for testing" ) -# TODO: make sure data frames in obsm / varm are also tested? -# TODO: create a github issue for this - ad <- reticulate::import("anndata", convert = FALSE) da <- reticulate::import("dummy_anndata", convert = FALSE) bi <- reticulate::import_builtins() known_issues <- read_known_issues() -test_names <- names(da$matrix_generators) +test_names <- c( + names(da$matrix_generators), + names(da$vector_generators) +) for (name in test_names) { # first generate a python h5ad @@ -57,11 +57,11 @@ for (name in test_names) { ) expect_equal( adata_r$obsm_keys(), - py_to_r(adata_py$obsm$keys()) + bi$list(adata_py$obsm$keys()) ) expect_equal( adata_r$varm_keys(), - py_to_r(adata_py$varm$keys()) + bi$list(adata_py$varm$keys()) ) # check that the print output is the same @@ -112,11 +112,11 @@ for (name in test_names) { adata_py2 <- ad$read_h5ad(file_r) # expect that the objects are the same - expect_py_matrix_equal( + expect_equal_py( py_get_item(adata_py2$obsm, name), py_get_item(adata_py$obsm, name) ) - expect_py_matrix_equal( + expect_equal_py( py_get_item(adata_py2$varm, name), py_get_item(adata_py$varm, name) ) diff --git a/tests/testthat/test-roundtrip-obspvarp.R b/tests/testthat/test-roundtrip-obspvarp.R index 5326f22e..25b3e9ba 100644 --- a/tests/testthat/test-roundtrip-obspvarp.R +++ b/tests/testthat/test-roundtrip-obspvarp.R @@ -54,11 +54,11 @@ for (name in test_names) { ) expect_equal( adata_r$obsp_keys(), - py_to_r(adata_py$obsp$keys()) + bi$list(adata_py$obsp$keys()) ) expect_equal( adata_r$varp_keys(), - py_to_r(adata_py$varp$keys()) + bi$list(adata_py$varp$keys()) ) # check that the print output is the same @@ -109,11 +109,11 @@ for (name in test_names) { adata_py2 <- ad$read_h5ad(file_r) # expect that the objects are the same - expect_py_matrix_equal( + expect_equal_py( py_get_item(adata_py2$obsp, name), py_get_item(adata_py$obsp, name) ) - expect_py_matrix_equal( + expect_equal_py( py_get_item(adata_py2$varp, name), py_get_item(adata_py$varp, name) ) diff --git a/tests/testthat/test-roundtrip-obsvar.R b/tests/testthat/test-roundtrip-obsvar.R index 88a68be0..e9e09892 100644 --- a/tests/testthat/test-roundtrip-obsvar.R +++ b/tests/testthat/test-roundtrip-obsvar.R @@ -53,11 +53,11 @@ for (name in test_names) { ) expect_equal( adata_r$obs_keys(), - py_to_r(adata_py$obs_keys()) + bi$list(adata_py$obs_keys()) ) expect_equal( adata_r$var_keys(), - py_to_r(adata_py$var_keys()) + bi$list(adata_py$var_keys()) ) # check that the print output is the same @@ -108,7 +108,7 @@ for (name in test_names) { adata_py2 <- ad$read_h5ad(file_r) # expect that the objects are the same - expect_py_df_equal(adata_py2$obs, adata_py$obs) - expect_py_df_equal(adata_py2$var, adata_py$var) + expect_equal_py(adata_py2$obs, adata_py$obs) + expect_equal_py(adata_py2$var, adata_py$var) }) } From aad550efec43fdd80b24df6223db38135b42558d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 13:20:44 +0100 Subject: [PATCH 11/16] fix obsvar --- tests/testthat/test-roundtrip-obsvar.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-roundtrip-obsvar.R b/tests/testthat/test-roundtrip-obsvar.R index e9e09892..aeb54fad 100644 --- a/tests/testthat/test-roundtrip-obsvar.R +++ b/tests/testthat/test-roundtrip-obsvar.R @@ -9,6 +9,7 @@ testthat::skip_if_not( ad <- reticulate::import("anndata", convert = FALSE) da <- reticulate::import("dummy_anndata", convert = FALSE) +bi <- reticulate::import_builtins() known_issues <- read_known_issues() From 55d759bedb0c447a431fb602f859f20c87b6cc8c Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 13:30:37 +0100 Subject: [PATCH 12/16] fix styling --- tests/testthat/helper-expect_equal_py.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/testthat/helper-expect_equal_py.R b/tests/testthat/helper-expect_equal_py.R index 5bd6234d..aabe829e 100644 --- a/tests/testthat/helper-expect_equal_py.R +++ b/tests/testthat/helper-expect_equal_py.R @@ -43,7 +43,6 @@ expect_equal_py <- function(a, b) { testthat::expect_null( np$testing$assert_allclose(a_dense, b_dense) ) - } } @@ -54,4 +53,4 @@ py_to_r_ifneedbe <- function(x) { } else { x } -} \ No newline at end of file +} From 8e65a2b22d81bdd0f59e7f5a92ef1975be049901 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 14:03:00 +0100 Subject: [PATCH 13/16] skip categorical --- tests/testthat/test-roundtrip-obsmvarm.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-roundtrip-obsmvarm.R b/tests/testthat/test-roundtrip-obsmvarm.R index 5172cf8d..65bc0305 100644 --- a/tests/testthat/test-roundtrip-obsmvarm.R +++ b/tests/testthat/test-roundtrip-obsmvarm.R @@ -17,6 +17,7 @@ test_names <- c( names(da$matrix_generators), names(da$vector_generators) ) +test_names <- setdiff(test_names, c("categorical")) for (name in test_names) { # first generate a python h5ad From d0d342b4dfa78375190741f04c61b02830cfa835 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 14 Nov 2024 17:33:51 +0100 Subject: [PATCH 14/16] Update tests/testthat/test-InMemoryAnnData.R Co-authored-by: Luke Zappia --- tests/testthat/test-InMemoryAnnData.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-InMemoryAnnData.R b/tests/testthat/test-InMemoryAnnData.R index 23bc0e5b..e3d4e0fd 100644 --- a/tests/testthat/test-InMemoryAnnData.R +++ b/tests/testthat/test-InMemoryAnnData.R @@ -40,7 +40,7 @@ test_that("with empty var", { expect_identical(ad$shape(), c(10L, 0L)) }) -test_that("with only X, no obs or var", { +test_that("Creating AnnData works with only X, no obs or var", { X <- dummy$X dimnames(X) <- list( rownames(dummy$obs), From efac790ccd6b212b8caa7b49e8c63f120c2d2f02 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 15 Nov 2024 09:06:03 +0100 Subject: [PATCH 15/16] Update tests/testthat/test-roundtrip-obsvar.R --- tests/testthat/test-roundtrip-obsvar.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-roundtrip-obsvar.R b/tests/testthat/test-roundtrip-obsvar.R index aeb54fad..b38e3ec4 100644 --- a/tests/testthat/test-roundtrip-obsvar.R +++ b/tests/testthat/test-roundtrip-obsvar.R @@ -40,7 +40,7 @@ for (name in test_names) { test_that(paste0("reading an AnnData with obs and var '", name, "' works"), { msg <- message_if_known( backend = "HDF5AnnData", - slot = c("obsp", "varp"), + slot = c("obs", "var"), dtype = name, process = "read", known_issues = known_issues From 23b5abb32613830f040376afff514ba36a1ed937 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 15 Nov 2024 09:35:58 +0100 Subject: [PATCH 16/16] add temporary workaround for data-intuitive/dummy-anndata#12 --- tests/testthat/test-roundtrip-obsmvarm.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-roundtrip-obsmvarm.R b/tests/testthat/test-roundtrip-obsmvarm.R index 65bc0305..372a72b0 100644 --- a/tests/testthat/test-roundtrip-obsmvarm.R +++ b/tests/testthat/test-roundtrip-obsmvarm.R @@ -17,7 +17,14 @@ test_names <- c( names(da$matrix_generators), names(da$vector_generators) ) -test_names <- setdiff(test_names, c("categorical")) + +# temporary workaround for +# https://github.com/data-intuitive/dummy-anndata/issues/12 +test_names <- setdiff(test_names, c( + "categorical", "categorical_missing_values", + "categorical_ordered", "categorical_ordered_missing_values", + "nullable_boolean_array", "nullable_integer_array" +)) for (name in test_names) { # first generate a python h5ad