From 20ea340fd6c13b9412554741f21406642fbea7cb Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Thu, 26 Sep 2024 16:57:35 +0200 Subject: [PATCH 1/3] feat: add new extractByIndex method - Add an `extractByIndex()` method to subset/extract content from a `MsBackend`. Using this method in contrast to `[` avoids errors with some parallel processing setups in which a `[` method for a backend might not be found (see https://github.com/rformassspectrometry/MsBackendMetaboLights/issues/5). --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 5 +++++ R/AllGenerics.R | 2 ++ R/MsBackend.R | 27 +++++++++++++++++++++++ R/MsBackendCached.R | 9 ++++++++ R/MsBackendDataFrame.R | 10 +++++++++ R/MsBackendHdf5Peaks.R | 14 ++++++++++++ R/MsBackendMemory.R | 12 ++++++++++ R/Spectra-functions.R | 5 +++-- R/Spectra.R | 11 ++++++---- man/MsBackend.Rd | 21 ++++++++++++++++-- man/MsBackendCached.Rd | 7 ++++-- man/hidden_aliases.Rd | 9 ++++++++ tests/testthat/test_MsBackend.R | 3 +++ tests/testthat/test_MsBackendCached.R | 12 ++++++++++ tests/testthat/test_MsBackendDataFrame.R | 22 +++++++++++++++++++ tests/testthat/test_MsBackendHdf5Peaks.R | 4 ++++ tests/testthat/test_MsBackendMemory.R | 28 +++++++++++++++++++++++- tests/testthat/test_MsBackendMzR.R | 2 ++ vignettes/MsBackend.Rmd | 22 +++++++++++++++++++ 21 files changed, 216 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0270d5db..91db6af4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.15.9 +Version: 1.15.10 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different diff --git a/NAMESPACE b/NAMESPACE index 7e79b6c9..e3e4970c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,7 @@ exportMethods(dropNaSpectraVariables) exportMethods(entropy) exportMethods(estimatePrecursorIntensity) exportMethods(export) +exportMethods(extractByIndex) exportMethods(filterAcquisitionNum) exportMethods(filterDataOrigin) exportMethods(filterDataStorage) diff --git a/NEWS.md b/NEWS.md index 828350b4..3cc44fb5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # Spectra 1.15 +## Changes in 1.15.10 + +- Add new `extractSpectra()` generic and implementation for `MsBackend`. Fixes + [issue #5](https://github.com/rformassspectrometry/MsBackendMetaboLights/issues/5). + ## Changes in 1.15.9 - Restructure and reorganize documentation for `Spectra`. diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 5ec6d054..d02aa13c 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -19,6 +19,8 @@ setGeneric("entropy", function(object, ...) standardGeneric("entropy")) setGeneric("export", function(object, ...) standardGeneric("export")) +setGeneric("extractByIndex", function(object, i) + standardGeneric("extractByIndex")) setGeneric("filterFourierTransformArtefacts", function(object, ...) standardGeneric("filterFourierTransformArtefacts")) setGeneric("neutralLoss", function(object, param, ...) diff --git a/R/MsBackend.R b/R/MsBackend.R index e0d67630..d7216c53 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -328,6 +328,18 @@ #' *mzML* or *mzXML* format. See the documentation for the `MsBackendMzR` #' class below for more information. #' +#' - `extractByIndex()`: function to subset a backend to selected elements +#' defined by the provided index. Similar to `[`, this method should allow +#' extracting (or to subset) the data in any order. In contrast to `[`, +#' however, `i` is expected to be an `integer` (while `[` should also +#' support `logical` and eventually `character`). While being apparently +#' redundant to `[`, this methods avoids package namespace errors/problems +#' that can result in implementations of `[` being not found by R (which +#' can happen sometimes in parallel processing using the [SnowParam()]). This +#' method is used internally by `Spectra` to extract/subset its backend. +#' Implementation is optional, as the default implementation for `MsBackend` +#' will use `[`. +#' #' - `filterAcquisitionNum()`: filters the object keeping only spectra matching #' the provided acquisition numbers (argument `n`). If `dataOrigin` or #' `dataStorage` is also provided, `object` is subsetted to the spectra with @@ -1101,6 +1113,21 @@ setMethod("dropNaSpectraVariables", "MsBackend", function(object) { selectSpectraVariables(object, c(svs[keep], "mz", "intensity")) }) +#' @rdname MsBackend +#' +#' @export +setMethod("extractByIndex", c("MsBackend", "ANY"), function(object, i) { + object[i = i] +}) + +#' @rdname MsBackend +#' +#' @export +setMethod("extractByIndex", c("MsBackend", "missing"), function(object, i) { + message("extractByIndex,MsBackend,missing") + object +}) + #' @exportMethod filterAcquisitionNum #' #' @importMethodsFrom ProtGenerics filterAcquisitionNum diff --git a/R/MsBackendCached.R b/R/MsBackendCached.R index 5628037d..caf7f743 100644 --- a/R/MsBackendCached.R +++ b/R/MsBackendCached.R @@ -294,6 +294,15 @@ setMethod("dataStorage", "MsBackendCached", function(object) { rep("", length(object)) }) +#' @rdname MsBackendCached +setMethod("extractByIndex", c("MsBackendCached", "ANY"), + function(object, i) { + slot(object, "localData", check = FALSE) <- + object@localData[i, , drop = FALSE] + object@nspectra <- nrow(object@localData) + object +}) + #' @rdname MsBackendCached setMethod("length", "MsBackendCached", function(x) { x@nspectra diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index c13052b7..c89e192d 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -181,6 +181,14 @@ setReplaceMethod("dataStorage", "MsBackendDataFrame", function(object, value) { object }) +#' @rdname hidden_aliases +setMethod("extractByIndex", c("MsBackendDataFrame", "ANY"), + function(object, i) { + slot(object, "spectraData", check = FALSE) <- + extractROWS(object@spectraData, i) + object + }) + #' @rdname hidden_aliases setMethod("intensity", "MsBackendDataFrame", function(object) { if (any(colnames(object@spectraData) == "intensity")) @@ -544,6 +552,8 @@ setReplaceMethod("$", "MsBackendDataFrame", function(x, name, value) { #' @importFrom MsCoreUtils i2index #' #' @rdname hidden_aliases +#' +#' @export setMethod("[", "MsBackendDataFrame", function(x, i, j, ..., drop = FALSE) { .subset_backend_data_frame(x, i) }) diff --git a/R/MsBackendHdf5Peaks.R b/R/MsBackendHdf5Peaks.R index e5482803..ebcb8ea2 100644 --- a/R/MsBackendHdf5Peaks.R +++ b/R/MsBackendHdf5Peaks.R @@ -291,6 +291,20 @@ setMethod("[", "MsBackendHdf5Peaks", function(x, i, j, ..., drop = FALSE) { x }) +#' @rdname hidden_aliases +#' +#' @aliases [,MsBackendHdf5Peaks-method +setMethod("extractByIndex", c("MsBackendHdf5Peaks", "ANY"), + function(object, i) { + fls <- unique(object@spectraData$dataStorage) + slot(object, "spectraData", check = FALSE) <- + extractROWS(object@spectraData, i) + slot(object, "modCount", check = FALSE) <- + object@modCount[match( + unique(object@spectraData$dataStorage), fls)] + object +}) + #' @rdname hidden_aliases setMethod("backendMerge", "MsBackendHdf5Peaks", function(object, ...) { object <- unname(c(object, ...)) diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index d38722ab..594fc799 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -192,6 +192,18 @@ setReplaceMethod("dataStorage", "MsBackendMemory", function(object, value) { object }) +#' @rdname hidden_aliases +setMethod("extractByIndex", c("MsBackendMemory", "ANY"), function(object, i) { + slot(object, "spectraData", check = FALSE) <- + object@spectraData[i, , drop = FALSE] + if (length(object@peaksData)) + slot(object, "peaksData", check = FALSE) <- object@peaksData[i] + if (length(object@peaksDataFrame)) + slot(object, "peaksDataFrame", check = FALSE) <- + object@peaksDataFrame[i] + object +}) + #' @rdname hidden_aliases setMethod("intensity", "MsBackendMemory", function(object) { if (length(object)) { diff --git a/R/Spectra-functions.R b/R/Spectra-functions.R index 033a2b2d..99014163 100644 --- a/R/Spectra-functions.R +++ b/R/Spectra-functions.R @@ -236,8 +236,9 @@ applyProcessing <- function(object, f = processingChunkFactor(object), }, queue = queue, pv = pv, svars = svars, BPPARAM = BPPARAM) bknds <- backendMerge(bknds) if (is.unsorted(f)) - bknds <- bknds[order(unlist(split(seq_along(bknds), f), - use.names = FALSE))] + bknds <- extractByIndex( + bknds, order(unlist(split(seq_along(bknds), f), + use.names = FALSE))) object@backend <- bknds } else { if (length(svars)) diff --git a/R/Spectra.R b/R/Spectra.R index 045cf88a..7564b0a4 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -515,8 +515,9 @@ setMethod( ## That below ensures the backend is returned in its original ## order - unsplit does unfortunately not work. if (is.unsorted(f)) - bknds <- bknds[order(unlist(split(seq_along(bknds), f), - use.names = FALSE))] + bknds <- extractByIndex( + bknds, order(unlist(split(seq_along(bknds), f), + use.names = FALSE))) } else { bknds <- backendInitialize( backend, data = spectraData(object@backend), ...) @@ -2415,7 +2416,8 @@ setMethod("[", "Spectra", function(x, i, j, ..., drop = FALSE) { stop("Subsetting 'Spectra' by columns is not (yet) supported") if (missing(i)) return(x) - slot(x, "backend", check = FALSE) <- x@backend[i = i] + slot(x, "backend", check = FALSE) <- extractByIndex( + x@backend, i2index(i, length(x))) x }) @@ -2439,7 +2441,8 @@ setMethod("filterAcquisitionNum", "Spectra", function(object, n = integer(), #' @rdname filterMsLevel setMethod("filterEmptySpectra", "Spectra", function(object) { - object@backend <- object@backend[as.logical(lengths(object))] + object@backend <- extractByIndex(object@backend, + which(as.logical(lengths(object)))) object@processing <- .logging(object@processing, "Filter: removed empty spectra.") object diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 2874d082..269b7cdd 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -40,6 +40,8 @@ \alias{dataStorage,MsBackend-method} \alias{dataStorage<-,MsBackend-method} \alias{dropNaSpectraVariables,MsBackend-method} +\alias{extractByIndex,MsBackend,ANY-method} +\alias{extractByIndex,MsBackend,missing-method} \alias{filterAcquisitionNum,MsBackend-method} \alias{filterDataOrigin,MsBackend-method} \alias{filterDataStorage,MsBackend-method} @@ -145,6 +147,10 @@ \S4method{dropNaSpectraVariables}{MsBackend}(object) +\S4method{extractByIndex}{MsBackend,ANY}(object, i) + +\S4method{extractByIndex}{MsBackend,missing}(object, i) + \S4method{filterAcquisitionNum}{MsBackend}(object, n, file, ...) \S4method{filterDataOrigin}{MsBackend}(object, dataOrigin = character()) @@ -316,6 +322,8 @@ backend provides.} \item{value}{replacement value for \verb{<-} methods. See individual method description or expected data type.} +\item{i}{For \code{[}: \code{integer}, \code{logical} or \code{character} to subset the object.} + \item{n}{for \code{filterAcquisitionNum()}: \code{integer} with the acquisition numbers to filter for.} @@ -411,8 +419,6 @@ reported total ion current should be reported, or whether the total ion current should be (re)calculated on the actual data (\code{initial = FALSE}).} -\item{i}{For \code{[}: \code{integer}, \code{logical} or \code{character} to subset the object.} - \item{j}{For \code{[}: not supported.} \item{name}{For \code{$} and \verb{$<-}: the name of the spectra variable to return @@ -604,6 +610,17 @@ queue) are applied prior to export - this would not be possible with only a for the \code{MsBackendMzR} backend that supports export of the data in \emph{mzML} or \emph{mzXML} format. See the documentation for the \code{MsBackendMzR} class below for more information. +\item \code{extractByIndex()}: function to subset a backend to selected elements +defined by the provided index. Similar to \code{[}, this method should allow +extracting (or to subset) the data in any order. In contrast to \code{[}, +however, \code{i} is expected to be an \code{integer} (while \code{[} should also +support \code{logical} and eventually \code{character}). While being apparently +redundant to \code{[}, this methods avoids package namespace errors/problems +that can result in implementations of \code{[} being not found by R (which +can happen sometimes in parallel processing using the \code{\link[=SnowParam]{SnowParam()}}). This +method is used internally by \code{Spectra} to extract/subset its backend. +Implementation is optional, as the default implementation for \code{MsBackend} +will use \code{[}. \item \code{filterAcquisitionNum()}: filters the object keeping only spectra matching the provided acquisition numbers (argument \code{n}). If \code{dataOrigin} or \code{dataStorage} is also provided, \code{object} is subsetted to the spectra with diff --git a/man/MsBackendCached.Rd b/man/MsBackendCached.Rd index e65e41e9..ae8c6687 100644 --- a/man/MsBackendCached.Rd +++ b/man/MsBackendCached.Rd @@ -5,6 +5,7 @@ \alias{MsBackendCached-class} \alias{backendInitialize,MsBackendCached-method} \alias{dataStorage,MsBackendCached-method} +\alias{extractByIndex,MsBackendCached,ANY-method} \alias{length,MsBackendCached-method} \alias{spectraVariables,MsBackendCached-method} \alias{spectraData,MsBackendCached-method} @@ -57,6 +58,8 @@ MsBackendCached() \S4method{dataStorage}{MsBackendCached}(object) +\S4method{extractByIndex}{MsBackendCached,ANY}(object, i) + \S4method{length}{MsBackendCached}(x) \S4method{spectraVariables}{MsBackendCached}(object) @@ -150,6 +153,8 @@ variables to keep.} \item{...}{ignored} +\item{i}{For \code{[}: \code{integer} with the indices to subset the object.} + \item{x}{A \code{MsBackendCached} object.} \item{columns}{For \code{spectraData()}: \code{character} with the names of the spectra @@ -158,8 +163,6 @@ variables to retrieve.} \item{value}{replacement value for \verb{<-} methods. See individual method description or expected data type.} -\item{i}{For \code{[}: \code{integer} with the indices to subset the object.} - \item{j}{For \code{[}: ignored.} \item{drop}{For \code{[}: not considered.} diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index 1249a50f..3e70d26c 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -20,6 +20,7 @@ \alias{dataOrigin<-,MsBackendDataFrame-method} \alias{dataStorage,MsBackendDataFrame-method} \alias{dataStorage<-,MsBackendDataFrame-method} +\alias{extractByIndex,MsBackendDataFrame,ANY-method} \alias{intensity,MsBackendDataFrame-method} \alias{intensity<-,MsBackendDataFrame-method} \alias{isEmpty,MsBackendDataFrame-method} @@ -75,6 +76,7 @@ \alias{spectraData<-,MsBackendHdf5Peaks-method} \alias{$<-,MsBackendHdf5Peaks-method} \alias{[,MsBackendHdf5Peaks-method} +\alias{extractByIndex,MsBackendHdf5Peaks,ANY-method} \alias{backendMerge,MsBackendHdf5Peaks-method} \alias{show,MsBackendMemory-method} \alias{backendMerge,MsBackendMemory-method} @@ -87,6 +89,7 @@ \alias{dataOrigin<-,MsBackendMemory-method} \alias{dataStorage,MsBackendMemory-method} \alias{dataStorage<-,MsBackendMemory-method} +\alias{extractByIndex,MsBackendMemory,ANY-method} \alias{intensity,MsBackendMemory-method} \alias{intensity<-,MsBackendMemory-method} \alias{ionCount,MsBackendMemory-method} @@ -185,6 +188,8 @@ \S4method{dataStorage}{MsBackendDataFrame}(object) <- value +\S4method{extractByIndex}{MsBackendDataFrame,ANY}(object, i) + \S4method{intensity}{MsBackendDataFrame}(object) \S4method{intensity}{MsBackendDataFrame}(object) <- value @@ -309,6 +314,8 @@ \S4method{[}{MsBackendHdf5Peaks}(x, i, j, ..., drop = FALSE) +\S4method{extractByIndex}{MsBackendHdf5Peaks,ANY}(object, i) + \S4method{backendMerge}{MsBackendHdf5Peaks}(object, ...) \S4method{show}{MsBackendMemory}(object) @@ -333,6 +340,8 @@ \S4method{dataStorage}{MsBackendMemory}(object) <- value +\S4method{extractByIndex}{MsBackendMemory,ANY}(object, i) + \S4method{intensity}{MsBackendMemory}(object) \S4method{intensity}{MsBackendMemory}(object) <- value diff --git a/tests/testthat/test_MsBackend.R b/tests/testthat/test_MsBackend.R index d80bd757..cf36605e 100644 --- a/tests/testthat/test_MsBackend.R +++ b/tests/testthat/test_MsBackend.R @@ -56,6 +56,9 @@ test_that("MsBackend methods throw errors", { expect_error(dm[1], "implemented for") expect_error(dm$a, "implemented for") expect_error(dm$a <- "a", "implemented for") + expect_error(extractByIndex(dm, 1), "implemented for") + + expect_equal(extractByIndex(dm), dm) }) test_that("reset,MsBackend works", { diff --git a/tests/testthat/test_MsBackendCached.R b/tests/testthat/test_MsBackendCached.R index 6ff1b7ee..86bd8639 100644 --- a/tests/testthat/test_MsBackendCached.R +++ b/tests/testthat/test_MsBackendCached.R @@ -87,12 +87,24 @@ test_that("[,MsBackendCached works", { res <- be[c(1, 4, 3), ] expect_true(length(res) == 3) expect_true(nrow(res@localData) == 3) + res_2 <- extractByIndex(be, c(1, 4, 3)) + expect_equal(res, res_2) df <- data.frame(msLevel = 1L, b = 1:6) be <- backendInitialize(be, data = df) res <- be[c(6, 1, 3)] expect_true(length(res) == 3) expect_equal(res@localData$b, c(6, 1, 3)) + res_2 <- extractByIndex(be, c(6, 1, 3)) + expect_equal(res, res_2) + + res <- be[c(6, 1, 3, 1)] + expect_true(length(res) == 4) + expect_equal(res@localData$b, c(6, 1, 3, 1)) + res_2 <- extractByIndex(be, c(6, 1, 3, 1)) + expect_equal(res, res_2) + + expect_equal(extractByIndex(be), be) }) test_that("$,MsBackendCached works", { diff --git a/tests/testthat/test_MsBackendDataFrame.R b/tests/testthat/test_MsBackendDataFrame.R index 4b41d6d0..e5de3662 100644 --- a/tests/testthat/test_MsBackendDataFrame.R +++ b/tests/testthat/test_MsBackendDataFrame.R @@ -576,24 +576,42 @@ test_that("show,MsBackendDataFrame works", { test_that("[,MsBackendDataFrame works", { be <- MsBackendDataFrame() expect_error(be[1]) + + expect_equal(extractByIndex(be), be) + df <- DataFrame(scanIndex = 1:2, a = "a", b = "b") be <- backendInitialize(be, df) res <- be[1] expect_true(validObject(res)) expect_equal(be@spectraData[1, ], res@spectraData[1, ]) + res_2 <- extractByIndex(be, 1) + expect_equal(res, res_2) res <- be[2] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) + res_2 <- extractByIndex(be, 2) + expect_equal(res, res_2) res <- be[2:1] expect_true(validObject(res)) expect_equal(be@spectraData[2:1, ], res@spectraData) + res_2 <- extractByIndex(be, 2:1) + expect_equal(res, res_2) + + res <- be[c(2, 1, 2)] + expect_equal(res$scanIndex, c(2, 1, 2)) + res_2 <- extractByIndex(be, c(2, 1, 2)) + expect_equal(res, res_2) res <- be[c(FALSE, FALSE)] expect_true(validObject(res)) expect_true(length(res) == 0) + res_2 <- extractByIndex(be, integer()) + expect_equal(res, res_2) res <- be[c(FALSE, TRUE)] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) + res_2 <- extractByIndex(be, 2) + expect_equal(res, res_2) expect_error(be[TRUE], "match the length of") expect_error(be["a"], "does not have names") @@ -606,11 +624,15 @@ test_that("[,MsBackendDataFrame works", { expect_true(validObject(res)) expect_equal(dataStorage(res), "2") expect_equal(res@spectraData$file, "b") + res_2 <- extractByIndex(be, 3) + expect_equal(res, res_2) res <- be[c(3, 1)] expect_true(validObject(res)) expect_equal(dataStorage(res), c("2", "1")) expect_equal(res@spectraData$file, c("b", "a")) + res_2 <- extractByIndex(be, c(3, 1)) + expect_equal(res, res_2) }) test_that("selectSpectraVariables,MsBackendDataFrame works", { diff --git a/tests/testthat/test_MsBackendHdf5Peaks.R b/tests/testthat/test_MsBackendHdf5Peaks.R index b7afdf37..3604d895 100644 --- a/tests/testthat/test_MsBackendHdf5Peaks.R +++ b/tests/testthat/test_MsBackendHdf5Peaks.R @@ -334,12 +334,16 @@ test_that("[,MsBackendHdf5Peaks works", { expect_identical(peaksData(res), sciex_pks[idx]) expect_identical(rtime(res), rtime(sciex_mzr)[idx]) expect_identical(msLevel(res), msLevel(sciex_mzr)[idx]) + res_2 <- extractByIndex(be, idx) + expect_equal(res, res_2) idx <- dataStorage(be) == fls[2] res <- be[idx, ] expect_true(validObject(res)) expect_true(all(dataStorage(res) == fls[2])) expect_identical(peaksData(res), sciex_pks[idx]) + res_2 <- extractByIndex(be, idx) + expect_equal(res, res_2) }) test_that("backendMerge,MsBackendHdf5Peaks works", { diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index 59776143..bb3c9973 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -501,41 +501,67 @@ test_that("$<-,MsBackendMemory works", { test_that("[,MsBackendMemory works", { be <- new("MsBackendMemory") + res <- extractByIndex(be) + expect_equal(res, be) + df <- data.frame(scanIndex = 1:2, a = "a", b = "b") be <- backendInitialize(be, df) res <- be[1] expect_true(validObject(res)) expect_equal(be@spectraData[1, ], res@spectraData[1, ]) + res_2 <- extractByIndex(be, 1) + expect_equal(res, res_2) + res <- be[2] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) + res_2 <- extractByIndex(be, 2) + expect_equal(res, res_2) + res <- be[2:1] expect_true(validObject(res)) expect_equal(be@spectraData[2:1, ], res@spectraData) + res_2 <- extractByIndex(be, 2:1) + expect_equal(res, res_2) res <- be[c(FALSE, FALSE)] expect_true(validObject(res)) expect_true(length(res) == 0) + res_2 <- extractByIndex(be, integer()) + expect_equal(res, res_2) + res <- be[c(FALSE, TRUE)] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) + res_2 <- extractByIndex(be, 2) + expect_equal(res, res_2) expect_error(be[TRUE], "match the length of") expect_error(be["a"], "names") df <- data.frame(scanIndex = c(1L, 2L, 1L, 2L), - file = c("a", "a", "b", "b")) + file = c("a", "a", "b", "b"), + idx = 1:4) be <- backendInitialize(be, df) dataStorage(be) <- c("1", "1", "2", "2") res <- be[3] expect_true(validObject(res)) expect_equal(dataStorage(res), "2") expect_equal(res@spectraData$file, "b") + res_2 <- extractByIndex(be, 3) + expect_equal(res, res_2) res <- be[c(3, 1)] expect_true(validObject(res)) expect_equal(dataStorage(res), c("2", "1")) expect_equal(res@spectraData$file, c("b", "a")) + res_2 <- extractByIndex(be, c(3, 1)) + expect_equal(res, res_2) + + res <- be[c(3, 1, 3)] + expect_equal(res$idx, c(3, 1, 3)) + res_2 <- extractByIndex(be, c(3, 1, 3)) + expect_equal(res, res_2) }) test_that("split,MsBackendMemory works", { diff --git a/tests/testthat/test_MsBackendMzR.R b/tests/testthat/test_MsBackendMzR.R index d051b8e6..36de14c4 100644 --- a/tests/testthat/test_MsBackendMzR.R +++ b/tests/testthat/test_MsBackendMzR.R @@ -474,6 +474,8 @@ test_that("[,MsBackendMzR works", { expect_equal(length(tmp), 13) expect_equal(tmp@spectraData$scanIndex, 13:25) expect_true(all(is.na(smoothed(tmp)))) + tmp_2 <- extractByIndex(sciex_mzr, 13:25) + expect_equal(tmp, tmp_2) ints <- intensity(tmp) spd <- spectraData(tmp) diff --git a/vignettes/MsBackend.Rmd b/vignettes/MsBackend.Rmd index a6423e63..ff0269c9 100644 --- a/vignettes/MsBackend.Rmd +++ b/vignettes/MsBackend.Rmd @@ -602,6 +602,28 @@ a <- be[c(2, 2, 2)] spectraData(a) ``` +In addition to the `[` method it is also suggested to implement a +`extractByIndex()` method. Similar to `[`, this method should extract elements +from, or subset, a backend, but it expects an integer vector with the indices as +second parameter. Hence, this method does not require conversion of +e.g. `logical` to `integer`. Also, this method helps avoiding namespace issues +sometimes encountered with parallel processing using the `SnowParam` when the +implementation of `[` for certain backends are not found. + +Data analysis methods on `Spectra` objects will use this method for most +operations. Implementation of this method is optional, since the default +implementation for `MsBackend` will fall back to `[`. Below we implement this +method for our example backend. + +```{r} +setMethod("extractByIndex", c("MsBackendTest", "ANY"), function(object, i) { + x@spectraVars <- x@spectraVars[i, ] + x@mz <- x@mz[i] + x@intensity <- x@intensity[i] + x +}) +``` + ### `backendMerge()` From 37ef79bcc104d0777db5f694628586f1bffc2bd2 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Fri, 27 Sep 2024 08:54:36 +0200 Subject: [PATCH 2/3] refactor: add `extractByIndex` method --- R/MsBackend.R | 13 ++-- R/MsBackendCached.R | 2 +- R/MsBackendDataFrame.R | 2 +- .../test_MsBackend/test_spectra_subsetting.R | 18 ++++++ man/MsBackend.Rd | 8 ++- tests/testthat/test_MsBackend.R | 2 - vignettes/MsBackend.Rmd | 64 +++++++------------ 7 files changed, 55 insertions(+), 54 deletions(-) diff --git a/R/MsBackend.R b/R/MsBackend.R index d7216c53..70cf211e 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -15,6 +15,7 @@ #' @aliases dataStorageBasePath,MsBackendMzR-method #' @aliases dataStorageBasePath<- #' @aliases dataStorageBasePath<-,MsBackendMzR-method +#' @aliases extractByIndex #' @aliases msLeveL<-,MsBackend-method #' #' @description @@ -223,7 +224,9 @@ #' allowed. Parameter `i` should support `integer` indices and `logical` #' and should throw an error if `i` is out of bounds. The #' `MsCoreUtils::i2index` could be used to check the input `i`. -#' For `i = integer()` an empty backend should be returned. +#' For `i = integer()` an empty backend should be returned. Implementation +#' of this method is optional, as the default calls the `extractByIndex()` +#' method (which has to be implemented as the main subsetting method). #' #' - `$`, `$<-`: access or set/add a single spectrum variable (column) in the #' backend. Using a `value` of `NULL` should allow deleting the specified @@ -337,8 +340,7 @@ #' that can result in implementations of `[` being not found by R (which #' can happen sometimes in parallel processing using the [SnowParam()]). This #' method is used internally by `Spectra` to extract/subset its backend. -#' Implementation is optional, as the default implementation for `MsBackend` -#' will use `[`. +#' Implementation of this method is mandatory. #' #' - `filterAcquisitionNum()`: filters the object keeping only spectra matching #' the provided acquisition numbers (argument `n`). If `dataOrigin` or @@ -1117,14 +1119,13 @@ setMethod("dropNaSpectraVariables", "MsBackend", function(object) { #' #' @export setMethod("extractByIndex", c("MsBackend", "ANY"), function(object, i) { - object[i = i] + stop("'extractByIndex' not implemented for ", class(object), ".") }) #' @rdname MsBackend #' #' @export setMethod("extractByIndex", c("MsBackend", "missing"), function(object, i) { - message("extractByIndex,MsBackend,missing") object }) @@ -1858,7 +1859,7 @@ setMethod("tic", "MsBackend", function(object, initial = TRUE) { #' #' @export setMethod("[", "MsBackend", function(x, i, j, ..., drop = FALSE) { - stop("Not implemented for ", class(x), ".") + extractByIndex(x, i2index(i, length = length(x))) }) #' @exportMethod $ diff --git a/R/MsBackendCached.R b/R/MsBackendCached.R index caf7f743..dfb4aa40 100644 --- a/R/MsBackendCached.R +++ b/R/MsBackendCached.R @@ -437,7 +437,7 @@ setMethod("show", "MsBackendCached", function(object) { cat(class(object), "with", n, "spectra\n") if (n) { idx <- unique(c(1L:min(6L, n), max(1L, n-5L):n)) - spd <- spectraData(object[idx, ], + spd <- spectraData(extractByIndex(object, idx), c("msLevel", "precursorMz", "polarity")) if (!length(rownames(spd))) rownames(spd) <- idx diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index c89e192d..c04f2f6f 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -574,5 +574,5 @@ setMethod("filterAcquisitionNum", "MsBackendDataFrame", "acquisition number(s) for sub-setting") sel_file <- .sel_file(object, dataStorage, dataOrigin) sel_acq <- acquisitionNum(object) %in% n & sel_file - object[sel_acq | !sel_file] + extractByIndex(object, which(sel_acq | !sel_file)) }) diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index fe10f10c..1782747c 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -49,6 +49,24 @@ test_that("[", { res <- be[integer()] expect_s4_class(res, class(be)[1L]) expect_true(length(res) == 0L) + + ## logical + l <- rep(FALSE, length(be)) + l[sample(seq_along(l), floor(length(l) / 2))] <- TRUE + res <- be[l] + expect_true(validObject(res)) + expect_true(length(res) == sum(l)) + expect_equal(res, be[which(l)]) +}) + +#' extractByIndex. Uses [ if not implemented +test_that("extractByIndex", { + i <- sample(seq_along(be), floor(length(be) / 2)) + res <- extractByIndex(be, i) + expect_true(validObject(res)) + expect_equal(length(res), length(i)) + expect_equal(msLevel(res), msLevel(be)[i]) + expect_equal(rtime(res), rtime(be)[i]) }) #' dropNASpectraVariables: only for not read-only diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 269b7cdd..e4424015 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -21,6 +21,7 @@ \alias{dataStorageBasePath,MsBackendMzR-method} \alias{dataStorageBasePath<-} \alias{dataStorageBasePath<-,MsBackendMzR-method} +\alias{extractByIndex} \alias{msLeveL<-,MsBackend-method} \alias{backendBpparam,MsBackend-method} \alias{backendInitialize,MsBackend-method} @@ -527,7 +528,9 @@ detailed description and examples): allowed. Parameter \code{i} should support \code{integer} indices and \code{logical} and should throw an error if \code{i} is out of bounds. The \code{MsCoreUtils::i2index} could be used to check the input \code{i}. -For \code{i = integer()} an empty backend should be returned. +For \code{i = integer()} an empty backend should be returned. Implementation +of this method is optional, as the default calls the \code{extractByIndex()} +method (which has to be implemented as the main subsetting method). \item \code{$}, \verb{$<-}: access or set/add a single spectrum variable (column) in the backend. Using a \code{value} of \code{NULL} should allow deleting the specified spectra variable. An error should be thrown if the spectra variable is not @@ -619,8 +622,7 @@ redundant to \code{[}, this methods avoids package namespace errors/problems that can result in implementations of \code{[} being not found by R (which can happen sometimes in parallel processing using the \code{\link[=SnowParam]{SnowParam()}}). This method is used internally by \code{Spectra} to extract/subset its backend. -Implementation is optional, as the default implementation for \code{MsBackend} -will use \code{[}. +Implementation of this method is mandatory. \item \code{filterAcquisitionNum()}: filters the object keeping only spectra matching the provided acquisition numbers (argument \code{n}). If \code{dataOrigin} or \code{dataStorage} is also provided, \code{object} is subsetted to the spectra with diff --git a/tests/testthat/test_MsBackend.R b/tests/testthat/test_MsBackend.R index cf36605e..f929451a 100644 --- a/tests/testthat/test_MsBackend.R +++ b/tests/testthat/test_MsBackend.R @@ -57,8 +57,6 @@ test_that("MsBackend methods throw errors", { expect_error(dm$a, "implemented for") expect_error(dm$a <- "a", "implemented for") expect_error(extractByIndex(dm, 1), "implemented for") - - expect_equal(extractByIndex(dm), dm) }) test_that("reset,MsBackend works", { diff --git a/vignettes/MsBackend.Rmd b/vignettes/MsBackend.Rmd index ff0269c9..9b5191ed 100644 --- a/vignettes/MsBackend.Rmd +++ b/vignettes/MsBackend.Rmd @@ -563,35 +563,39 @@ additionally available variables and the `columns` parameter of the (in addition to the required `"mz"` and `"intensity"` variables). -### `[` - -The `[` method allows to subset `MsBackend` objects. This operation is expected -to reduce a `MsBackend` object to the selected spectra. The method should -support to subset by indices or logical vectors and should also support -duplicating elements (i.e. when duplicated indices are used) as well as to -subset in arbitrary order. An error should be thrown if indices are out of -bounds, but the method should also support returning an empty backend with -`[integer()]`. Note that the `MsCoreUtils::i2index` function can be used to +### `extractByIndex()` and `[` + +The `extractByIndex()` and `[` methods allows to subset `MsBackend` objects. +This operation is expected to reduce a `MsBackend` object to the selected +spectra. These methods must also support duplication (e.g. `[c(1, 1, 1)]` and +extraction in any arbitrary order (e.g. `[c(3, 1, 5, 3)]`). While both methods +subset the object, `extractByIndex()` only supports to subset with an `integer` +index, while `[`, to be compliant with the base R implementation, should support +to subset by indices or logical vectors. An error should be thrown if indices +are out of bounds, but the method should also support returning an empty backend +with `[integer()]`. Note that the `MsCoreUtils::i2index` function can be used to check for correct input (and convert the input to an `integer` index). -Below we implement a possible `[` for our test backend class. We ignore the -parameters `j` from the definition of the `[` generic, since we treat our data -to be one-dimensional (with each spectrum being one element). +The `extractByIndex()` method is used by the data operation and analysis methods +on `Spectra` objects, while the `[` is intended to be used by the end user (if +needed). Below we implement `extractByIndex()` for our backend: ```{r} -setMethod("[", "MsBackendTest", function(x, i, j, ..., drop = FALSE) { - i <- MsCoreUtils::i2index(i, length = length(x)) - x@spectraVars <- x@spectraVars[i, ] - x@mz <- x@mz[i] - x@intensity <- x@intensity[i] - x +setMethod("extractByIndex", c("MsBackendTest", "ANY"), function(object, i) { + object@spectraVars <- object@spectraVars[i, ] + object@mz <- object@mz[i] + object@intensity <- object@intensity[i] + object }) ``` +The `[` does not need to be defined because a default implementation for +the base `MsBackend` exists. + We can now subset our backend to the last two spectra. ```{r} -a <- be[2:3] +a <- extractByIndex(be, 2:3) spectraData(a) ``` @@ -602,28 +606,6 @@ a <- be[c(2, 2, 2)] spectraData(a) ``` -In addition to the `[` method it is also suggested to implement a -`extractByIndex()` method. Similar to `[`, this method should extract elements -from, or subset, a backend, but it expects an integer vector with the indices as -second parameter. Hence, this method does not require conversion of -e.g. `logical` to `integer`. Also, this method helps avoiding namespace issues -sometimes encountered with parallel processing using the `SnowParam` when the -implementation of `[` for certain backends are not found. - -Data analysis methods on `Spectra` objects will use this method for most -operations. Implementation of this method is optional, since the default -implementation for `MsBackend` will fall back to `[`. Below we implement this -method for our example backend. - -```{r} -setMethod("extractByIndex", c("MsBackendTest", "ANY"), function(object, i) { - x@spectraVars <- x@spectraVars[i, ] - x@mz <- x@mz[i] - x@intensity <- x@intensity[i] - x -}) -``` - ### `backendMerge()` From 02d8394c08b4f4ca132cd65b70afe6163fb48b55 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Fri, 27 Sep 2024 10:31:03 +0200 Subject: [PATCH 3/3] fix: add backward compatibility --- R/MsBackend.R | 4 +++- tests/testthat/test_MsBackend.R | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/R/MsBackend.R b/R/MsBackend.R index 70cf211e..f1721a3e 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -1119,7 +1119,9 @@ setMethod("dropNaSpectraVariables", "MsBackend", function(object) { #' #' @export setMethod("extractByIndex", c("MsBackend", "ANY"), function(object, i) { - stop("'extractByIndex' not implemented for ", class(object), ".") + if (existsMethod("[", class(object)[1L])) + object[i = i] + else stop("'extractByIndex' not implemented for ", class(object)[1L], ".") }) #' @rdname MsBackend diff --git a/tests/testthat/test_MsBackend.R b/tests/testthat/test_MsBackend.R index f929451a..cea6af27 100644 --- a/tests/testthat/test_MsBackend.R +++ b/tests/testthat/test_MsBackend.R @@ -59,6 +59,30 @@ test_that("MsBackend methods throw errors", { expect_error(extractByIndex(dm, 1), "implemented for") }) +test_that("extractByIndex not implemented fallback", { + ## Backends that don't implement a dedicated `extractByIndex` method should + ## fall back to the [ method. + setClass("DummyBackend", + contains = "MsBackend", + slots = c(d = "integer")) + dm <- new("DummyBackend") + expect_error(extractByIndex(dm, 1L), "'extractByIndex' not implemented") + + dm@d <- 1:4 + + ## Have an implementation for [ but not extractByIndex: + setMethod("[", "DummyBackend", function(x, i, j, ..., drop = FALSE) { + x@d <- x@d[i] + x + }) + + res <- dm[c(3, 1)] + expect_equal(res@d, c(3L, 1L)) + + res <- extractByIndex(dm, c(3, 1)) + expect_equal(res@d, c(3L, 1L)) +}) + test_that("reset,MsBackend works", { res <- reset(sciex_mzr) expect_equal(res, sciex_mzr)