From c6bd03c31a040ec4b1a2df4ea66a09e0b25ccecd Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:43:59 +0100 Subject: [PATCH] Addition of cbind2 --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ NEWS.md | 5 +++ R/MsBackend.R | 31 ++++++++++++++++++- R/MsBackendDataFrame.R | 17 ++++++++++ R/MsBackendMemory.R | 17 ++++++++++ R/Spectra.R | 30 +++++++++++++++++- .../test_MsBackend/test_spectra_subsetting.R | 13 ++++++++ man/MsBackend.Rd | 17 ++++++++-- man/combineSpectra.Rd | 22 ++++++++++++- man/hidden_aliases.Rd | 6 ++++ tests/testthat/test_MsBackendDataFrame.R | 16 ++++++++++ tests/testthat/test_MsBackendMemory.R | 16 ++++++++++ 13 files changed, 187 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2057828e..b1a33c7d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.17.1 +Version: 1.17.2 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. 
It provides different diff --git a/NAMESPACE b/NAMESPACE index 8d8185f9..d70ef776 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -75,6 +75,7 @@ exportMethods(backendParallelFactor) exportMethods(backendRequiredSpectraVariables) exportMethods(bin) exportMethods(c) +exportMethods(cbind2) exportMethods(centroided) exportMethods(collisionEnergy) exportMethods(combinePeaks) @@ -309,4 +310,5 @@ importMethodsFrom(S4Vectors,extractROWS) importMethodsFrom(S4Vectors,isEmpty) importMethodsFrom(S4Vectors,lapply) importMethodsFrom(S4Vectors,split) +importMethodsFrom(methods,cbind2) importMethodsFrom(methods,show) diff --git a/NEWS.md b/NEWS.md index f498b62d..cd6c6579 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # Spectra 1.17 +## Changes in 1.17.2 + +- Add `cbind2()` method to easily add multiple `spectraVariables` and their + content to the `spectraData` of a `Spectra` object. + ## Changes in 1.17.1 - Refactor `containsMz()` to support chunk-wise processing. diff --git a/R/MsBackend.R b/R/MsBackend.R index 186f26c6..8a72e416 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -182,10 +182,14 @@ #' @param value replacement value for `<-` methods. See individual #' method description or expected data type. #' -#' @param values for `filterValues()`: A `numeric` vector that define the +#' @param values For `filterValues()`: A `numeric` vector that defines the #' values to filter the `object`. `values` needs to be of same length than #' parameter `spectraVariables` and in the same order. #' +#' @param y For `cbind2()`: A `data.frame`, `DataFrame` or `matrix` with the +#' spectra variables to be added to the backend. Its number of rows needs +#' to match the number of spectra in the backend. +#' #' @param x Object extending `MsBackend`. #' #' @param ... Additional arguments. @@ -313,6 +317,11 @@ #' `dropNaSpectraVariables()` might still show columns containing `NA` values #' for *core* spectra variables.
#' +#' - `cbind2()`: appends multiple spectra variables to the backend at +#' once. It does so *blindly* and is therefore at the risk of the user. For a +#' more controlled way of adding spectra variables, `joinSpectraData()` +#' should be used. +#' #' - `centroided()`, `centroided<-`: gets or sets the centroiding #' information of the spectra. `centroided()` returns a `logical` #' vector of length equal to the number of spectra with `TRUE` if a @@ -1022,6 +1031,26 @@ setMethod("peaksVariables", "MsBackend", function(object) { c("mz", "intensity") }) + +setClassUnion("dataframeOrDataFrameOrmatrix", c("data.frame", "DataFrame", "matrix")) +#' @exportMethod cbind2 +#' +#' @importMethodsFrom methods cbind2 +#' +#' @rdname MsBackend +setMethod("cbind2", signature = c("MsBackend", "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...) { + if (is(y, "matrix")) + y <- as.data.frame(y) + if (nrow(y) != length(x)) + stop("Length of 'y' does not match the number of spectra in 'x'") + for (i in colnames(y)) { + x[[i]] <- y[[i]] # `[[` returns the column itself for any data.frame flavour + } + x +}) + + #' @exportMethod centroided #' #' @aliases centroided<-,MsBackend-method diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index 6959d771..1fe4f872 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -567,6 +567,23 @@ setMethod("[", "MsBackendDataFrame", function(x, i, j, ..., drop = FALSE) { .subset_backend_data_frame(x, i) }) +setClassUnion("dataframeOrDataFrameOrmatrix", + c("data.frame", "DataFrame", "matrix")) +#' @importMethodsFrom methods cbind2 +#' +#' @rdname hidden_aliases +setMethod("cbind2", signature = c("MsBackendDataFrame", + "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...)
{ + if (is(y, "matrix")) + y <- as.data.frame(y) + if (nrow(y) != length(x)) + stop("Length of 'y' does not match the number of spectra in 'x'") + x@spectraData <- cbind(x@spectraData, y) + validObject(x) + x + }) + #' @rdname hidden_aliases setMethod("split", "MsBackendDataFrame", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 4bde69ac..8c2b31c4 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -670,6 +670,23 @@ setMethod("[", "MsBackendMemory", function(x, i, j, ..., drop = FALSE) { .df_subset(x, i) }) +setClassUnion("dataframeOrDataFrameOrmatrix", + c("data.frame", "DataFrame", "matrix")) +#' @importMethodsFrom methods cbind2 +#' +#' @rdname hidden_aliases +setMethod("cbind2", signature = c("MsBackendMemory", + "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...) { + if (is(y, "matrix")) + y <- as.data.frame(y) + if (nrow(y) != length(x)) + stop("Length of 'y' does not match the number of spectra in 'x'") + x@spectraData <- cbind(x@spectraData, y) + validObject(x) + x + }) + #' @rdname hidden_aliases setMethod("split", "MsBackendMemory", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/Spectra.R b/R/Spectra.R index 73520422..69661c3a 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -1447,6 +1447,7 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' @aliases combineSpectra #' @aliases split #' @aliases joinSpectraData +#' @aliases cbind2 #' #' @description #' @@ -1463,6 +1464,15 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' function and to eventually (if needed) apply the processing queue using #' the [applyProcessing()] function. #' +#' - `cbind2()`: Appends multiple spectra variables from a `data.frame`, +#' `DataFrame` or `matrix` to the `Spectra` object at once. It does so +#' *blindly* (e.g. rownames are not checked) and is therefore at +#' the risk of the user.
For a more controlled way of adding spectra +#' variables, `joinSpectraData()` should be used. `cbind2()` returns a +#' `Spectra` object with the appended spectra variables. It does, however, +#' check that the number of rows of `y` matches the number of spectra in the +#' `Spectra` object. +#' #' - `combineSpectra()`: combines sets of spectra (defined with parameter `f`) #' into a single spectrum per set aggregating their MS data (i.e. their #' *peaks data* matrices with the *m/z* and intensity values of their @@ -1507,6 +1517,8 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' should be explored and ideally be removed using for #' `QFeatures::reduceDataFrame()`, `PMS::reducePSMs()` or similar #' functions. +#' For a more general function that appends a `data.frame`, +#' `DataFrame` or `matrix`, see `cbind2()`. #' #' - `split()`: splits the `Spectra` object based on parameter `f` into a `list` #' of `Spectra` objects. @@ -1543,7 +1555,9 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' #' @param x A `Spectra` object. #' -#' @param y A `DataFrame` with the spectra variables to join/add. +#' @param y For `joinSpectraData()`: `DataFrame` with the spectra variables +#' to join/add. For `cbind2()`: a `data.frame`, `DataFrame` or +#' `matrix`. #' #' @param ... Additional arguments. #' @@ -1660,6 +1674,10 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' #' spectraVariables(sciex2) #' spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] +#' +#' ## Append new spectra variables with cbind2() +#' df <- data.frame(cola = seq_along(sciex1), colb = "b") +#' data_append <- cbind2(sciex1, df) NULL #' @rdname combineSpectra @@ -1669,6 +1687,16 @@ setMethod("c", "Spectra", function(x, ...)
{ .concatenate_spectra(unname(list(unname(x), ...))) }) +setClassUnion("dataframeOrDataFrame", c("data.frame", "DataFrame", "matrix")) # union name kept to match the documented method signature +#' @rdname combineSpectra +#' +#' @export +setMethod("cbind2", signature(x = "Spectra", + y = "dataframeOrDataFrame"), function(x, y, ...) { + x@backend <- cbind2(x@backend, y, ...) # row-count check and append happen in the backend method + x + }) + #' @rdname combineSpectra setMethod("split", "Spectra", function(x, f, drop = FALSE, ...) { bcknds <- split(x@backend, f, ...) diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index 93adce0d..cadb2542 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -59,6 +59,19 @@ test_that("[", { expect_equal(res, be[which(l)]) }) +test_that("cbind2 works", { + seql <- length(be) + df <- data.frame(cola = seq_len(seql), colb = "b", colz = "z") + res <- cbind2(be, df) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), length(spectraVariables(be)) + 3) + expect_equal(res$cola, seq_len(seql)) + expect_equal(res$colb, rep("b", seql)) + expect_equal(res$colz, rep("z", seql)) + df2 <- data.frame(cola = seq_len(seql + 1L), colb = "b", colz = "z") + expect_error(cbind2(be, df2), "does not match") +}) + #' extractByIndex.
Uses [ if not implemented test_that("extractByIndex", { i <- sample(seq_along(be), floor(length(be) / 2)) diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 279576a5..833cd2f6 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -34,6 +34,7 @@ \alias{acquisitionNum,MsBackend-method} \alias{peaksData,MsBackend-method} \alias{peaksVariables,MsBackend-method} +\alias{cbind2,MsBackend,dataframeOrDataFrameOrmatrix-method} \alias{centroided,MsBackend-method} \alias{centroided<-,MsBackend-method} \alias{collisionEnergy,MsBackend-method} @@ -133,6 +134,8 @@ \S4method{peaksVariables}{MsBackend}(object) +\S4method{cbind2}{MsBackend,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{centroided}{MsBackend}(object) \S4method{centroided}{MsBackend}(object) <- value @@ -325,6 +328,12 @@ in the individual \code{matrix} of the returned \code{list}. Defaults to \code{peaksVariables(object)} and depends on what \emph{peaks variables} the backend provides.} +\item{x}{Object extending \code{MsBackend}.} + +\item{y}{For \code{cbind2()}: A \code{data.frame}, \code{DataFrame} or \code{matrix} with the +spectra variables to be added to the backend. Its number of rows needs +to match the number of spectra in the backend.} + \item{value}{replacement value for \verb{<-} methods. See individual method description or expected data type.} @@ -410,12 +419,10 @@ to be used to subset/filter \code{object}.} \item{msLevel.}{same as \code{msLevel} above.} -\item{values}{for \code{filterValues()}: A \code{numeric} vector that define the +\item{values}{For \code{filterValues()}: A \code{numeric} vector that defines the values to filter the \code{object}.
\code{values} needs to be of same length than parameter \code{spectraVariables} and in the same order.} -\item{x}{Object extending \code{MsBackend}.} - \item{use.names}{For \code{lengths()}: whether spectrum names should be used.} \item{drop}{For \code{[}: not considered.} @@ -600,6 +607,10 @@ object's \code{spectraData} that contain only missing values (\code{NA}). Note t while columns with only \code{NA}s are removed, a \code{spectraData()} call after \code{dropNaSpectraVariables()} might still show columns containing \code{NA} values for \emph{core} spectra variables. +\item \code{cbind2()}: appends multiple spectra variables to the backend at +once. It does so \emph{blindly} and is therefore at the risk of the user. For a +more controlled way of adding spectra variables, \code{joinSpectraData()} +should be used. \item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding information of the spectra. \code{centroided()} returns a \code{logical} vector of length equal to the number of spectra with \code{TRUE} if a diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd index d4f7bdb0..07311ba6 100644 --- a/man/combineSpectra.Rd +++ b/man/combineSpectra.Rd @@ -5,7 +5,9 @@ \alias{combineSpectra} \alias{joinSpectraData} \alias{split} +\alias{cbind2} \alias{c,Spectra-method} +\alias{cbind2,Spectra,dataframeOrDataFrame-method} \alias{split,Spectra,ANY-method} \title{Merging, aggregating and splitting Spectra} \usage{ @@ -24,6 +26,8 @@ joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y") \S4method{c}{Spectra}(x, ...) +\S4method{cbind2}{Spectra,dataframeOrDataFrame}(x, y, ...) + \S4method{split}{Spectra,ANY}(x, f, drop = FALSE, ...) } \arguments{ @@ -48,7 +52,9 @@ of the spectra. Defaults to \code{\link[=combinePeaksData]{combinePeaksData()}}. information.
This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method of the \linkS4class{MsBackend}.} -\item{y}{A \code{DataFrame} with the spectra variables to join/add.} +\item{y}{For \code{joinSpectraData()}: \code{DataFrame} with the spectra variables +to join/add. For \code{cbind2()}: a \code{data.frame}, \code{DataFrame} or +\code{matrix}.} \item{by.x}{A \code{character(1)} specifying the spectra variable used for merging. Default is \code{"spectrumId"}.} @@ -76,6 +82,14 @@ objects. In such cases it is suggested to first change the backends of all \code{Spectra} to the same type of backend (using the \code{\link[=setBackend]{setBackend()}} function and to eventually (if needed) apply the processing queue using the \code{\link[=applyProcessing]{applyProcessing()}} function. +\item \code{cbind2()}: Appends multiple spectra variables from a \code{data.frame}, +\code{DataFrame} or \code{matrix} to the \code{Spectra} object at once. It does so +\emph{blindly} (e.g. rownames are not checked) and is therefore at +the risk of the user. For a more controlled way of adding spectra +variables, \code{joinSpectraData()} should be used. \code{cbind2()} returns a +\code{Spectra} object with the appended spectra variables. It does, however, +check that the number of rows of \code{y} matches the number of spectra in the +\code{Spectra} object. \item \code{combineSpectra()}: combines sets of spectra (defined with parameter \code{f}) into a single spectrum per set aggregating their MS data (i.e. their \emph{peaks data} matrices with the \emph{m/z} and intensity values of their @@ -120,6 +134,8 @@ throw a warning and only the last occurrence is kept. These should be explored and ideally be removed using for \code{QFeatures::reduceDataFrame()}, \code{PMS::reducePSMs()} or similar functions.
+For a more general function that appends a \code{data.frame}, +\code{DataFrame} or \code{matrix}, see \code{cbind2()}. } \item \code{split()}: splits the \code{Spectra} object based on parameter \code{f} into a \code{list} of \code{Spectra} objects. @@ -228,6 +244,10 @@ sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId") spectraVariables(sciex2) spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] + +## Append new spectra variables with cbind2() +df <- data.frame(cola = seq_along(sciex1), colb = "b") +data_append <- cbind2(sciex1, df) } \seealso{ \itemize{ diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index ce4e63e9..c03adb62 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -59,6 +59,7 @@ \alias{tic,MsBackendDataFrame-method} \alias{$,MsBackendDataFrame-method} \alias{$<-,MsBackendDataFrame-method} +\alias{cbind2,MsBackendDataFrame,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendDataFrame,ANY-method} \alias{filterAcquisitionNum,MsBackendDataFrame-method} \alias{backendRequiredSpectraVariables,MsBackendHdf5Peaks-method} @@ -133,6 +134,7 @@ \alias{$,MsBackendMemory-method} \alias{$<-,MsBackendMemory-method} \alias{[,MsBackendMemory-method} +\alias{cbind2,MsBackendMemory,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendMemory,ANY-method} \alias{filterAcquisitionNum,MsBackendMemory-method} \alias{backendRequiredSpectraVariables,MsBackendMzR-method} @@ -272,6 +274,8 @@ \S4method{[}{MsBackendDataFrame}(x, i, j, ..., drop = FALSE) +\S4method{cbind2}{MsBackendDataFrame,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{split}{MsBackendDataFrame,ANY}(x, f, drop = FALSE, ...) \S4method{filterAcquisitionNum}{MsBackendDataFrame}( @@ -432,6 +436,8 @@ \S4method{[}{MsBackendMemory}(x, i, j, ..., drop = FALSE) +\S4method{cbind2}{MsBackendMemory,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{split}{MsBackendMemory,ANY}(x, f, drop = FALSE, ...)
\S4method{filterAcquisitionNum}{MsBackendMemory}( diff --git a/tests/testthat/test_MsBackendDataFrame.R b/tests/testthat/test_MsBackendDataFrame.R index 238e945a..2cc04795 100644 --- a/tests/testthat/test_MsBackendDataFrame.R +++ b/tests/testthat/test_MsBackendDataFrame.R @@ -635,6 +635,22 @@ test_that("[,MsBackendDataFrame works", { expect_equal(res, res_2) }) +test_that("cbind2, MsBackendDataFrame works", { + be <- MsBackendDataFrame() + df <- DataFrame(scanIndex = 1:2, a = "a", b = "b") + be <- backendInitialize(be, df) + df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") + res <- cbind2(be, df2) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) +3) # three appended columns: cola, colb, colz + expect_equal(res$cola, c(3, 4)) + expect_equal(res$colb, c("b", "b")) + expect_equal(res$colz, c("z", "z")) + expect_equal(res$scanIndex, 1:2) + df3 <- data.frame(colv = 1:6, colw = "b") # six rows for a backend of two spectra + expect_error(cbind2(be, df3), "does not match") +}) + test_that("selectSpectraVariables,MsBackendDataFrame works", { be <- MsBackendDataFrame() res <- selectSpectraVariables(be, c("dataStorage", "msLevel")) diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index 119e2c56..c4df695f 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -564,6 +564,22 @@ test_that("[,MsBackendMemory works", { expect_equal(res, res_2) }) +test_that("cbind2, MsBackendMemory works", { + be <- new("MsBackendMemory") + df <- data.frame(scanIndex = 1:2, a = "a", b = "b") + be <- backendInitialize(be, df) + df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") + res <- cbind2(be, df2) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) +3) # three appended columns: cola, colb, colz + expect_equal(res$cola, c(3, 4)) + expect_equal(res$colb, c("b", "b")) + expect_equal(res$colz, c("z", "z")) + expect_equal(res$scanIndex, 1:2) + df3 <- data.frame(colv = 1:6, colw = "b") # six rows for a backend of two spectra + expect_error(cbind2(be, df3), "does not match") +}) +
test_that("split,MsBackendMemory works", { be <- new("MsBackendMemory") be <- backendInitialize(be, test_df)