From 8baf07c7acbf44a943f5f2b7dd42d0ff90bfeebb Mon Sep 17 00:00:00 2001 From: J Wokaty Date: Tue, 29 Oct 2024 10:49:53 -0400 Subject: [PATCH 1/5] bump x.y.z version to even y prior to creation of RELEASE_3_20 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7727f9b8..45f21460 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.15.13 +Version: 1.16.0 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different From 803c9d2fa3858749404d6be71e97a844ed2a7959 Mon Sep 17 00:00:00 2001 From: J Wokaty Date: Tue, 29 Oct 2024 10:49:53 -0400 Subject: [PATCH 2/5] bump x.y.z version to odd y following creation of RELEASE_3_20 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 45f21460..df891150 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.16.0 +Version: 1.17.0 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different From 6eeeb79f2e9a1b72e790adfdaa424f02d1ee9d82 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Wed, 20 Nov 2024 09:16:32 +0100 Subject: [PATCH 3/5] refactor: support chunk-wise processing in containsMz - Add support for chunk-wise processing to `containsMz()`. Related to issue #340. 
--- .editorconfig | 4 +-- .github/workflows/check-bioc.yml | 2 +- DESCRIPTION | 2 +- NEWS.md | 6 ++++ R/Spectra-functions.R | 46 ++++++------------------- R/Spectra.R | 41 ++++++++++------------ R/peaks-functions.R | 10 ++++++ tests/testthat/test_Spectra-functions.R | 26 -------------- tests/testthat/test_peaks-functions.R | 13 +++++++ 9 files changed, 62 insertions(+), 88 deletions(-) diff --git a/.editorconfig b/.editorconfig index 71842659..0cebcc70 100644 --- a/.editorconfig +++ b/.editorconfig @@ -6,7 +6,7 @@ root = true charset = utf-8 end_of_line = lf trim_trailing_whitespace = true -insert_final_newline = false +insert_final_newline = true [*.R] indent_style = space @@ -22,4 +22,4 @@ indent_style = tab [*.yml] indent_style = space -indent_size = 2 +indent_size = 2 \ No newline at end of file diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 85b29ffa..48f4ea23 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -53,7 +53,7 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.4', bioc: '3.20', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - { os: macOS-latest, r: '4.4', bioc: '3.20'} - { os: windows-latest, r: '4.4', bioc: '3.20'} env: diff --git a/DESCRIPTION b/DESCRIPTION index df891150..2057828e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.17.0 +Version: 1.17.1 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. 
It provides different diff --git a/NEWS.md b/NEWS.md index b3d0404d..f498b62d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# Spectra 1.17 + +## Changes in 1.17.1 + +- Refactor `containsMz()` to support chunk-wise processing. + # Spectra 1.15 ## Changes in 1.15.13 diff --git a/R/Spectra-functions.R b/R/Spectra-functions.R index 99014163..93d9f2db 100644 --- a/R/Spectra-functions.R +++ b/R/Spectra-functions.R @@ -63,7 +63,13 @@ NULL #' @description #' #' This function applies the processing queue and an arbitrary function to -#' the peaks matrix of each spectrum of the `Spectra` object `object`. +#' the peaks matrix of each spectrum of the `Spectra` object `object`. It has +#' built-in parallel and/or chunk-wise processing enabled through parameter +#' `f`, which allows defining how the `Spectra` (or rather its backend) needs +#' to be split. The default `f = .parallel_processing_factor(object)` splits +#' the backend by chunk (if a finite chunk size is defined for the `Spectra`) +#' or by its optimal parallel processing factor. See the description of +#' the `.parallel_processing_factor()` function below for information. #' #' @param object `Spectra` object. #' @@ -78,7 +84,8 @@ NULL #' #' @param f `factor` or `vector` that can be coerced to one defining how the #' data should be split for parallel processing. Set to `NULL` or -#' `factor()` to disable splitting and parallel processing. +#' `factor()` to disable splitting and parallel processing. See function +#' description above for details and information. #' #' @param columns `character` defining the columns that should be returned. #' This will be passed to the backend's `peaksData` function. @@ -571,39 +578,8 @@ combineSpectra <- function(x, f = x$dataStorage, p = x$dataStorage, #' @description #' -#' Internal function to check if any (or all) of the provided `mz` values are -#' in the spectras' m/z. 
-#' -#' @param x `Spectra` object -#' -#' @param mz `numeric` of m/z value(s) to check in each spectrum of `x`. -#' -#' @param tolarance `numeric(1)` with the tolerance. -#' -#' @param ppm `numeric(1)` with the ppm. -#' -#' @param condFun `function` such as `any` or `all`. -#' -#' @param parallel `BiocParallel` parameter object. -#' -#' @return `logical` same length than `x`. -#' -#' @author Johannes Rainer -#' -#' @importFrom MsCoreUtils common -#' -#' @noRd -.has_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, condFun = any, - parallel = SerialParam()) { - mzs <- mz(x, BPPARAM = parallel) - vapply(mzs, FUN = function(z) - condFun(common(mz, z, tolerance = tolerance, ppm = ppm)), logical(1)) -} - -#' @description -#' -#' Same as `.has_mz` only that a different `mz` is used for each spectrum in -#' `x`. Length of `mz` is thus expected to be equal to length of `x`. +#' Check for presence of an m/z value in each spectrum. Each spectrum gets +#' its own m/z. #' #' @param mz `numeric` **same length as `x`**. #' diff --git a/R/Spectra.R b/R/Spectra.R index 14ebbf2c..73520422 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -3278,23 +3278,18 @@ setMethod("containsMz", "Spectra", function(object, mz = numeric(), tolerance = 0, ppm = 20, which = c("any", "all"), BPPARAM = bpparam()) { - cond_fun <- match.fun(match.arg(which)) - if (all(is.na(mz))) - return(rep(NA, length(object))) - mz <- unique(sort(mz)) - BPPARAM <- backendBpparam(object@backend, BPPARAM) - ## TODO: fix to use .peaksapply instead. 
- if (is(BPPARAM, "SerialParam")) - .has_mz(object, mz, tolerance = tolerance, ppm = ppm, - condFun = cond_fun, parallel = BPPARAM) - else { - sp <- SerialParam() - f <- as.factor(dataStorage(object)) - res <- .lapply(object, FUN = .has_mz, mz = mz, tolerance = tolerance, - condFun = cond_fun, parallel = sp, f = f, - BPPARAM = BPPARAM) - unsplit(res, f = f) - } + if (length(object)) { + cond_fun <- match.fun(match.arg(which)) + if (all(is.na(mz))) + return(rep(NA, length(object))) + mz <- unique(sort(mz)) + BPPARAM <- backendBpparam(object@backend, BPPARAM) + unlist(.peaksapply( + object, FUN = .peaks_contain_mz, mz = mz, tolerance = tolerance, + ppm = ppm, condFun = cond_fun, BPPARAM = BPPARAM), + use.names = FALSE + ) + } else logical() }) #' @rdname addProcessing @@ -3327,12 +3322,12 @@ setMethod("containsNeutralLoss", "Spectra", function(object, neutralLoss = 0, #' @export setMethod("entropy", "Spectra", function(object, normalized = TRUE) { if (length(object)) { - if (normalized) entropy_fun <- nentropy - else entropy_fun <- entropy - unlist(.peaksapply( - object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])), - use.names = FALSE - ) + if (normalized) entropy_fun <- nentropy + else entropy_fun <- entropy + unlist(.peaksapply( + object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])), + use.names = FALSE + ) } else numeric() }) #' @rdname addProcessing diff --git a/R/peaks-functions.R b/R/peaks-functions.R index f34adde9..dc19e353 100644 --- a/R/peaks-functions.R +++ b/R/peaks-functions.R @@ -737,3 +737,13 @@ joinPeaksNone <- function(x, y, ...) { if (keep) x[sel, , drop = FALSE] else x[!sel, , drop = FALSE] } + +#' Check for presence of peaks defined by their m/z value. Note that this +#' function does **not** return a peak matrix, but only a logical of length 1! +#' +#' @return `logical(1)` +#' @noRd +.peaks_contain_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, + condFun = any, ...) 
{ + condFun(common(mz, x[, "mz"], tolerance = tolerance, ppm = ppm)) +} diff --git a/tests/testthat/test_Spectra-functions.R b/tests/testthat/test_Spectra-functions.R index 8df50d71..2dbcf372 100644 --- a/tests/testthat/test_Spectra-functions.R +++ b/tests/testthat/test_Spectra-functions.R @@ -352,32 +352,6 @@ test_that("dropNaSpectraVariables works", { function(z) !any(is.na(z))))) }) -test_that(".has_mz works", { - sps <- Spectra(sciex_mzr)[1:10] - sps <- setBackend(sps, MsBackendDataFrame()) - mzs <- mz(sps) - x <- c(mzs[[2]][5], mzs[[3]][8]) - - res <- .has_mz(sps, mz = x, ppm = 0) - expect_true(length(res) == length(sps)) - expect_true(is.logical(res)) - - spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3)) - spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1)) - spd$intensity <- list(c(10, 20, 30, 40), c(11, 21, 31), c(12, 22, 32)) - sps <- Spectra(spd) - - res <- .has_mz(sps, mz = c(14, 34)) - expect_equal(res, c(TRUE, TRUE, FALSE)) - res <- .has_mz(sps, mz = c(14, 34), tolerance = 0.15) - expect_equal(res, c(TRUE, TRUE, TRUE)) - - res <- .has_mz(sps, mz = c(14, 34), condFun = all) - expect_true(all(!res)) - res <- .has_mz(sps, mz = c(14, 34), condFun = all, tolerance = 0.15) - expect_equal(res, c(FALSE, TRUE, TRUE)) -}) - test_that(".has_mz_each works", { spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3)) spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1)) diff --git a/tests/testthat/test_peaks-functions.R b/tests/testthat/test_peaks-functions.R index f28452dd..ef0978c8 100644 --- a/tests/testthat/test_peaks-functions.R +++ b/tests/testthat/test_peaks-functions.R @@ -722,3 +722,16 @@ test_that(".peaks_filter_ranges works", { ranges = ranges, keep = FALSE) expect_equal(res, x) }) + +test_that(".peaks_contain_mz works", { + pks <- cbind(mz = c(1.3, 1.5, 32.1, 45.6), c(1, 2, 3, 4)) + + expect_false(.peaks_contain_mz(pks)) + expect_true(.peaks_contain_mz(pks, 1.5)) + 
expect_false(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = all)) + expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = any)) + expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = any, + tolerance = 0.1)) + expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = all, + tolerance = 0.1)) +}) From 6a1367c38ae3804da4aad4b9bc017ae341dc3f30 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Wed, 20 Nov 2024 09:57:43 +0100 Subject: [PATCH 4/5] tests: update vdiffr svg --- .../_snaps/plotMzDelta/plotmzdelta-1000.svg | 823 ++++++------------ 1 file changed, 249 insertions(+), 574 deletions(-) diff --git a/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg b/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg index e16506da..e041fc61 100644 --- a/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg +++ b/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg @@ -1,579 +1,254 @@ - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + +Histogram of Mass Delta Distributions +M/Z delta +Frequency + + + + + +50 +100 +150 +200 + + + + + + + +0 +500 +1000 +1500 +2000 +2500 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +peg +A +R +N +D +C +E +Q/K +G +H +I/L +M +F +P +S +T +W +Y +V From d52f57756a92a24f1352986a0461e0e6e33da5e1 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Wed, 20 Nov 2024 11:38:03 +0100 Subject: [PATCH 5/5] tests: add CODECOV token --- .github/workflows/check-bioc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 48f4ea23..5c1ebc66 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -239,7 +239,7 @@ jobs: - name: Test coverage if: github.ref == 'refs/heads/main' && env.run_covr == 'true' && runner.os == 'Linux' run: | - covr::codecov() + covr::codecov(token = "${{ secrets.CODECOV_TOKEN }}") shell: Rscript {0} - name: Install package