Skip to content

Commit

Permalink
Merge pull request #341 from rformassspectrometry/jomain
Browse files Browse the repository at this point in the history
Support chunk-wise processing in containsMz
  • Loading branch information
jorainer authored Nov 21, 2024
2 parents bfffb91 + d52f577 commit 0deee7b
Show file tree
Hide file tree
Showing 10 changed files with 312 additions and 663 deletions.
4 changes: 2 additions & 2 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ root = true
charset = utf-8
end_of_line = lf
trim_trailing_whitespace = true
insert_final_newline = false
insert_final_newline = true

[*.R]
indent_style = space
Expand All @@ -22,4 +22,4 @@ indent_style = tab

[*.yml]
indent_style = space
indent_size = 2
indent_size = 2
4 changes: 2 additions & 2 deletions .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
fail-fast: false
matrix:
config:
- { os: ubuntu-latest, r: '4.4', bioc: '3.20', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
- { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
- { os: macOS-latest, r: '4.4', bioc: '3.20'}
- { os: windows-latest, r: '4.4', bioc: '3.20'}
env:
Expand Down Expand Up @@ -239,7 +239,7 @@ jobs:
- name: Test coverage
if: github.ref == 'refs/heads/main' && env.run_covr == 'true' && runner.os == 'Linux'
run: |
covr::codecov()
covr::codecov(token = "${{ secrets.CODECOV_TOKEN }}")
shell: Rscript {0}

- name: Install package
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Spectra
Title: Spectra Infrastructure for Mass Spectrometry Data
Version: 1.15.13
Version: 1.17.1
Description: The Spectra package defines an efficient infrastructure
for storing and handling mass spectrometry spectra and functionality to
subset, process, visualize and compare spectra data. It provides different
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Spectra 1.17

## Changes in 1.17.1

- Refactor `containsMz()` to support chunk-wise processing.

# Spectra 1.15

## Changes in 1.15.13
Expand Down
46 changes: 11 additions & 35 deletions R/Spectra-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,13 @@ NULL
#' @description
#'
#' This function applies the processing queue and an arbitrary function to
#' the peaks matrix of each spectrum of the `Spectra` object `object`.
#' the peaks matrix of each spectrum of the `Spectra` object `object`. It has
#' built-in parallel and/or chunk-wise processing enabled through parameter
#' `f`, which defines how the `Spectra` (or rather its backend) should be
#' split. The default `f = .parallel_processing_factor(object)` splits
#' the backend by chunk (if a finite chunk size is defined for the `Spectra`)
#' or by its optimal parallel processing factor. See the description of
#' the `.parallel_processing_factor()` function below for information.
#'
#' @param object `Spectra` object.
#'
Expand All @@ -78,7 +84,8 @@ NULL
#'
#' @param f `factor` or `vector` that can be coerced to one defining how the
#' data should be split for parallel processing. Set to `NULL` or
#' `factor()` to disable splitting and parallel processing.
#' `factor()` to disable splitting and parallel processing. See function
#' description above for details and information.
#'
#' @param columns `character` defining the columns that should be returned.
#' This will be passed to the backend's `peaksData` function.
Expand Down Expand Up @@ -571,39 +578,8 @@ combineSpectra <- function(x, f = x$dataStorage, p = x$dataStorage,

#' @description
#'
#' Internal function to check if any (or all) of the provided `mz` values are
#' present among the m/z values of each spectrum.
#'
#' @param x `Spectra` object
#'
#' @param mz `numeric` of m/z value(s) to check in each spectrum of `x`.
#'
#' @param tolerance `numeric(1)` with the tolerance.
#'
#' @param ppm `numeric(1)` with the ppm.
#'
#' @param condFun `function` such as `any` or `all`.
#'
#' @param parallel `BiocParallel` parameter object.
#'
#' @return `logical` of the same length as `x`.
#'
#' @author Johannes Rainer
#'
#' @importFrom MsCoreUtils common
#'
#' @noRd
.has_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, condFun = any,
                    parallel = SerialParam()) {
    ## Get the m/z values of all spectra in `x` (as returned by `mz()`); the
    ## result is iterated over element by element below.
    spectra_mz <- mz(x, BPPARAM = parallel)
    ## Match the query `mz` against the m/z values of a single spectrum and
    ## reduce the matching result with `condFun` (e.g. `any` or `all`).
    check_spectrum <- function(z) {
        matched <- common(mz, z, tolerance = tolerance, ppm = ppm)
        condFun(matched)
    }
    vapply(spectra_mz, FUN = check_spectrum, FUN.VALUE = logical(1))
}

#' @description
#'
#' Same as `.has_mz` only that a different `mz` is used for each spectrum in
#' `x`. Length of `mz` is thus expected to be equal to length of `x`.
#' Check for presence of an m/z value in each spectrum. Each spectrum gets
#' its own m/z.
#'
#' @param mz `numeric` **same length as `x`**.
#'
Expand Down
41 changes: 18 additions & 23 deletions R/Spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -3278,23 +3278,18 @@ setMethod("containsMz", "Spectra", function(object, mz = numeric(),
tolerance = 0,
ppm = 20, which = c("any", "all"),
BPPARAM = bpparam()) {
cond_fun <- match.fun(match.arg(which))
if (all(is.na(mz)))
return(rep(NA, length(object)))
mz <- unique(sort(mz))
BPPARAM <- backendBpparam(object@backend, BPPARAM)
## TODO: fix to use .peaksapply instead.
if (is(BPPARAM, "SerialParam"))
.has_mz(object, mz, tolerance = tolerance, ppm = ppm,
condFun = cond_fun, parallel = BPPARAM)
else {
sp <- SerialParam()
f <- as.factor(dataStorage(object))
res <- .lapply(object, FUN = .has_mz, mz = mz, tolerance = tolerance,
condFun = cond_fun, parallel = sp, f = f,
BPPARAM = BPPARAM)
unsplit(res, f = f)
}
if (length(object)) {
cond_fun <- match.fun(match.arg(which))
if (all(is.na(mz)))
return(rep(NA, length(object)))
mz <- unique(sort(mz))
BPPARAM <- backendBpparam(object@backend, BPPARAM)
unlist(.peaksapply(
object, FUN = .peaks_contain_mz, mz = mz, tolerance = tolerance,
ppm = ppm, condFun = cond_fun, BPPARAM = BPPARAM),
use.names = FALSE
)
} else logical()
})

#' @rdname addProcessing
Expand Down Expand Up @@ -3327,12 +3322,12 @@ setMethod("containsNeutralLoss", "Spectra", function(object, neutralLoss = 0,
#' @export
setMethod("entropy", "Spectra", function(object, normalized = TRUE) {
    if (length(object)) {
        ## Select the function applied to the intensities of each spectrum:
        ## `nentropy` if `normalized` is `TRUE`, `entropy` otherwise.
        if (normalized) entropy_fun <- nentropy
        else entropy_fun <- entropy
        ## Compute the value once per spectrum through `.peaksapply()` and
        ## flatten the per-spectrum results into an unnamed vector.
        unlist(.peaksapply(
            object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])),
            use.names = FALSE
        )
    } else numeric() # empty input: return an empty numeric vector
})
#' @rdname addProcessing
Expand Down
10 changes: 10 additions & 0 deletions R/peaks-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -737,3 +737,13 @@ joinPeaksNone <- function(x, y, ...) {
if (keep) x[sel, , drop = FALSE]
else x[!sel, , drop = FALSE]
}

#' Test whether a peak matrix contains peaks matching the provided m/z
#' value(s). Note that, unlike a peak-processing function, this function does
#' **not** return a peak matrix, but only a logical of length 1!
#'
#' @param x peak matrix with a column named `"mz"`.
#'
#' @param mz `numeric` with the m/z value(s) to look for in `x`.
#'
#' @param tolerance passed to `common()` for the m/z matching.
#'
#' @param ppm passed to `common()` for the m/z matching.
#'
#' @param condFun `function` (by default `any`) used to reduce the result of
#'     `common()` to the returned value.
#'
#' @param ... ignored.
#'
#' @return `logical(1)`
#' @noRd
.peaks_contain_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20,
                              condFun = any, ...) {
    is_matched <- common(mz, x[, "mz"], tolerance = tolerance, ppm = ppm)
    condFun(is_matched)
}
Loading

0 comments on commit 0deee7b

Please sign in to comment.