diff --git a/.BBSoptions b/.BBSoptions new file mode 100644 index 000000000..3af83ddb2 --- /dev/null +++ b/.BBSoptions @@ -0,0 +1 @@ +RunLongTests: TRUE diff --git a/.Rbuildignore b/.Rbuildignore index 76b259c34..8d034fbb5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,11 @@ -local_data/* +.editorconfig +local_data .git .travis.yml .org +^.*\.Rproj$ +^\.Rproj\.user$ +README.md +Rplots.pdf +.DS_Store +^\.github$ diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..b52f2c44b --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +charset = utf-8 +end_of_line = lf +trim_trailing_whitespace = true +insert_final_newline = false + +[*.R] +indent_style = space +indent_size = 4 +max_line_length = 80 + +[Makefile] +indent_style = tab + +[.travis.yml] +indent_style = space +indent_size = 2 diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 000000000..2d19fc766 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml new file mode 100644 index 000000000..147918fab --- /dev/null +++ b/.github/workflows/check-bioc.yml @@ -0,0 +1,298 @@ +## Read more about GitHub actions the features of this GitHub Actions workflow +## at https://lcolladotor.github.io/biocthis/articles/biocthis.html#use_bioc_github_action +## +## For more details, check the biocthis developer notes vignette at +## https://lcolladotor.github.io/biocthis/articles/biocthis_dev_notes.html +## +## You can add this workflow to other packages using: +## > biocthis::use_bioc_github_action() +## +## Using GitHub Actions exposes you to many details about how R packages are +## compiled and installed in several operating system.s +### If you need help, please follow the steps listed at +## https://github.com/r-lib/actions#where-to-find-help +## +## If you found an issue specific to biocthis's GHA workflow, please report it +## with the information that will make it easier for others to help you. +## Thank you! + +## Acronyms: +## * GHA: GitHub Action +## * OS: operating system + +on: + push: + pull_request: + +name: R-CMD-check-bioc + +## These environment variables control whether to run GHA code later on that is +## specific to testthat, covr, and pkgdown. +## +## If you need to clear the cache of packages, update the number inside +## cache-version as discussed at https://github.com/r-lib/actions/issues/86. +## Note that you can always run a GHA test without the cache by using the word +## "/nocache" in the commit message. +env: + has_testthat: 'true' + run_covr: 'true' + run_pkgdown: 'true' + has_RUnit: 'false' + has_BiocCheck: 'false' + cache-version: 'cache-v1' + +jobs: + build-check: + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + container: ${{ matrix.config.cont }} + ## Environment variables unique to this job. + + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: 'devel', bioc: '3.17', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: macOS-latest, r: 'devel', bioc: '3.17'} + - { os: windows-latest, r: 'devel', bioc: '3.17'} + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + NOT_CRAN: true + TZ: UTC + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + + ## Set the R library to the directory matching the + ## R packages cache step further below when running on Docker (Linux). + - name: Set R Library home on Linux + if: runner.os == 'Linux' + run: | + mkdir /__w/_temp/Library + echo ".libPaths('/__w/_temp/Library')" > ~/.Rprofile + + ## Most of these steps are the same as the ones in + ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml + ## If they update their steps, we will also need to update ours. + - name: Checkout Repository + uses: actions/checkout@v2 + + ## R is already included in the Bioconductor docker images + - name: Setup R from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + ## pandoc is already included in the Bioconductor docker images + - name: Setup pandoc from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-pandoc@v2 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel- + + - name: Cache R packages on Linux + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " + uses: actions/cache@v2 + with: + path: /home/runner/work/_temp/Library + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel- + + - name: Install Linux system dependencies + if: runner.os == 'Linux' + run: | + sysreqs=$(Rscript -e 'cat("apt-get update -y && apt-get install -y", paste(gsub("apt-get install -y ", "", remotes::system_requirements("ubuntu", "20.04")), collapse = " "))') + echo $sysreqs + sudo -s eval "$sysreqs" + + - name: Install macOS system dependencies + if: matrix.config.os == 'macOS-latest' + run: | + ## Enable installing XML from source if needed + brew install libxml2 + echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV + + ## Required to install magick as noted at + ## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 + brew install imagemagick@6 + + ## For textshaping, required by ragg, and required by pkgdown + brew install harfbuzz fribidi + + ## For installing usethis's dependency gert + brew install libgit2 + + ## required for ncdf4 + ## brew install netcdf ## Does not work as it is compiled with gcc + ## Use pre-compiled libraries from https://mac.r-project.org/libs-4/ + curl -O https://mac.r-project.org/libs-4/netcdf-4.7.4-darwin.17-x86_64.tar.gz + tar fvxzm netcdf-4.7.4-darwin.17-x86_64.tar.gz -C / + rm netcdf-4.7.4-darwin.17-x86_64.tar.gz + curl -O https://mac.r-project.org/libs-4/hdf5-1.12.0-darwin.17-x86_64.tar.gz + tar fvxzm hdf5-1.12.0-darwin.17-x86_64.tar.gz -C / + rm hdf5-1.12.0-darwin.17-x86_64.tar.gz + curl -O https://mac.r-project.org/libs-4/szip-2.1.1-darwin.17-x86_64.tar.gz + tar fvxzm szip-2.1.1-darwin.17-x86_64.tar.gz -C / + rm szip-2.1.1-darwin.17-x86_64.tar.gz + + - name: Install Windows system dependencies + if: runner.os == 'Windows' + run: | + ## Edit below if you have any Windows system dependencies + shell: Rscript {0} + + - name: Install BiocManager + run: | + message(paste('****', Sys.time(), 'installing BiocManager ****')) + remotes::install_cran("BiocManager") + shell: Rscript {0} + + - name: Set BiocVersion + run: | + BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE) + shell: Rscript {0} + + - name: Install dependencies pass 1 + run: | + ## Try installing the package dependencies in steps. First the local + ## dependencies, then any remaining dependencies to avoid the + ## issues described at + ## https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016675.html + ## https://github.com/r-lib/remotes/issues/296 + ## Ideally, all dependencies should get installed in the first pass. + + ## Pass #1 at installing dependencies + message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = + BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) + + BiocManager::install(c("rmarkdown", "BiocStyle")) + continue-on-error: true + shell: Rscript {0} + + - name: Install dependencies pass 2 + run: | + ## Pass #2 at installing dependencies + message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) + + ## Manually install packages that seem to be skipped. + message(paste('****', Sys.time(), 'force installation of selected packages ****')) + BiocManager::install(c("faahKO")) + BiocManager::install("ProtGenerics") + BiocManager::install("MSnbase") + BiocManager::install("mzR") + BiocManager::install("Spectra") + BiocManager::install("MsBackendMgf") + BiocManager::install("magick") + + ## For running the checks + message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) + remotes::install_cran("rcmdcheck") + BiocManager::install(c("BiocCheck", "DBI")) + shell: Rscript {0} + + - name: Install BiocGenerics + if: env.has_RUnit == 'true' + run: | + ## Install BiocGenerics + BiocManager::install("BiocGenerics") + shell: Rscript {0} + + - name: Install covr + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + run: | + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Install pkgdown + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: | + remotes::install_github("r-lib/pkgdown") + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - name: Run CMD check + env: + _R_CHECK_CRAN_INCOMING_: false + run: | + rcmdcheck::rcmdcheck( + args = c("--no-build-vignettes", "--no-manual", "--timings"), + build_args = c("--no-manual", "--no-resave-data"), + error_on = "warning", + check_dir = "check" + ) + shell: Rscript {0} + + ## Might need an to add this to the if: && runner.os == 'Linux' + - name: Reveal testthat details + if: env.has_testthat == 'true' + run: find . -name testthat.Rout -exec cat '{}' ';' + + - name: Run RUnit tests + if: env.has_RUnit == 'true' + run: | + BiocGenerics:::testPackage() + shell: Rscript {0} + + - name: Run BiocCheck + if: env.has_BiocCheck == 'true' + run: | + BiocCheck::BiocCheck( + dir('check', 'tar.gz$', full.names = TRUE), + `quit-with-status` = TRUE, + `no-check-R-ver` = TRUE, + `no-check-bioc-help` = TRUE + ) + shell: Rscript {0} + + - name: Test coverage + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + run: | + covr::codecov() + shell: Rscript {0} + + - name: Install package + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: R CMD INSTALL . + + - name: Deploy package + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: | + git config --global user.email "actions@github.com" + git config --global user.name "GitHub Actions" + git config --global --add safe.directory /__w/xcms/xcms + Rscript -e "pkgdown::deploy_to_branch(new_process = FALSE)" + shell: bash {0} + ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE) + ## at least one locally before this will work. This creates the gh-pages + ## branch (erasing anything you haven't version controlled!) and + ## makes the git history recognizable by pkgdown. + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ runner.os }}-biocversion-devel-r-devel-results + path: check diff --git a/.gitignore b/.gitignore index 80e409b0e..313cd55ff 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,10 @@ src/pwiz/ readme.org local_data/ +.Rproj.user + +.Rhistory +xcms.Rproj + +Rplots.pdf +.DS_Store \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index de0d790c0..000000000 --- a/.travis.yml +++ /dev/null @@ -1,72 +0,0 @@ -# language: r -# r: bioc-devel -# sudo: required -# r_packages: -# - knitr -# - XML -# - plyr -# - doParallel -# - foreach -# - iterators -# bioc_packages: -# - xcms -# - Rgraphviz -# apt_packages: -# - texlive-latex-extra -# - texlive-fonts-extra -# - texlive-latex-recommended -# - libnetcdf-dev -# - netcdf-bin -# - libhdf5-dev - -# # Set CXX1X for R-devel, as R-devel does not detect CXX1X support for gcc 4.6.3, -# # Thanks to jimhester pointing this out! -# before_install: -# - if [[ "$TRAVIS_R_VERSION_STRING" = 'bioc-devel' ]]; then mkdir ~/.R && echo 'CXX1X=g++ -std=c++0x -g -O2 -fPIC' > ~/.R/Makevars; fi - - -language: r -r: bioc-devel -cache: packages -sudo: false # use container based build system -warnings_are_errors: true -dist: trusty - -# Set CXX1X for R-devel, as R-devel does not detect CXX1X support for gcc 4.6.3, -# this was causing mzR installation to fail -# see https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=17189 -# workaround stolen from https://github.com/hadley/devtools/blob/1ce84b04568ff7846c3da754f28e7e22a23c8737/.travis.yml#L23-L26 -before_install: - - if [[ "$TRAVIS_R_VERSION_STRING" = 'bioc-devel' ]]; then mkdir ~/.R && echo 'CXX1X=g++ -std=c++0x -g -O2 -fPIC' > ~/.R/Makevars; fi - -addons: - apt: - packages: - - libnetcdf-dev - - netcdf-bin # libnetcdf-dev doesn't contain nc-config in ubuntu 12.04 (in 16.04 it is part of libnetcdf-dev) - - libhdf5-dev - - texlive-latex-recommended - - texlive-fonts-extra - -r_packages: - - covr - -# before_script: -# - echo "BiocParallel::register(BiocParallel::SerialParam())" > ~/.Rprofile - -script: - - | - R CMD build . - travis_wait 40 R CMD check --no-build-vignettes --no-vignettes xcms*tar.gz - -after_failure: - find *Rcheck -name '*.fail' -print -exec cat '{}' \; - -after_success: - - travis_wait 20 Rscript -e 'covr::codecov()' - -# # print timings (of examples) and sysinfo -# after_script: -# - dump_logs_by_extension "timings" -# - dump_sysinfo - diff --git a/DESCRIPTION b/DESCRIPTION index e1d0fc382..4efc1e747 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,61 +1,91 @@ Package: xcms -Version: 3.3.3 -Date: 2018-09-18 -Title: LC/MS and GC/MS Data Analysis -Author: Colin A. Smith , - Ralf Tautenhahn , - Steffen Neumann , - Paul Benton , - Christopher Conley , - Johannes Rainer -Maintainer: Steffen Neumann +Version: 3.21.3 +Title: LC-MS and GC-MS Data Analysis +Description: Framework for processing and visualization of chromatographically + separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, + mzXML, mzData and mzML files. Preprocesses data for high-throughput, untargeted + analyte profiling. +Authors@R: c( + person(given = "Colin A.", family = "Smith", + email = "csmith@scripps.edu", + role = "ctb"), + person(given = "Ralf", family = "Tautenhahn", + email = "rtautenh@gmail.com", + role = "ctb"), + person(given = "Steffen", family = "Neumann", + email = "sneumann@ipb-halle.de", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0002-7899-7192")), + person(given = "Paul", family = "Benton", + email = "hpbenton@scripps.edu", + role = "ctb"), + person(given = "Christopher", family = "Conley", + email = "cjconley@ucdavis.edu", + role = "ctb"), + person(given = "Johannes", family = "Rainer", + email = "johannes.rainer@eurac.edu", + role = "ctb", + comment = c(ORCID = "0000-0002-6977-7147")), + person(given = "Michael", family = "Witting", + email = "michael.witting@helmholtz-muenchen.de", + role = "ctb"), + person(given = "William", family = "Kumler", + email = "wkumler@uw.edu", role = "ctb", + comment = c(ORCID = "0000-0002-5022-8009")) + ) Depends: - R (>= 2.14.0), - methods, - Biobase, + R (>= 4.0.0), BiocParallel (>= 1.8.0), - MSnbase (>= 2.5.10) + MSnbase (>= 2.21.4) Imports: - mzR (>= 2.13.1), + mzR (>= 2.25.3), + methods, + Biobase, BiocGenerics, - ProtGenerics, + ProtGenerics (>= 1.25.1), lattice, RColorBrewer, plyr, RANN, - multtest, - MassSpecWavelet (>= 1.5.2), + MassSpecWavelet (>= 1.61.3), S4Vectors, - robustbase + robustbase, + IRanges, + SummarizedExperiment, + MsCoreUtils (>= 1.11.3), + MsFeatures Suggests: BiocStyle, caTools, knitr (>= 1.1.0), faahKO, - msdata, + msdata (>= 0.25.1), ncdf4, - rgl, - microbenchmark, testthat, pander, magrittr, - MALDIquant + rmarkdown, + multtest, + MALDIquant, + pheatmap, + Spectra (>= 1.1.17), + MsBackendMgf, + progress, + signal Enhances: Rgraphviz, - Rmpi, + rgl, XML -Description: Framework for processing and visualization of chromatographically - separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, - mzXML, mzData and mzML files. Preprocesses data for high-throughput, untargeted - analyte profiling. License: GPL (>= 2) + file LICENSE -URL: http://metlin.scripps.edu/download/ and https://github.com/sneumann/xcms -VignetteBuilder: knitr +URL: https://github.com/sneumann/xcms BugReports: https://github.com/sneumann/xcms/issues/new -biocViews: MassSpectrometry, Metabolomics -RoxygenNote: 6.1.0 -Collate: +VignetteBuilder: knitr +biocViews: ImmunoOncology, MassSpectrometry, Metabolomics +RoxygenNote: 7.2.3 +Collate: 'AllGenerics.R' + 'functions-XChromatograms.R' + 'functions-XChromatogram.R' 'DataClasses.R' 'Deprecated.R' 'MPI.R' @@ -69,6 +99,7 @@ Collate: 'functions-Params.R' 'do_groupChromPeaks-functions.R' 'fastMatch.R' + 'functions-Chromatogram.R' 'functions-utils.R' 'functions-IO.R' 'functions-OnDiskMSnExp.R' @@ -80,15 +111,20 @@ Collate: 'functions-xcmsFragments.R' 'functions-xcmsRaw.R' 'functions-xcmsSet.R' + 'functions-xcmsSwath.R' 'init.R' 'matchpeaks.R' 'methods-Chromatogram.R' 'methods-IO.R' + 'methods-MChromatograms.R' 'methods-MsFeatureData.R' 'methods-OnDiskMSnExp.R' 'methods-Params.R' 'methods-ProcessHistory.R' 'methods-XCMSnExp.R' + 'methods-XChromatogram.R' + 'methods-XChromatograms.R' + 'methods-group-features.R' 'methods-xcmsEIC.R' 'methods-xcmsFileSource.R' 'methods-xcmsFragments.R' @@ -97,7 +133,6 @@ Collate: 'methods-xcmsSet.R' 'models.R' 'mzClust.R' - 'netCDF.R' 'plotQC.R' 'ramp.R' 'specDist.R' @@ -105,4 +140,5 @@ Collate: 'writemzdata.R' 'writemztab.R' 'xcmsSource.R' + 'xdata.R' 'zzz.R' diff --git a/LICENSE b/LICENSE index 75bfd08bb..d3e1f7e11 100755 --- a/LICENSE +++ b/LICENSE @@ -4,42 +4,6 @@ All accompanying written materials copyright (c) 2005 Colin A. Smith with the exception that Massifquant-specific documentation copyright (c) 2013 Brigham Young University. - -NetCDF library governed by the following license: - -Copyright 1993-2004 University Corporation for Atmospheric Research/Unidata - -Portions of this software were developed by the Unidata Program at -the University Corporation for Atmospheric Research. - -Access and use of this software shall impose the following obligations -and understandings on the user. The user is granted the right, -without any fee or cost, to use, copy, modify, alter, enhance and -distribute this software, and any derivative works thereof, and its -supporting documentation for any purpose whatsoever, provided that -this entire notice appears in all copies of the software, derivative -works and supporting documentation. Further, UCAR requests that the -user credit UCAR/Unidata in any publications that result from the -use of this software or in any product that includes this software. -The names UCAR and/or Unidata, however, may not be used in any -advertising or publicity to endorse or promote any products or -commercial entity unless specific written permission is obtained -from UCAR/Unidata. The user also understands that UCAR/Unidata is -not obligated to provide the user with any support, consulting, -training or assistance of any kind with regard to the use, operation -and performance of this software nor to provide the user with any -updates, revisions, new versions or "bug fixes." - -THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER -RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE. - - All other materials are governed by the GNU GPL version 2 or greater: GNU GENERAL PUBLIC LICENSE diff --git a/NAMESPACE b/NAMESPACE index 9a19ebbd3..6838bfcaf 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,16 +2,23 @@ useDynLib(xcms) importFrom("utils", "capture.output") import("methods") -importMethodsFrom("ProtGenerics", "peaks", "chromatogram", "writeMSData") -importFrom("BiocGenerics", "updateObject", "fileName") +importMethodsFrom("ProtGenerics", "peaks", "chromatogram", "writeMSData", + "polarity<-", "centroided", "isCentroided", "peaks<-", + "isolationWindowTargetMz", "quantify", "bin") +importClassesFrom("ProtGenerics", "Param") +importFrom("BiocGenerics", "updateObject", "fileName", "subset", + "dirname", "dirname<-") ## import("Biobase") importFrom("Biobase", "AnnotatedDataFrame") -importClassesFrom("Biobase", "AnnotatedDataFrame", "Versioned") -importMethodsFrom("Biobase", "classVersion", "classVersion<-", "phenoData", - "phenoData<-", "pData", "rowMedians") +importClassesFrom("Biobase", "AnnotatedDataFrame") +importMethodsFrom("Biobase", "phenoData", + "phenoData<-", "pData", "rowMedians", "varLabels", + "fvarLabels") +importFrom("IRanges", "CharacterList", "NumericList") +importClassesFrom("IRanges", "CharacterList", "NumericList") -importFrom("graphics", "plot", "image", "boxplot", "matplot", "rect", "axis", - "grid", "mtext") +importFrom("graphics", "image", "boxplot", "matplot", "rect", "axis", + "grid", "mtext", "polygon", "box", "plot.xy") importFrom("mzR", "peaks", "close", "openMSfile", "header") importFrom("lattice", "levelplot", "panel.rect", "panel.levelplot", "level.colors", "do.breaks") @@ -19,11 +26,14 @@ importFrom("plyr", "rbind.fill") importFrom("robustbase", "lmrob", "lmrob.control") import("RColorBrewer") import("BiocParallel") +## importMethodsFrom("stats4", "plot") ## import("S4Vectors") importClassesFrom("S4Vectors", "Rle", "DataFrame") -importFrom("S4Vectors", "split", "Rle", "DataFrame") -importMethodsFrom("S4Vectors", "as.matrix") +importFrom("S4Vectors", "split", "Rle", "DataFrame", "SimpleList") +importMethodsFrom("S4Vectors", "as.matrix", "mcols", "mcols<-", "extractROWS") +importFrom("SummarizedExperiment", "SummarizedExperiment") +importFrom("MsCoreUtils", "rbindFill", "closest", "sumi", "between") ## Additional imports proposed by R CMD check: importFrom("graphics", "abline", "barplot", "close.screen", "hist", @@ -32,33 +42,37 @@ importFrom("graphics", "abline", "barplot", "close.screen", "hist", "strwidth", "text", "title") importFrom("grDevices", "col2rgb", "colorRampPalette", "dev.cur", "dev.list", "dev.off", "dev.set", "palette", "pdf", "png", - "rainbow", "rgb", "terrain.colors") + "rainbow", "rgb", "terrain.colors", "n2mfrow", "dev.flush", + "dev.hold", "xy.coords") importFrom("stats", "aov", "approx", "convolve", "cor", "deriv3", "dist", "fft", "fitted", "lm", "loess", "lsfit", "median", "na.omit", "nextn", "nls", "predict", "pt", "quantile", "runmed", "sd", "stepfun", "weighted.mean", "density", "approxfun", - "rnorm") + "rnorm", "runif") importFrom("utils", "flush.console", "head", "object.size", "packageVersion", "read.csv", "tail", "write.csv", "write.table") ## New imports from packages moved from Suggests to Imports: -importFrom("multtest", "mt.teststat") +## importFrom("multtest", "mt.teststat") importFrom("RANN", "nn2") importFrom("MassSpecWavelet", "peakDetectionCWT", "tuneInPeakInfo") ## MSnbase: importClassesFrom("MSnbase", "MSnExp", "pSet", "OnDiskMSnExp", "Chromatogram", - "Chromatograms") + "MChromatograms", "MSpectra") importMethodsFrom("MSnbase", "intensity", "mz", "rtime", "fileNames", "fromFile", "filterFile", "filterMsLevel", "msLevel", "scanIndex", - "spectra", "impute", "isCentroided", "polarity", "[", "bin", + "spectra", "impute", "isCentroided", "polarity", "[", "clean", "featureNames", "filterAcquisitionNum", "filterMz", "filterRt", "normalize", "pickPeaks", "removePeaks", "removeReporters", "smooth", "trimMz", "splitByFile", "[[", "spectrapply", "peaksCount", "precursorMz", "chromatogram", - "plot", "fData", "fData<-", "writeMSData") -importFrom("MSnbase", "as.data.frame.Spectrum", "Chromatogram", "Chromatograms") + "plot", "fData", "fData<-", "writeMSData", + "filterIsolationWindow", "filterIntensity", + "alignRt", "compareChromatograms", "transformIntensity") +importFrom("MSnbase", "as.data.frame.Spectrum", "Chromatogram", + "MChromatograms", "MSpectra", "requiredFvarLabels", "selectFeatureData") export( "etg", @@ -73,7 +87,6 @@ export( "xcmsRaw", "xcmsSet", "xcmsFragments", - "xcmsPapply", "phenoDataFromPaths" ) @@ -192,7 +205,10 @@ exportMethods( "write.mzdata", "write.mzQuantML", "xcmsSource", - "loadRaw" + "loadRaw", + "isolationWindowTargetMz", + "quantify", + "findmzROI" ) ## New functions @@ -216,9 +232,7 @@ export( "processHistoryTypes", "adjustRtimePeakGroups", "plotAdjustedRtime", - "plotChromatogram", "highlightChromPeaks", - "plotChromPeakDensity", "plotChromPeaks", "plotChromPeakImage", "isCalibrated", @@ -235,7 +249,18 @@ export( "fixedRt", "exportMetaboAnalyst", "imputeRowMin", - "imputeRowMinRand" + "imputeRowMinRand", + "chromPeakSpectra", + "featureSpectra", + "featureChromatograms", + "hasFilledChromPeaks", + "findChromPeaksIsolationWindow", + "reconstructChromPeakSpectra", + "groupOverlaps", + "estimatePrecursorIntensity", + "manualChromPeaks", + "manualFeatures", + "featureArea" ) ## New analysis methods @@ -260,7 +285,11 @@ exportClasses( "ObiwarpParam", "GenericParam", "FillChromPeaksParam", - "CalibrantMassParam" + "CalibrantMassParam", + "CleanPeaksParam", + "MergeNeighboringPeaksParam", + "FilterIntensityParam", + "ChromPeakAreaParam" ) ## Param methods exportMethods( @@ -404,21 +433,46 @@ exportMethods( "expandMz", "expandMz<-", "expandRt", - "expandRt<-" + "expandRt<-", + "subset", + "subset<-", + "subsetAdjust", + "subsetAdjust<-", + "dirname", + "dirname<-" ) ## Param class functions -export("CentWaveParam", "MatchedFilterParam", "MassifquantParam", "MSWParam", - "CentWavePredIsoParam", "PeakDensityParam", "MzClustParam", - "NearestPeaksParam", "PeakGroupsParam", "ObiwarpParam", "GenericParam", - "FillChromPeaksParam", "CalibrantMassParam") +export("CentWaveParam", + "MatchedFilterParam", + "MassifquantParam", + "MSWParam", + "CentWavePredIsoParam", + "PeakDensityParam", + "MzClustParam", + "NearestPeaksParam", + "PeakGroupsParam", + "ObiwarpParam", + "GenericParam", + "FillChromPeaksParam", + "CalibrantMassParam", + "XChromatogram", + "XChromatograms", + "CleanPeaksParam", + "MergeNeighboringPeaksParam", + "FilterIntensityParam", + "ChromPeakAreaParam") ## Param class methods. ## New Classes -exportClasses("XCMSnExp", "ProcessHistory", - "XProcessHistory" - ) +exportClasses("XCMSnExp", + "ProcessHistory", + "XProcessHistory", + "XChromatogram", + "XChromatograms") + ## New methods for these classes exportMethods("hasChromPeaks", + "hasFilledChromPeaks", "hasFeatures", "hasAdjustedRtime", "adjustedRtime", @@ -428,6 +482,8 @@ exportMethods("hasChromPeaks", "featureValues", "chromPeaks", "chromPeaks<-", + "chromPeakData", + "chromPeakData<-", "processHistory", "fileIndex", "processDate", @@ -441,7 +497,6 @@ exportMethods("hasChromPeaks", "filterMsLevel", "filterMz", "filterRt", - "normalize", "pickPeaks", "removePeaks", "smooth", @@ -452,11 +507,39 @@ exportMethods("hasChromPeaks", "rtime", "mz", "intensity", - "extractChromatograms", "fillChromPeaks", "dropFilledChromPeaks", "extractMsData", "chromatogram", "spectrapply", - "writeMSData" + "writeMSData", + "plotChromPeakDensity", + "correlate", + "plot", + "refineChromPeaks", + "removeIntensity", + "filterColumnsIntensityAbove", + "filterColumnsKeepTop", + "filterChromPeaks", + "plotChromatogramsOverlay", + "transformIntensity" ) + +## feature grouping functions and methods. +importFrom("MsCoreUtils", + "group") +importMethodsFrom("MsFeatures", + "featureGroups", + "featureGroups<-", + "groupFeatures") +importClassesFrom("MsFeatures", + "SimilarRtimeParam", + "AbundanceSimilarityParam") +importFrom("MsFeatures", + "SimilarRtimeParam", + "AbundanceSimilarityParam", + "groupSimilarityMatrix") +exportMethods("featureGroups", + "featureGroups<-") +export("plotFeatureGroups", "EicSimilarityParam") +exportClasses("EicSimilarityParam") diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 72b106a29..6a0f32b10 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -1,4 +1,4 @@ -## On the long run it would be nice to have all generics in here. +# On the long run it would be nice to have all generics in here. ## Alphabetically ordered. ## A @@ -38,11 +38,15 @@ setGeneric("checkBack<-", function(object, value) standardGeneric("checkBack<-") setGeneric("chromPeaks", function(object, ...) standardGeneric("chromPeaks")) setGeneric("chromPeaks<-", function(object, value) standardGeneric("chromPeaks<-")) +setGeneric("chromPeakData", function(object, ...) standardGeneric("chromPeakData")) +setGeneric("chromPeakData<-", function(object, value) + standardGeneric("chromPeakData<-")) setGeneric("collect", function(object, ...) standardGeneric("collect")) setGeneric("consecMissedLimit", function(object, ...) standardGeneric("consecMissedLimit")) setGeneric("consecMissedLimit<-", function(object, value) standardGeneric("consecMissedLimit<-")) +setGeneric("correlate", function(x, y, ...) standardGeneric("correlate")) setGeneric("criticalValue", function(object, ...) standardGeneric("criticalValue")) setGeneric("criticalValue<-", function(object, value) @@ -77,8 +81,6 @@ setGeneric("extraPeaks", function(object, ...) standardGeneric("extraPeaks")) setGeneric("extraPeaks<-", function(object, value) standardGeneric("extraPeaks<-")) -setGeneric("extractChromatograms", function(object, ...) - standardGeneric("extractChromatograms")) setGeneric("extractMsData", function(object, ...) standardGeneric("extractMsData")) @@ -106,6 +108,12 @@ setGeneric("fillPeaks.chrom", function(object, ...) setGeneric("fillPeaks.MSW", function(object, ...) standardGeneric("fillPeaks.MSW")) setGeneric("fillPeaks", function(object, ...) standardGeneric("fillPeaks")) +setGeneric("filterChromPeaks", function(object, ...) + standardGeneric("filterChromPeaks")) +setGeneric("filterColumnsIntensityAbove", function(object, ...) + standardGeneric("filterColumnsIntensityAbove")) +setGeneric("filterColumnsKeepTop", function(object, ...) + standardGeneric("filterColumnsKeepTop")) setGeneric("findChromPeaks", function(object, param, ...) standardGeneric("findChromPeaks")) setGeneric("findMZ", function(object, find, ppmE=25, print=TRUE) @@ -170,8 +178,12 @@ setGeneric("hasAdjustedRtime", function(object, ...) standardGeneric("hasAdjustedRtime")) setGeneric("hasFeatures", function(object, ...) standardGeneric("hasFeatures")) +setGeneric("hasFilledChromPeaks", function(object, ...) + standardGeneric("hasFilledChromPeaks")) setGeneric("hasChromPeaks", function(object, ...) standardGeneric("hasChromPeaks")) +setGeneric("hasFilledChromPeaks", function(object, ...) + standardGeneric("hasFilledChromPeaks")) ## I @@ -184,7 +196,6 @@ setGeneric("integrate") setGeneric("integrate<-", function(object, value) standardGeneric("integrate<-")) setGeneric("initPenalty", function(object) standardGeneric("initPenalty")) setGeneric("initPenalty<-", function(object, value) standardGeneric("initPenalty<-")) -setGeneric("isCentroided", function(object, ...) standardGeneric("isCentroided")) ## K setGeneric("kNN", function(object, ...) standardGeneric("kNN")) @@ -245,7 +256,6 @@ setGeneric("peakGroupsMatrix", function(object, ...) standardGeneric("peakGroupsMatrix")) setGeneric("peakGroupsMatrix<-", function(object, value) standardGeneric("peakGroupsMatrix<-")) -setGeneric("peaks<-", function(object, value) standardGeneric("peaks<-")) setGeneric("peakScaleRange", function(object, ...) standardGeneric("peakScaleRange")) setGeneric("peakScaleRange<-", function(object, value) @@ -256,6 +266,10 @@ setGeneric("peakThr<-", function(object, value) standardGeneric("peakThr<-")) setGeneric("peakwidth", function(object, ...) standardGeneric("peakwidth")) setGeneric("peakwidth<-", function(object, value) standardGeneric("peakwidth<-")) setGeneric("plotChrom", function(object, ...) standardGeneric("plotChrom")) +setGeneric("plotChromPeakDensity", function(object, ...) + standardGeneric("plotChromPeakDensity")) +setGeneric("plotChromatogramsOverlay", function(object, ...) + standardGeneric("plotChromatogramsOverlay")) setGeneric("plotEIC", function(object, ...) standardGeneric("plotEIC")) setGeneric("plotPeaks", function(object, ...) standardGeneric("plotPeaks")) setGeneric("plotRaw", function(object, ...) standardGeneric("plotRaw")) @@ -265,7 +279,6 @@ setGeneric("plotSpec", function(object, ...) standardGeneric("plotSpec")) setGeneric("plotSurf", function(object, ...) standardGeneric("plotSurf")) setGeneric("plotTIC", function(object, ...) standardGeneric("plotTIC")) setGeneric("plotTree", function(object, ...) standardGeneric("plotTree")) -setGeneric("polarity<-", function(object, value) standardGeneric("polarity<-")) setGeneric("ppm", function(object, ...) standardGeneric("ppm")) setGeneric("ppm<-", function(object, value) standardGeneric("ppm<-")) setGeneric("prefilter", function(object, ...) standardGeneric("prefilter")) @@ -300,6 +313,9 @@ setGeneric("progressInfoUpdate", function(object) standardGeneric("progressInfoU setGeneric("rawEIC", function(object, ...) standardGeneric("rawEIC")) setGeneric("rawMat", function(object, ...) standardGeneric("rawMat")) setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ")) +setGeneric("refineChromPeaks", function(object, param, ...) + standardGeneric("refineChromPeaks")) +setGeneric("removeIntensity", function(object, ...) standardGeneric("removeIntensity")) setGeneric("response", function(object) standardGeneric("response")) setGeneric("response<-", function(object, value) standardGeneric("response<-")) setGeneric("retcor", function(object, ...) standardGeneric("retcor")) @@ -358,6 +374,10 @@ setGeneric("stitch", function(object, lockMass, ...) standardGeneric("stitch")) setGeneric("stitch.xml", function(object, lockMass) standardGeneric("stitch.xml")) setGeneric("stitch.netCDF", function(object, lockMass) standardGeneric("stitch.netCDF")) setGeneric("stitch.netCDF.new", function(object, lockMass) standardGeneric("stitch.netCDF.new")) +setGeneric("subset<-", function(object, value) standardGeneric("subset<-")) +setGeneric("subsetAdjust", function(object, ...) standardGeneric("subsetAdjust")) +setGeneric("subsetAdjust<-", function(object, value) standardGeneric("subsetAdjust<-")) + ## T setGeneric("tuneIn", function(object, ...) standardGeneric("tuneIn")) diff --git a/R/DataClasses.R b/R/DataClasses.R index 6b938b9b4..6b58462a7 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -1,5 +1,5 @@ ## All class definitions should go in here. -#' @include AllGenerics.R +#' @include AllGenerics.R functions-XChromatogram.R functions-XChromatograms.R ############################################################ ## Class unions @@ -184,23 +184,27 @@ setClass("xcmsPeaks", contains = "matrix") ## Processing history type statics .PROCSTEP.UNKNOWN <- "Unknown" .PROCSTEP.PEAK.DETECTION <- "Peak detection" +.PROCSTEP.PEAK.REFINEMENT <- "Peak refinement" .PROCSTEP.PEAK.GROUPING <- "Peak grouping" .PROCSTEP.RTIME.CORRECTION <- "Retention time correction" .PROCSTEP.PEAK.FILLING <- "Missing peak filling" .PROCSTEP.CALIBRATION <- "Calibration" +.PROCSTEP.FEATURE.GROUPING <- "Feature grouping" .PROCSTEPS <- c( .PROCSTEP.UNKNOWN, .PROCSTEP.PEAK.DETECTION, + .PROCSTEP.PEAK.REFINEMENT, .PROCSTEP.PEAK.GROUPING, .PROCSTEP.RTIME.CORRECTION, .PROCSTEP.PEAK.FILLING, - .PROCSTEP.CALIBRATION + .PROCSTEP.CALIBRATION, + .PROCSTEP.FEATURE.GROUPING ) ############################################################ ## ProcessHistory #' @aliases ProcessHistory -#' +#' #' @title Tracking data processing #' #' @description Objects of the type \code{ProcessHistory} allow to keep track @@ -231,7 +235,6 @@ setClass("ProcessHistory", fileIndex = "integer", error = "ANY" ), - contains = "Versioned", prototype = prototype( type = .PROCSTEP.UNKNOWN, date = character(), @@ -265,13 +268,10 @@ setClass("ProcessHistory", ## BasicParam class ## CentWaveParam -setClass("Param", - representation = representation("VIRTUAL"), - contains = c("Versioned")) setClassUnion("ParamOrNULL", c("Param", "NULL")) -#' @aliases GenericParam -#' +#' @aliases GenericParam Param class:Param Param-class +#' #' @title Generic parameter class #' #' @description The \code{GenericParam} class allows to store generic parameter @@ -286,14 +286,12 @@ setClassUnion("ParamOrNULL", c("Param", "NULL")) #' of an \code{\link{XCMSnExp}} object. #' #' @slot fun \code{character} specifying the function name. -#' +#' #' @slot args \code{list} (ideally named) with the arguments to the #' function. -#' -#' @slot .__classVersion__ the version of the class. -#' +#' #' @author Johannes Rainer -#' +#' #' @rdname GenericParam #' #' @examples @@ -322,7 +320,7 @@ setClass("GenericParam", ) #' @aliases XProcessHistory -#' +#' #' @title Tracking data processing #' #' @description The \code{XProcessHistory} extends the \code{ProcessHistory} by @@ -335,7 +333,7 @@ setClass("GenericParam", #' #' @slot msLevel: \code{integer} definining the MS level(s) on which the #' analysis was performed. -#' +#' #' @rdname ProcessHistory-class setClass("XProcessHistory", slots = c( @@ -382,27 +380,33 @@ setClass("XProcessHistory", #' \item{matchedFilter}{peak detection in chromatographic space. See #' \code{\link{matchedFilter}} for more details.} #' -#' \item{massifquant}{peak detection using the Kalman filter-based +#' \item{massifquant}{peak detection using the Kalman filter-based #' method. See \code{\link{massifquant}} for more details.} #' #' \item{MSW}{single-spectrum non-chromatography MS data peak detection. #' See \code{\link{MSW}} for more details.} #' #' } -#' +#' #' @name chromatographic-peak-detection -#' +#' #' @family peak detection methods -#' +#' #' @seealso \code{\link{findPeaks}} for the \emph{old} peak detection #' methods. -#' +#' #' \code{\link{plotChromPeaks}} to plot identified chromatographic peaks #' for one file. #' #' \code{\link{highlightChromPeaks}} to highlight identified chromatographic #' peaks in an extracted ion chromatogram plot. -#' +#' +#' \code{\link{refineChromPeaks}} for methods to refine or clean identified +#' chromatographic peaks. +#' +#' \code{\link{manualChromPeaks}} to manually add/define chromatographic +#' peaks. +#' #' @author Johannes Rainer NULL #> NULL @@ -419,18 +423,18 @@ NULL #' @param ppm \code{numeric(1)} defining the maximal tolerated m/z deviation in #' consecutive scans in parts per million (ppm) for the initial ROI #' definition. -#' +#' #' @param peakwidth \code{numeric(2)} with the expected approximate #' peak width in chromatographic space. Given as a range (min, max) #' in seconds. -#' +#' #' @param snthresh \code{numeric(1)} defining the signal to noise ratio cutoff. #' #' @param prefilter \code{numeric(2)}: \code{c(k, I)} specifying the prefilter #' step for the first analysis step (ROI detection). Mass traces are only #' retained if they contain at least \code{k} peaks with intensity #' \code{>= I}. -#' +#' #' @param mzCenterFun Name of the function to calculate the m/z center of the #' chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted #' mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z @@ -439,30 +443,30 @@ NULL #' peak apex and the m/z values left and right of it and \code{"meanApex3"}: #' mean of the m/z value of the peak apex and the m/z values left and right #' of it. -#' +#' #' @param integrate Integration method. For \code{integrate = 1} peak limits #' are found through descent on the mexican hat filtered data, for #' \code{integrate = 2} the descent is done on the real data. The latter #' method is more accurate but prone to noise, while the former is more #' robust, but less exact. -#' +#' #' @param mzdiff \code{numeric(1)} representing the minimum difference in m/z #' dimension required for peaks with overlapping retention times; can be #' negative to allow overlap. During peak post-processing, peaks #' defined to be overlapping are reduced to the one peak with the largest #' signal. -#' +#' #' @param fitgauss \code{logical(1)} whether or not a Gaussian should be fitted #' to each peak. This affects mostly the retention time position of the #' peak. -#' +#' #' @param noise \code{numeric(1)} allowing to set a minimum intensity required #' for centroids to be considered in the first analysis step (centroids with #' intensity \code{< noise} are omitted from ROI detection). -#' +#' #' @param verboseColumns \code{logical(1)} whether additional peak meta data #' columns should be returned. -#' +#' #' @param roiList An optional list of regions-of-interest (ROI) representing #' detected mass traces. If ROIs are submitted the first analysis step is #' omitted and chromatographic peak detection is performed on the submitted @@ -472,14 +476,23 @@ NULL #' (number of scans), \code{intensity} (summed intensity). Each ROI should #' be represented by a \code{list} of elements or a single row #' \code{data.frame}. -#' +#' #' @param firstBaselineCheck \code{logical(1)}. If \code{TRUE} continuous #' data within regions of interest is checked to be above the first baseline. -#' +#' In detail, a first rough estimate of the noise is calculated and peak +#' detection is performed only in regions in which multiple sequential +#' signals are higher than this first estimated baseline/noise level. +#' #' @param roiScales Optional numeric vector with length equal to \code{roiList} #' defining the scale for each region of interest in \code{roiList} that #' should be used for the centWave-wavelets. #' +#' @param extendLengthMSW Option to force centWave to use all scales when +#' running centWave rather than truncating with the EIC length. Uses the "open" +#' method to extend the EIC to a integer base-2 length prior to being passed to +#' \code{convolve} rather than the default "reflect" method. See +#' https://github.com/sneumann/xcms/issues/445 for more information. +#' #' @details #' #' The centWave algorithm is most suitable for high resolution @@ -500,13 +513,12 @@ NULL #' @note These methods and classes are part of the updated and modernized #' \code{xcms} user interface which will eventually replace the #' \code{\link{findPeaks}} methods. It supports peak detection on -#' \code{\link{MSnExp}} and \code{\link{OnDiskMSnExp}} -#' objects (both defined in the \code{MSnbase} package). All of the settings -#' to the centWave algorithm can be passed with a \code{CentWaveParam} -#' object. +#' \code{\link{OnDiskMSnExp}} objects (defined in the \code{MSnbase} +#' package). All of the settings to the centWave algorithm can be passed +#' with a \code{CentWaveParam} object. #' #' @family peak detection methods -#' +#' #' @seealso #' #' The \code{\link{do_findChromPeaks_centWave}} core API function and @@ -519,9 +531,9 @@ NULL #' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly #' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} #' 2008, 9:504 -#' +#' #' @name findChromPeaks-centWave -#' +#' #' @author Ralf Tautenhahn, Johannes Rainer NULL #> NULL @@ -530,8 +542,7 @@ NULL #' for a chromatographic peak detection using the centWave method. Instances #' should be created with the \code{CentWaveParam} constructor. #' -#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed +#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,extendLengthMSW See corresponding parameter above. Slots values should exclusively be accessed #' \emph{via} the corresponding getter and setter methods listed above. #' #' @rdname findChromPeaks-centWave @@ -541,7 +552,7 @@ NULL #' ## Create a CentWaveParam object. Note that the noise is set to 10000 to #' ## speed up the execution of the example - in a real use case the default #' ## value should be used, or it should be set to a reasonable value. -#' cwp <- CentWaveParam(ppm = 20, noise = 10000) +#' cwp <- CentWaveParam(ppm = 20, noise = 10000, prefilter = c(3, 10000)) #' ## Change snthresh parameter #' snthresh(cwp) <- 25 #' cwp @@ -553,7 +564,7 @@ NULL #' library(xcms) #' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, #' full.names = TRUE) -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") +#' raw_data <- readMSData(fls[1], mode = "onDisk") #' #' ## Perform the peak detection using the settings defined above. #' res <- findChromPeaks(raw_data, param = cwp) @@ -572,7 +583,8 @@ setClass("CentWaveParam", verboseColumns = "logical", roiList = "list", firstBaselineCheck = "logical", - roiScales = "numeric" + roiScales = "numeric", + extendLengthMSW = "logical" ), contains = c("Param"), prototype = prototype( @@ -588,7 +600,8 @@ setClass("CentWaveParam", verboseColumns = FALSE, roiList = list(), firstBaselineCheck = TRUE, - roiScales = numeric() + roiScales = numeric(), + extendLengthMSW = FALSE ), validity = function(object) { msg <- character() @@ -645,7 +658,7 @@ setClass("CentWaveParam", msg <- c(msg, paste0("'roiList' does not provide ", "all required fields!")) } - if (length(object@roiScales) > 0) { + if (length(object@roiScales) > 0) { if (length(object@roiList) != length(object@roiScales)) msg <- c(msg, paste0("'roiScales' has to have the same", " length than 'roiList'.")) @@ -675,34 +688,34 @@ setClass("CentWaveParam", #' #' @param binSize \code{numeric(1)} specifying the width of the #' bins/slices in m/z dimension. -#' +#' #' @param impute Character string specifying the method to be used for missing #' value imputation. Allowed values are \code{"none"} (no linear #' interpolation), \code{"lin"} (linear interpolation), \code{"linbase"} #' (linear interpolation within a certain bin-neighborhood) and #' \code{"intlin"}. See \code{\link{imputeLinInterpol}} for more details. -#' +#' #' @param fwhm \code{numeric(1)} specifying the full width at half maximum #' of matched filtration gaussian model peak. Only used to calculate the #' actual sigma, see below. -#' +#' #' @param sigma \code{numeric(1)} specifying the standard deviation (width) #' of the matched filtration model peak. -#' +#' #' @param max \code{numeric(1)} representing the maximum number of peaks #' that are expected/will be identified per slice. -#' +#' #' @param snthresh \code{numeric(1)} defining the signal to noise cutoff #' to be used in the chromatographic peak detection step. -#' +#' #' @param steps \code{numeric(1)} defining the number of bins to be #' merged before filtration (i.e. the number of neighboring bins that will #' be joined to the slice in which filtration and peak detection will be #' performed). -#' +#' #' @param mzdiff \code{numeric(1)} defining the minimum difference #' in m/z for peaks with overlapping retention times -#' +#' #' @param index \code{logical(1)} specifying whether indicies should be #' returned instead of values for m/z and retention times. #' @@ -717,17 +730,17 @@ setClass("CentWaveParam", #' @note These methods and classes are part of the updated and modernized #' \code{xcms} user interface which will eventually replace the #' \code{\link{findPeaks}} methods. It supports chromatographic peak -#' detection on \code{\link{MSnExp}} and -#' \code{\link{OnDiskMSnExp}} objects (both defined in the +#' detection on +#' \code{\link{OnDiskMSnExp}} objects (defined in the #' \code{MSnbase} package). All of the settings to the matchedFilter #' algorithm can be passed with a \code{MatchedFilterParam} object. #' #' @inheritParams imputeLinInterpol -#' +#' #' @inheritParams findChromPeaks-centWave #' #' @family peak detection methods -#' +#' #' @seealso #' #' The \code{\link{do_findChromPeaks_matchedFilter}} core API function @@ -735,7 +748,7 @@ setClass("CentWaveParam", #' #' \code{\link{peaksWithMatchedFilter}} for functions to perform matchedFilter #' peak detection in purely chromatographic data. -#' +#' #' @references #' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and #' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite @@ -753,9 +766,10 @@ NULL #' method. Instances should be created with the \code{MatchedFilterParam} #' constructor. #' -#' @slot .__classVersion__,binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' @slot binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index +#' See corresponding parameter above. Slots values should exclusively +#' be accessed \emph{via} the corresponding getter and setter methods listed +#' above. #' #' @rdname findChromPeaks-matchedFilter #' @@ -775,11 +789,10 @@ NULL #' library(MSnbase) #' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, #' full.names = TRUE) -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") +#' raw_data <- readMSData(fls[1], mode = "onDisk") #' ## Perform the chromatographic peak detection using the settings defined #' ## above. Note that we are also disabling parallel processing in this #' ## example by registering a "SerialParam" -#' register(SerialParam()) #' res <- findChromPeaks(raw_data, param = mfp) #' head(chromPeaks(res)) setClass("MatchedFilterParam", @@ -869,12 +882,12 @@ setClass("MatchedFilterParam", #' For \code{withWave = TRUE} the second argument represents the maximum #' peak length subject to being greater than the mininum peak length #' (see also documentation of \code{\link{do_findChromPeaks_centWave}}). -#' +#' #' @param prefilter \code{numeric(2)}. The first argument is only used #' if (\code{withWave = TRUE}); see \code{\link{findChromPeaks-centWave}} #' for details. The second argument specifies the minimum threshold for the #' maximum intensity of a chromatographic peak that must be met. -#' +#' #' @param criticalValue \code{numeric(1)}. Suggested values: #' (\code{0.1-3.0}). This setting helps determine the the Kalman Filter #' prediciton margin of error. A real centroid belonging to a bonafide @@ -884,13 +897,13 @@ setClass("MatchedFilterParam", #' reported by the Kalman Filter. If the peak in the XC-MS sample have #' a small mass deviance in ppm error, a smaller critical value might be #' better and vice versa. -#' +#' #' @param consecMissedLimit \code{integer(1)} Suggested values: (\code{1,2,3}). #' While a peak is in the proces of being detected by a Kalman Filter, the #' Kalman Filter may not find a predicted centroid in every scan. After 1 #' or more consecutive failed predictions, this setting informs Massifquant #' when to stop a Kalman Filter from following a candidate peak. -#' +#' #' @param unions \code{integer(1)} set to \code{1} if apply t-test union on #' segmentation; set to \code{0} if no t-test to be applied on #' chromatographically continous peaks sharing same m/z range. @@ -902,7 +915,7 @@ setClass("MatchedFilterParam", #' program identifies segmented peaks and combines them (merges them) #' into one with a two sample t-test. The potential danger of this option #' is that some truly distinct peaks may be merged. -#' +#' #' @param checkBack \code{integer(1)} set to \code{1} if turned on; set to #' \code{0} if turned off. The convergence of a Kalman Filter to a peak's #' precise m/z mapping is very fast, but sometimes it incorporates erroneous @@ -912,7 +925,7 @@ setClass("MatchedFilterParam", #' affect identification of a peak because it is a postprocessing measure; #' it has not shown to be a extremely useful thus far and the default is set #' to being turned off. -#' +#' #' @param withWave \code{logical(1)} if \code{TRUE}, the peaks identified first #' with Massifquant are subsequently filtered with the second step of the #' centWave algorithm, which includes wavelet estimation. @@ -937,15 +950,15 @@ setClass("MatchedFilterParam", #' @note These methods and classes are part of the updated and modernized #' \code{xcms} user interface which will eventually replace the #' \code{\link{findPeaks}} methods. It supports chromatographic peak -#' detection on \code{\link{MSnExp}} and -#' \code{\link{OnDiskMSnExp}} objects (both defined in the +#' detection on +#' \code{\link{OnDiskMSnExp}} objects (defined in the #' \code{MSnbase} package). All of the settings to the massifquant and #' centWave algorithm can be passed with a \code{MassifquantParam} object. #' #' @inheritParams findChromPeaks-centWave #' #' @family peak detection methods -#' +#' #' @seealso The \code{\link{do_findChromPeaks_massifquant}} core API function #' and \code{\link{findPeaks.massifquant}} for the old user interface. #' @@ -965,9 +978,10 @@ NULL #' method eventually in combination with the centWave algorithm. Instances #' should be created with the \code{MassifquantParam} constructor. #' -#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave +#' See corresponding parameter above. Slots values should +#' exclusively be accessed \emph{via} the corresponding getter and setter +#' methods listed above. #' #' @rdname findChromPeaks-massifquant #' @@ -975,8 +989,9 @@ NULL #' #' ## Create a MassifquantParam object. #' mqp <- MassifquantParam() -#' ## Change snthresh parameter +#' ## Change snthresh prefilter parameters #' snthresh(mqp) <- 30 +#' prefilter(mqp) <- c(6, 10000) #' mqp #' #' ## Perform the peak detection using massifquant on the files from the @@ -986,7 +1001,7 @@ NULL #' library(MSnbase) #' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, #' full.names = TRUE) -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") +#' raw_data <- readMSData(fls[1], mode = "onDisk") #' ## Perform the peak detection using the settings defined above. #' res <- findChromPeaks(raw_data, param = mqp) #' head(chromPeaks(res)) @@ -1104,14 +1119,14 @@ setClass("MassifquantParam", #' @note These methods and classes are part of the updated and modernized #' \code{xcms} user interface which will eventually replace the #' \code{\link{findPeaks}} methods. It supports peak detection on -#' \code{\link{MSnExp}} and \code{\link{OnDiskMSnExp}} -#' objects (both defined in the \code{MSnbase} package). All of the settings +#' \code{\link{OnDiskMSnExp}} +#' objects (defined in the \code{MSnbase} package). All of the settings #' to the algorithm can be passed with a \code{MSWParam} object. #' #' @inheritParams findChromPeaks-centWave #' #' @family peak detection methods -#' +#' #' @seealso The \code{\link{do_findPeaks_MSW}} core API function #' and \code{\link{findPeaks.MSW}} for the old user interface. #' @@ -1125,9 +1140,8 @@ NULL #' settings for a peak detection using the MSW method. Instances should be #' created with the \code{MSWParam} constructor. #' -#' @slot .__classVersion__,snthresh,verboseColumns,scales,nearbyPeak,peakScaleRange,ampTh,minNoiseLevel,ridgeLength,peakThr,tuneIn,addParams See corresponding parameter above. \code{.__classVersion__} stores the version from the class. Slots values -#' should exclusively be accessed \emph{via} the corresponding getter and -#' setter methods listed above. +#' @slot snthresh,verboseColumns,scales,nearbyPeak,peakScaleRange,ampTh,minNoiseLevel,ridgeLength,peakThr,tuneIn,addParams +#' See corresponding parameter above. #' #' @rdname findPeaks-MSW #' @@ -1141,9 +1155,9 @@ NULL #' #' ## Loading a small subset of direct injection, single spectrum files #' library(msdata) -#' fticrf <- list.files(system.file("fticr", package = "msdata"), +#' fticrf <- list.files(system.file("fticr-mzML", package = "msdata"), #' recursive = TRUE, full.names = TRUE) -#' fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk") +#' fticr <- readMSData(fticrf[1], msLevel. = 1, mode = "onDisk") #' #' ## Perform the MSW peak detection on these: #' p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, @@ -1253,20 +1267,20 @@ setClass("MSWParam", #' @note These methods and classes are part of the updated and modernized #' \code{xcms} user interface which will eventually replace the #' \code{\link{findPeaks}} methods. It supports chromatographic peak -#' detection on \code{\link{MSnExp}} and -#' \code{\link{OnDiskMSnExp}} objects (both defined in the +#' detection on +#' \code{\link{OnDiskMSnExp}} objects (defined in the #' \code{MSnbase} package). All of the settings to the algorithm can be #' passed with a \code{CentWavePredIsoParam} object. #' #' @family peak detection methods -#' +#' #' @seealso The \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}} core #' API function and \code{\link{findPeaks.centWave}} for the old user #' interface. \code{\link{CentWaveParam}} for the class the #' \code{CentWavePredIsoParam} extends. #' #' @name findChromPeaks-centWaveWithPredIsoROIs -#' +#' #' @author Hendrik Treutler, Johannes Rainer NULL #> NULL @@ -1279,9 +1293,8 @@ NULL #' \code{\link{CentWaveParam}} for all methods and arguments this class #' inherits. #' -#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity +#' See corresponding parameter above. #' #' @rdname findChromPeaks-centWaveWithPredIsoROIs #' @@ -1349,7 +1362,7 @@ setClass("CentWavePredIsoParam", #' #' The implemented peak grouping methods are: #' \describe{ -#' +#' #' \item{density}{peak grouping based on time dimension peak densities. #' See \code{\link{groupChromPeaks-density}} for more details.} #' @@ -1360,93 +1373,106 @@ setClass("CentWavePredIsoParam", #' \item{nearest}{chromatographic peak grouping based on their proximity in #' the mz-rt space. See \code{\link{groupChromPeaks-nearest}} for more #' details.} -#' +#' #' } #' @name groupChromPeaks -#' +#' #' @family peak grouping methods -#' -#' @seealso \code{\link{group}} for the \emph{old} peak grouping methods. -#' \code{\link{featureDefinitions}} and -#' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak -#' grouping results. -#' +#' +#' @seealso +#' +#' \code{\link{featureDefinitions}} and +#' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak +#' grouping results. +#' +#' \code{\link{featureChromatograms}} to extract ion chromatograms for each +#' feature. +#' +#' \code{\link{group}} for the \emph{old} peak grouping methods. +#' #' @author Johannes Rainer NULL #> NULL #' @title Peak grouping based on time dimension peak densities #' -#' @description This method performs performs correspondence (chromatographic -#' peak grouping) based on the density (distribution) of identified peaks -#' along the retention time axis within slices of overlapping mz ranges. -#' All peaks (from the same or from different samples) being close on the -#' retention time axis are grouped into a feature (\emph{peak group}). +#' @description +#' +#' This method performs performs correspondence (chromatographic +#' peak grouping) based on the density (distribution) of identified peaks +#' along the retention time axis within slices of overlapping mz ranges. +#' All peaks (from the same or from different samples) being close on the +#' retention time axis are grouped into a feature (*peak group*). #' #' @note These methods and classes are part of the updated and modernized -#' \code{xcms} user interface which will eventually replace the -#' \code{\link{group}} methods. All of the settings to the algorithm -#' can be passed with a \code{PeakDensityParam} object. +#' `xcms` user interface. All of the settings to the algorithm +#' can be passed with a `PeakDensityParam` object. #' #' @param sampleGroups A vector of the same length than samples defining the #' sample group assignments (i.e. which samples belong to which sample -#' group). This parameter is mandatory for the \code{PeakDensityParam} +#' group). This parameter is mandatory for the `PeakDensityParam` #' and has to be provided also if there is no sample grouping in the #' experiment (in which case all samples should be assigned to the #' same group). #' -#' @param bw \code{numeric(1)} defining the bandwidth (standard deviation ot the +#' @param bw `numeric(1)` defining the bandwidth (standard deviation ot the #' smoothing kernel) to be used. This argument is passed to the -#' \code{\link{density}} method. +#' [density() method. #' -#' @param minFraction \code{numeric(1)} defining the minimum fraction of samples +#' @param minFraction `numeric(1)` defining the minimum fraction of samples #' in at least one sample group in which the peaks have to be present to be #' considered as a peak group (feature). #' -#' @param minSamples \code{numeric(1)} with the minimum number of samples in at +#' @param minSamples `numeric(1)` with the minimum number of samples in at #' least one sample group in which the peaks have to be detected to be #' considered a peak group (feature). #' -#' @param binSize \code{numeric(1)} defining the size of the overlapping slices +#' @param binSize `numeric(1)` defining the size of the overlapping slices #' in mz dimension. #' -#' @param maxFeatures \code{numeric(1)} with the maximum number of peak groups +#' @param maxFeatures `numeric(1)` with the maximum number of peak groups #' to be identified in a single mz slice. -#' +#' #' @family peak grouping methods -#' -#' @seealso The \code{\link{do_groupChromPeaks_density}} core -#' API function and \code{\link{group.density}} for the old user interface. -#' -#' @seealso \code{\link{plotChromPeakDensity}} to plot peak densities and -#' evaluate different algorithm settings. -#' \code{\link{featureDefinitions}} and -#' \code{\link{featureValues,XCMSnExp-method}} for methods to access the -#' features (i.e. the peak grouping results). +#' +#' @seealso +#' +#' The [do_groupChromPeaks_density()] core API function and [group.density()] +#' for the old user interface. +#' +#' [plotChromPeakDensity()] to plot peak densities and evaluate different +#' algorithm settings. +#' +#' [featureDefinitions()] and [featureValues()] for methods to access the +#' features (i.e. the peak grouping results). #' #' @name groupChromPeaks-density -#' +#' +#' @md +#' #' @author Colin Smith, Johannes Rainer #' #' @references #' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and #' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite #' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -#' \emph{Anal. Chem.} 2006, 78:779-787. +#' Anal. Chem. 2006, 78:779-787. NULL #> NULL -#' @description The \code{PeakDensityParam} class allows to specify all -#' settings for the peak grouping based on peak densities along the time -#' dimension. Instances should be created with the \code{PeakDensityParam} -#' constructor. +#' @description #' -#' @slot .__classVersion__,sampleGroups,bw,minFraction,minSamples,binSize,maxFeatures See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' The `PeakDensityParam` class allows to specify all settings for the peak +#' grouping based on peak densities along the time dimension. Instances should +#' be created with the [PeakDensityParam()] constructor. +#' +#' @slot sampleGroups,bw,minFraction,minSamples,binSize,maxFeatures See +#' corresponding parameter above. #' #' @rdname groupChromPeaks-density #' +#' @md +#' #' @examples #' #' ## Create a PeakDensityParam object @@ -1458,20 +1484,15 @@ NULL #' ############################## #' ## Chromatographic peak detection and grouping. #' ## -#' ## Below we perform first a peak detection (using the matchedFilter -#' ## method) on some of the test files from the faahKO package followed by -#' ## a peak grouping using the density method. -#' library(faahKO) -#' library(MSnbase) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' -#' ## Reading 2 of the KO samples -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") #' -#' ## Perform the chromatographic peak detection using the matchedFilter method. -#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -#' res <- findChromPeaks(raw_data, param = mfp) +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' res <- faahko_sub #' #' head(chromPeaks(res)) #' ## The number of peaks identified per sample: @@ -1488,7 +1509,7 @@ NULL #' ## Using the featureValues method to extract a matrix with the #' ## intensities of the features per sample. #' head(featureValues(res, value = "into")) -#' +#' #' ## The process history: #' processHistory(res) setClass("PeakDensityParam", @@ -1534,46 +1555,55 @@ setClass("PeakDensityParam", ## Main group.mzClust documentation. #' @title High resolution peak grouping for single spectra samples #' -#' @description This method performs high resolution correspondence for single -#' spectra samples. +#' @description +#' +#' This method performs high resolution correspondence for single spectra +#' samples. #' #' @note These methods and classes are part of the updated and modernized -#' \code{xcms} user interface which will eventually replace the -#' \code{\link{group}} methods. All of the settings to the algorithm -#' can be passed with a \code{MzClustParam} object. +#' `xcms` user interface which will eventually replace the +#' [group()] methods. All of the settings to the algorithm +#' can be passed with a [MzClustParam] object. #' #' @inheritParams groupChromPeaks-density #' -#' @param ppm \code{numeric(1)} representing the relative mz error for the +#' @param ppm `numeric(1)` representing the relative mz error for the #' clustering/grouping (in parts per million). -#' -#' @param absMz \code{numeric(1)} representing the absolute mz error for the +#' +#' @param absMz `numeric(1)` representing the absolute mz error for the #' clustering. -#' +#' #' @family peak grouping methods -#' -#' @seealso The \code{\link{do_groupPeaks_mzClust}} core API function and -#' \code{\link{group.mzClust}} for the old user interface. -#' \code{\link{featureDefinitions}} and -#' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak -#' grouping results (i.e. the features). +#' +#' @seealso +#' +#' The [do_groupPeaks_mzClust()] core API function and [group.mzClust()] for +#' the old user interface. +#' +#' [featureDefinitions()] and [featureValues()] for methods to access peak +#' grouping results (i.e. the features). #' #' @name groupChromPeaks-mzClust #' +#' @md +#' #' @references Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill -#' and David F. Grant\cr \emph{Alignment of high resolution mass spectra: -#' development of a heuristic approach for metabolomics}.\cr Metabolomics, +#' and David F. Grant\cr Alignment of high resolution mass spectra: +#' development of a heuristic approach for metabolomics.\cr Metabolomics, #' Vol. 2, No. 2, 75-83 (2006) NULL #> NULL -#' @description The \code{MzClustParam} class allows to specify all -#' settings for the peak grouping based on the \emph{mzClust} algorithm. -#' Instances should be created with the \code{MzClustParam} constructor. +#' @description #' -#' @slot .__classVersion__,sampleGroups,ppm,absMz,minFraction,minSamples See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' The `MzClustParam` class allows to specify all settings for the peak +#' grouping based on the *mzClust* algorithm. +#' Instances should be created with the `MzClustParam` constructor. +#' +#' @slot sampleGroups,ppm,absMz,minFraction,minSamples See corresponding +#' parameter above. +#' +#' @md #' #' @rdname groupChromPeaks-mzClust #' @@ -1581,10 +1611,13 @@ NULL #' #' ## Loading a small subset of direct injection, single spectrum files #' library(msdata) -#' fticrf <- list.files(system.file("fticr", package = "msdata"), +#' fticrf <- list.files(system.file("fticr-mzML", package = "msdata"), #' recursive = TRUE, full.names = TRUE) #' fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk") #' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' #' ## Perform the MSW peak detection on these: #' p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, #' SNR.method = "data.mean", winSize.noise = 500) @@ -1638,56 +1671,63 @@ setClass("MzClustParam", ## Main group.nearest documentation. #' @title Peak grouping based on proximity in the mz-rt space #' -#' @description This method is inspired by the grouping algorithm of mzMine -#' [Katajamaa 2006] and performs correspondence based on proximity of peaks -#' in the space spanned by retention time and mz values. -#' The method creates first a \emph{master peak list} consisting of all -#' chromatographic peaks from the sample in which most peaks were -#' identified, and starting from that, calculates distances to peaks from -#' the sample with the next most number of peaks. If peaks are closer than -#' the defined threshold they are grouped together. +#' @description #' -#' @note These methods and classes are part of the updated and modernized -#' \code{xcms} user interface which will eventually replace the -#' \code{\link{group}} methods. All of the settings to the algorithm -#' can be passed with a \code{NearestPeaksParam} object. +#' This method is inspired by the grouping algorithm of mzMine +#' (Katajamaa 2006) and performs correspondence based on proximity of peaks +#' in the space spanned by retention time and mz values. +#' The method creates first a *master peak list* consisting of all +#' chromatographic peaks from the sample in which most peaks were +#' identified, and starting from that, calculates distances to peaks from +#' the sample with the next most number of peaks. If peaks are closer than +#' the defined threshold they are grouped together. +#' +#' @note +#' +#' These methods and classes are part of the updated and modernized +#' `xcms` user interface. All of the settings to the algorithm +#' can be passed with a `NearestPeaksParam` object. #' #' @inheritParams groupChromPeaks-density #' -#' @param mzVsRtBalance \code{numeric(1)} representing the factor by which mz +#' @param mzVsRtBalance `numeric(1)` representing the factor by which mz #' values are multiplied before calculating the (euclician) distance between #' two peaks. #' -#' @param absMz \code{numeric(1)} maximum tolerated distance for mz values. +#' @param absMz `numeric(1)` maximum tolerated distance for mz values. #' -#' @param absRt \code{numeric(1)} maximum tolerated distance for rt values. +#' @param absRt `numeric(1)` maximum tolerated distance for rt values. #' -#' @param kNN \code{numeric(1)} representing the number of nearest neighbors +#' @param kNN `numeric(1)` representing the number of nearest neighbors #' to check. -#' +#' #' @family peak grouping methods -#' -#' @seealso The \code{\link{do_groupChromPeaks_nearest}} core -#' API function and \code{\link{group.nearest}} for the old user interface. -#' \code{\link{featureDefinitions}} and -#' \code{\link{featureValues,XCMSnExp-method}} for methods to access -#' peak grouping results (i.e. the features). +#' +#' @seealso +#' +#' The [do_groupChromPeaks_nearest()] core API function. +#' +#' [featureDefinitions()] and [featureValues()] for methods to access +#' peak grouping results (i.e. the features). #' #' @name groupChromPeaks-nearest #' +#' @md +#' #' @references Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for #' processing and visualization of mass spectrometry based molecular profile -#' data. \emph{Bioinformatics} 2006, 22:634-636. +#' data. Bioinformatics 2006, 22:634-636. NULL #> NULL -#' @description The \code{NearestPeaksParam} class allows to specify all -#' settings for the peak grouping based on the \emph{nearest} algorithm. -#' Instances should be created with the \code{NearestPeaksParam} constructor. +#' @description The `NearestPeaksParam` class allows to specify all +#' settings for the peak grouping based on the *nearest* algorithm. +#' Instances should be created with the `NearestPeaksParam` constructor. #' -#' @slot .__classVersion__,sampleGroups,mzVsRtBalance,absMz,absRt,kNN See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' @slot sampleGroups,mzVsRtBalance,absMz,absRt,kNN See corresponding parameter +#' above. +#' +#' @md #' #' @rdname groupChromPeaks-nearest #' @@ -1697,23 +1737,14 @@ NULL #' p <- NearestPeaksParam(kNN = 3) #' p #' -#' ############################## -#' ## Chromatographic peak detection and grouping. -#' ## -#' ## Below we perform first a chromatographic peak detection (using the -#' ## matchedFilter method) on some of the test files from the faahKO package -#' ## followed by a peaks grouping using the "nearest" method. -#' library(faahKO) -#' library(MSnbase) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' -#' ## Reading 2 of the KO samples -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' res <- faahko_sub #' -#' ## Perform the peak detection using the matchedFilter method. -#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -#' res <- findChromPeaks(raw_data, param = mfp) +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' #' head(chromPeaks(res)) #' ## The number of peaks identified per sample: @@ -1784,13 +1815,13 @@ setClass("NearestPeaksParam", #' \code{\link{adjustRtime-obiwarp}} for more details.} #' } #' @name adjustRtime -#' +#' #' @family retention time correction methods -#' -#' @seealso \code{\link{retcor}} for the \emph{old} retention time correction +#' +#' @seealso \code{\link{retcor}} for the \emph{old} retention time correction #' methods. #' \code{\link{plotAdjustedRtime}} for visualization of alignment results. -#' +#' #' @author Johannes Rainer NULL #> NULL @@ -1799,33 +1830,67 @@ NULL #' @title Retention time correction based on alignment of house keeping peak #' groups #' -#' @description This method performs retention time adjustment based on the -#' alignment of chromatographic peak groups present in all/most samples -#' (hence corresponding to house keeping compounds). First the retention -#' time deviation of these peak groups is described by fitting either a -#' polynomial (\code{smooth = "loess"}) or a linear ( -#' \code{smooth = "linear"}) model to the data points. These models are -#' subsequently used to adjust the retention time of each spectrum in -#' each sample. -#' -#' @note These methods and classes are part of the updated and modernized -#' \code{xcms} user interface which will eventually replace the -#' \code{\link{group}} methods. All of the settings to the alignment -#' algorithm can be passed with a \code{PeakGroupsParam} object. +#' @description #' -#' The matrix with the (raw) retention times of the peak groups used -#' in the alignment is added to the \code{peakGroupsMatrix} slot of the -#' \code{PeakGroupsParam} object that is stored into the corresponding -#' \emph{process history step} (see \code{\link{processHistory}} for how -#' to access the process history). +#' This method performs retention time adjustment based on the +#' alignment of chromatographic peak groups present in all/most samples +#' (hence corresponding to house keeping compounds). First the retention +#' time deviation of these peak groups is described by fitting either a +#' polynomial (\code{smooth = "loess"}) or a linear ( +#' \code{smooth = "linear"}) model to the data points. These models are +#' subsequently used to adjust the retention time of each spectrum in +#' each sample. +#' +#' It is also possible to exclude certain samples within an experiment from +#' the estimation of the alignment models. The parameter \code{subset} +#' allows to define the indices of samples within \code{object} that should +#' be aligned. Samples not part of this \code{subset} are left out in the +#' estimation of the alignment models, but their retention times are +#' subsequently adjusted based on the alignment results of the closest sample +#' in \code{subset} (close in terms of position within the \code{object}). +#' Alignment could thus be performed on only \emph{real} samples leaving out +#' e.g. blanks, which are then in turn adjusted based on the closest real +#' sample. Here it is up to the user to ensure that the samples within +#' \code{object} are ordered correctly (e.g. by injection index). +#' +#' How the non-subset samples are adjusted bases also on the parameter +#' \code{subsetAdjust}: with \code{subsetAdjust = "previous"}, each non-subset +#' sample is adjusted based on the closest previous subset sample which results +#' in most cases with adjusted retention times of the non-subset sample being +#' identical to the subset sample on which the adjustment bases. The second, +#' default, option is to use \code{subsetAdjust = "average"} in which case +#' each non subset sample is adjusted based on the average retention time +#' adjustment from the previous and following subset sample. For the average +#' a weighted mean is used with weights being the inverse of the distance of +#' the non-subset sample to the subset samples used for alignment. +#' +#' See also section \emph{Alignment of experiments including blanks} in the +#' \emph{xcms} vignette for an example. +#' +#' @note +#' +#' These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{group}} methods. All of the settings to the alignment +#' algorithm can be passed with a \code{PeakGroupsParam} object. +#' +#' The matrix with the (raw) retention times of the peak groups used +#' in the alignment is added to the \code{peakGroupsMatrix} slot of the +#' \code{PeakGroupsParam} object that is stored into the corresponding +#' \emph{process history step} (see \code{\link{processHistory}} for how +#' to access the process history). #' #' @param minFraction \code{numeric(1)} between 0 and 1 defining the minimum #' required fraction of samples in which peaks for the peak group were #' identified. Peak groups passing this criteria will aligned across #' samples and retention times of individual spectra will be adjusted #' based on this alignment. For \code{minFraction = 1} the peak group -#' has to contain peaks in all samples of the experiment. -#' +#' has to contain peaks in all samples of the experiment. Note that if +#' \code{subset} is provided, the specified fraction is relative to the +#' defined subset of samples and not to the total number of samples within +#' the experiment (i.e. a peak has to be present in the specified +#' proportion of subset samples). +#' #' @param extraPeaks \code{numeric(1)} defining the maximal number of #' additional peaks for all samples to be assigned to a peak group (i.e. #' feature) for retention time correction. For a data set with 6 samples, @@ -1850,18 +1915,28 @@ NULL #' the peak groups on which the alignment should be performed. Each column #' represents a sample, each row a feature/peak group. Such a matrix is #' for example returned by the \code{\link{adjustRtimePeakGroups}} method. -#' +#' +#' @param subset \code{integer} with the indices of samples within the +#' experiment on which the alignment models should be estimated. Samples +#' not part of the subset are adjusted based on the closest subset sample. +#' See description above for more details. +#' +#' @param subsetAdjust \code{character} specifying the method with which +#' non-subset samples should be adjusted. Supported options are +#' \code{"previous"} and \code{"average"} (default). See description above +#' for more information. +#' #' @family retention time correction methods -#' +#' #' @seealso The \code{\link{do_adjustRtime_peakGroups}} core #' API function and \code{\link{retcor.peakgroups}} for the old user #' interface. #' \code{\link{plotAdjustedRtime}} for visualization of alignment results. -#' +#' #' @name adjustRtime-peakGroups #' #' @author Colin Smith, Johannes Rainer -#' +#' #' @references #' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and #' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite @@ -1875,37 +1950,26 @@ NULL #' peak groups present in most samples. #' Instances should be created with the \code{PeakGroupsParam} constructor. #' -#' @slot .__classVersion__,minFraction,extraPeaks,smooth,span,family,peakGroupsMatrix See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' @slot minFraction,extraPeaks,smooth,span,family,peakGroupsMatrix,subset,subsetAdjust See corresponding parameter above. #' #' @rdname adjustRtime-peakGroups #' #' @examples -#' ############################## -#' ## Chromatographic peak detection and grouping. -#' ## -#' ## Below we perform first a peak detection (using the matchedFilter -#' ## method) on some of the test files from the faahKO package followed by -#' ## a peak grouping. -#' library(faahKO) -#' library(xcms) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' -#' ## Reading 2 of the KO samples -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' res <- faahko_sub #' -#' ## Perform the peak detection using the matchedFilter method. -#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -#' res <- findChromPeaks(raw_data, param = mfp) +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' #' head(chromPeaks(res)) #' ## The number of peaks identified per sample: #' table(chromPeaks(res)[, "sample"]) #' #' ## Performing the peak grouping using the "peak density" method. -#' p <- PeakDensityParam(sampleGroups = c(1, 1)) +#' p <- PeakDensityParam(sampleGroups = c(1, 1, 1)) #' res <- groupChromPeaks(res, param = p) #' #' ## Perform the retention time adjustment using peak groups found in both @@ -1924,7 +1988,7 @@ NULL #' segments(x0 = pkGrps[, 1], x1 = pkGrps[, 2], #' y0 = rep(1, nrow(pkGrps)), y1 = rep(2, nrow(pkGrps))) #' grid() -#' axis(side = 2, at = c(1, 2), labels = colnames(pkGrps)) +#' axis(side = 2, at = c(1, 2, 3), labels = colnames(pkGrps)) #' #' ## Next we perform the alignment. #' res <- adjustRtime(res, param = fgp) @@ -1933,15 +1997,13 @@ NULL #' hasFeatures(res) #' #' ## Plot the raw against the adjusted retention times. -#' plot(rtime(raw_data), rtime(res), pch = 16, cex = 0.25, col = fromFile(res)) +#' plot(rtime(res, adjusted = FALSE), +#' rtime(res), pch = 16, cex = 0.25, col = fromFile(res)) #' #' ## Adjusterd retention times can be accessed using #' ## rtime(object, adjusted = TRUE) and adjustedRtime #' all.equal(rtime(res), adjustedRtime(res)) #' -#' ## To get the raw, unadjusted retention times: -#' all.equal(rtime(res, adjusted = FALSE), rtime(raw_data)) -#' #' ## To extract the retention times grouped by sample/file: #' rts <- rtime(res, bySample = TRUE) setClass("PeakGroupsParam", @@ -1950,7 +2012,9 @@ setClass("PeakGroupsParam", smooth = "character", span = "numeric", family = "character", - peakGroupsMatrix = "matrix"), + peakGroupsMatrix = "matrix", + subset = "integer", + subsetAdjust = "character"), contains = "Param", prototype = prototype( minFraction = 0.9, @@ -1958,7 +2022,9 @@ setClass("PeakGroupsParam", smooth = "loess", span = 0.2, family = "gaussian", - peakGroupsMatrix = matrix(ncol = 0, nrow = 0) + peakGroupsMatrix = matrix(ncol = 0, nrow = 0), + subset = integer(), + subsetAdjust = "average" ), validity = function(object) { msg <- character() @@ -1989,25 +2055,57 @@ setClass("PeakGroupsParam", #' @title Align retention times across samples using Obiwarp #' -#' @description This method performs retention time adjustment using the -#' Obiwarp method [Prince 2006]. It is based on the code at -#' \url{http://obi-warp.sourceforge.net} but supports alignment of multiple -#' samples by aligning each against a \emph{center} sample. The alignment is -#' performed directly on the \code{\link{profile-matrix}} and can hence be -#' performed independently of the peak detection or peak grouping. +#' @description +#' +#' This method performs retention time adjustment using the +#' Obiwarp method [Prince 2006]. It is based on the code at +#' \url{http://obi-warp.sourceforge.net} but supports alignment of multiple +#' samples by aligning each against a \emph{center} sample. The alignment is +#' performed directly on the \code{\link{profile-matrix}} and can hence be +#' performed independently of the peak detection or peak grouping. +#' +#' It is also possible to exclude certain samples within an experiment from +#' the estimation of the alignment models. The parameter \code{subset} +#' allows to define the indices of samples within \code{object} that should +#' be aligned. Samples not part of this \code{subset} are left out in the +#' estimation of the alignment models, but their retention times are +#' subsequently adjusted based on the alignment results of the closest sample +#' in \code{subset} (close in terms of position within the \code{object}). +#' Alignment could thus be performed on only \emph{real} samples leaving out +#' e.g. blanks, which are then in turn adjusted based on the closest real +#' sample. Here it is up to the user to ensure that the samples within +#' \code{object} are ordered correctly (e.g. by injection index). +#' +#' How the non-subset samples are adjusted bases also on the parameter +#' \code{subsetAdjust}: with \code{subsetAdjust = "previous"}, each non-subset +#' sample is adjusted based on the closest previous subset sample which results +#' in most cases with adjusted retention times of the non-subset sample being +#' identical to the subset sample on which the adjustment bases. The second, +#' default, option is to use \code{subsetAdjust = "average"} in which case +#' each non subset sample is adjusted based on the average retention time +#' adjustment from the previous and following subset sample. For the average +#' a weighted mean is used with weights being the inverse of the distance of +#' the non-subset sample to the subset samples used for alignment. +#' +#' See also section \emph{Alignment of experiments including blanks} in the +#' \emph{xcms} vignette for an example. +#' +#' @note +#' +#' These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{retcor}} methods. All of the settings to the alignment +#' algorithm can be passed with a \code{ObiwarpParam} object. #' -#' @note These methods and classes are part of the updated and modernized -#' \code{xcms} user interface which will eventually replace the -#' \code{\link{retcor}} methods. All of the settings to the alignment -#' algorithm can be passed with a \code{ObiwarpParam} object. -#' #' @param binSize \code{numeric(1)} defining the bin size (in mz dimension) #' to be used for the \emph{profile matrix} generation. See \code{step} #' parameter in \code{\link{profile-matrix}} documentation for more details. #' #' @param centerSample \code{integer(1)} defining the index of the center sample #' in the experiment. It defaults to -#' \code{floor(median(1:length(fileNames(object))))}. +#' \code{floor(median(1:length(fileNames(object))))}. Note that if +#' \code{subset} is used, the index passed with \code{centerSample} is +#' within these subset samples. #' #' @param response \code{numeric(1)} defining the \emph{responsiveness} of #' warping with \code{response = 0} giving linear warping on start and end @@ -2043,16 +2141,18 @@ setClass("PeakGroupsParam", #' #' @param initPenalty \code{numeric(1)} defining the penalty for initiating an #' alignment (for local alignment only). -#' +#' +#' @inheritParams adjustRtime-peakGroups +#' #' @family retention time correction methods -#' +#' #' @seealso \code{\link{retcor.obiwarp}} for the old user interface. #' \code{\link{plotAdjustedRtime}} for visualization of alignment results. #' #' @name adjustRtime-obiwarp #' #' @author Colin Smith, Johannes Rainer -#' +#' #' @references #' John T. Prince and Edward M. Marcotte. "Chromatographic Alignment of #' ESI-LC-MS Proteomics Data Sets by Ordered Bijective Interpolated Warping" @@ -2066,49 +2166,32 @@ NULL #' method. Class Instances should be created using the #' \code{ObiwarpParam} constructor. #' -#' @slot .__classVersion__,binSize,centerSample,response,distFun,gapInit,gapExtend,factorDiag,factorGap,localAlignment,initPenalty See corresponding parameter above. \code{.__classVersion__} stores -#' the version from the class. Slots values should exclusively be accessed -#' \emph{via} the corresponding getter and setter methods listed above. +#' @slot binSize,centerSample,response,distFun,gapInit,gapExtend,factorDiag,factorGap,localAlignment,initPenalty,subset,subsetAdjust See +#' corresponding parameter above. #' #' @rdname adjustRtime-obiwarp #' #' @examples -#' library(faahKO) -#' library(MSnbase) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' -#' ## Reading 2 of the KO samples -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") #' -#' ## Perform retention time correction on the OnDiskMSnExp: -#' res <- adjustRtime(raw_data, param = ObiwarpParam()) -#' +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Perform retention time correction: +#' res <- adjustRtime(faahko_sub, param = ObiwarpParam()) +#' #' ## As a result we get a numeric vector with the adjusted retention times for #' ## all spectra. #' head(res) #' #' ## We can split this by file to get the adjusted retention times for each #' ## file -#' resL <- split(res, fromFile(raw_data)) -#' -#' ############################## -#' ## Perform retention time correction on an XCMSnExp: -#' ## -#' ## Perform first the chromatographic peak detection using the matchedFilter -#' ## method. -#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -#' res <- findChromPeaks(raw_data, param = mfp) -#' -#' ## Performing the retention time adjustment using obiwarp. -#' res_2 <- adjustRtime(res, param = ObiwarpParam()) +#' resL <- split(res, fromFile(res)) #' -#' head(rtime(res_2)) -#' head(rtime(raw_data)) -#' -#' ## Also the retention times of the detected peaks were adjusted. -#' tail(chromPeaks(res)) -#' tail(chromPeaks(res_2)) setClass("ObiwarpParam", slots = c(binSize = "numeric", centerSample = "integer", @@ -2119,7 +2202,9 @@ setClass("ObiwarpParam", factorDiag = "numeric", factorGap = "numeric", localAlignment = "logical", - initPenalty = "numeric"), + initPenalty = "numeric", + subset = "integer", + subsetAdjust = "character"), contains = "Param", prototype = prototype( binSize = 1, @@ -2131,7 +2216,9 @@ setClass("ObiwarpParam", factorDiag = 2, factorGap = 1, localAlignment = FALSE, - initPenalty = 0), + initPenalty = 0, + subset = integer(), + subsetAdjust = "average"), validity = function(object) { msg <- character() if (length(object@binSize) > 1 | @@ -2176,11 +2263,9 @@ setClass("ObiwarpParam", else TRUE }) -#' @description The \code{FillChromPeaksParam} object encapsules all settings for -#' the signal integration for missing peaks. -#' -#' @slot .__classVersion__,expandMz,expandRt,ppm,fixedMz,fixedRt See corresponding parameter above. \code{.__classVersion__} stores the version of the class. -#' +#' @slot expandMz,expandRt,ppm,fixedMz,fixedRt See corresponding parameter +#' above. +#' #' @rdname fillChromPeaks setClass("FillChromPeaksParam", slots = c(expandMz = "numeric", @@ -2208,13 +2293,24 @@ setClass("FillChromPeaksParam", if (length(object@fixedMz) > 1) msg <- c(msg, "'fixedMz' has to be a numeric of length 1") if (length(object@fixedRt) > 1) - msg <- c(msg, "'fixedRt' has to be a numeric of length 1") + msg <- c(msg, "'fixedRt' has to be a numeric of length 1") if (length(msg)) msg else TRUE } ) +#' @rdname fillChromPeaks +#' +#' @slot rtmin,rtmax,mzmin,mzmax See corresponding parameter above. +setClass("ChromPeakAreaParam", + slots = c(rtmin = "function", + rtmax = "function", + mzmin = "function", + mzmax = "function"), + contains = "Param" + ) + #' @aliases MsFeatureData #' #' @title Data container storing xcms preprocessing results @@ -2228,9 +2324,9 @@ setClass("FillChromPeaksParam", #' sample. #' #' @noRd -#' +#' #' @rdname XCMSnExp-class -setClass("MsFeatureData", contains = c("environment", "Versioned"), +setClass("MsFeatureData", contains = c("environment"), prototype = prototype(.xData = new.env(parent = emptyenv()))) .REQ_PEAKS_COLS <- c("mz", "mzmin", "mzmax", "rt", "rtmin", @@ -2238,8 +2334,8 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), .REQ_PEAKG_COLS <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", "peakidx") -#' @aliases XCMSnExp -#' +#' @aliases XCMSnExp updateObject,XCMSnExp-method +#' #' @title Data container storing xcms preprocessing results #' #' @description @@ -2248,7 +2344,7 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' data preprocessing that comprises chromatographic peak detection, alignment #' and correspondence. These results can be accessed with the \code{chromPeaks}, #' \code{adjustedRtime} and \code{featureDefinitions} functions; see below -#' (after the Usage, Arguments, Value and Slots sections) for more details). +#' (after the Usage, Arguments, Value and Slots sections) for more details). #' Along with the results, the object contains the processing history that #' allows to track each processing step along with the used settings. This #' can be extracted with the \code{\link{processHistory}} method. @@ -2262,8 +2358,10 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' preprocessing results will be passed along to the resulting #' \code{xcmsSet} object. #' -#' General functions for \code{XCMSnExp} objects are: -#' +#' General functions for \code{XCMSnExp} objects are (see further below for +#' specific function to handle chromatographic peak data, alignment and +#' correspondence results): +#' #' @section Chromatographic peak data: #' #' Chromatographic peak data is added to an \code{XCMSnExp} object by the @@ -2293,10 +2391,10 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' #' \item \code{\link{highlightChromPeaks}} add chromatographic peaks to an #' existing plot of a \code{\link{Chromatogram}} (see respective help page). -#' +#' #' } -#' -#' +#' +#' #' @section Adjusted retention times: #' #' Adjusted retention times are stored in an \code{XCMSnExp} object besides the @@ -2317,12 +2415,12 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' #' \item \code{\link{applyAdjustedRtime}} replace the raw retention times with #' the adjusted ones (see respective help page). -#' +#' #' \item \code{\link{plotAdjustedRtime}} plot differences between adjusted and #' raw retention times (see respective help page). -#' +#' #' } -#' +#' #' #' @section Correspondence results, features: #' @@ -2347,11 +2445,14 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' \item \code{\link{featureSummary}} perform a simple summary of the defined #' features (see respective help page). #' -#' \item \code{link{overlappingFeatures}} identify features that are +#' \item \code{\link{overlappingFeatures}} identify features that are #' overlapping or close in the m/z - rt space (see respective help page). -#' +#' +#' \item \code{\link{quantify}} extract feature intensities and put them, along +#' with feature definitions and phenodata information, into a +#' \code{\link{SummarizedExperiment}}. See help page for details. #' } -#' +#' #' @note The \code{"chromPeaks"} element in the \code{msFeatureData} slot is #' equivalent to the \code{@peaks} slot of the \code{xcmsSet} object, the #' \code{"featureDefinitions"} contains information from the \code{@groups} @@ -2383,6 +2484,7 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' detected peaks. See return value for the \code{chromPeaks} method for the #' expected format. #' +#' #' @author Johannes Rainer #' #' @seealso \code{\linkS4class{xcmsSet}} for the old implementation. @@ -2391,7 +2493,7 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' #' \code{\link{findChromPeaks}} for available peak detection methods #' returning a \code{XCMSnExp} object as a result. -#' +#' #' \code{\link{groupChromPeaks}} for available peak grouping #' methods and \code{\link{featureDefinitions}} for the method to extract #' the feature definitions representing the peak grouping results. @@ -2399,45 +2501,53 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' #' \code{\link{chromatogram}} to extract MS data as #' \code{\link{Chromatogram}} objects. -#' +#' #' \code{\link{as}} (\code{as(x, "data.frame")}) in the \code{MSnbase} #' package for the method to extract MS data as \code{data.frame}s. #' #' \code{\link{featureSummary}} to calculate basic feature summaries. +#' +#' \code{\link{featureChromatograms}} to extract chromatograms for each +#' feature. +#' +#' \code{\link{chromPeakSpectra}} to extract MS2 spectra with the m/z of +#' the precursor ion within the m/z range of a peak and a retention time +#' within its retention time range. +#' +#' \code{\link{featureSpectra}} to extract MS2 spectra associated with +#' identified features. +#' #' @rdname XCMSnExp-class #' #' @examples #' -#' ## Loading the data from 2 files of the faahKO package. -#' library(faahKO) -#' od <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -#' system.file("cdf/KO/ko16.CDF", package = "faahKO")), -#' mode = "onDisk") -#' ## Now we perform a chromatographic peak detection on this data set using the -#' ## matched filter method. We are tuning the settings such that it performs -#' ## faster. -#' mfp <- MatchedFilterParam(binSize = 6) -#' xod <- findChromPeaks(od, param = mfp) +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' #' ## The results from the peak detection are now stored in the XCMSnExp #' ## object -#' xod +#' faahko_sub #' #' ## The detected peaks can be accessed with the chromPeaks method. -#' head(chromPeaks(xod)) +#' head(chromPeaks(faahko_sub)) #' #' ## The settings of the chromatographic peak detection can be accessed with #' ## the processHistory method -#' processHistory(xod) +#' processHistory(faahko_sub) #' #' ## Also the parameter class for the peak detection can be accessed -#' processParam(processHistory(xod)[[1]]) +#' processParam(processHistory(faahko_sub)[[1]]) #' #' ## The XCMSnExp inherits all methods from the pSet and OnDiskMSnExp classes #' ## defined in Bioconductor's MSnbase package. To access the (raw) retention #' ## time for each spectrum we can use the rtime method. Setting bySample = TRUE #' ## would cause the retention times to be grouped by sample -#' head(rtime(xod)) +#' head(rtime(faahko_sub)) #' #' ## Similarly it is possible to extract the mz values or the intensity values #' ## using the mz and intensity method, respectively, also with the option to @@ -2446,15 +2556,15 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' ## spectra method which returns Spectrum objects containing all raw data. #' ## Note that all these methods read the information from the original input #' ## files and subsequently apply eventual data processing steps to them. -#' mzs <- mz(xod, bySample = TRUE) +#' mzs <- mz(faahko_sub, bySample = TRUE) #' length(mzs) #' lengths(mzs) #' #' ## The full data could also be read using the spectra data, which returns #' ## a list of Spectrum object containing the mz, intensity and rt values. -#' ## spctr <- spectra(xod) +#' ## spctr <- spectra(faahko_sub) #' ## To get all spectra of the first file we can split them by file -#' ## head(split(spctr, fromFile(xod))[[1]]) +#' ## head(split(spctr, fromFile(faahko_sub))[[1]]) #' #' ############ #' ## Filtering @@ -2464,19 +2574,17 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' ## retention time correction and peak grouping results) will be dropped. #' ## Below we filter the XCMSnExp object by file to extract the results for #' ## only the second file. -#' xod_2 <- filterFile(xod, file = 2) +#' xod_2 <- filterFile(faahko_sub, file = 2) #' xod_2 #' #' ## Now the objects contains only the idenfified peaks for the second file #' head(chromPeaks(xod_2)) #' -#' head(chromPeaks(xod)[chromPeaks(xod)[, "sample"] == 2, ]) -#' #' ########## #' ## Coercing to an xcmsSet object #' ## #' ## We can also coerce the XCMSnExp object into an xcmsSet object: -#' xs <- as(xod, "xcmsSet") +#' xs <- as(faahko_sub, "xcmsSet") #' head(peaks(xs)) setClass("XCMSnExp", slots = c( @@ -2499,7 +2607,6 @@ setClass("XCMSnExp", "objects are allowed in slot ", ".processHistory!")) } - ## TODO @jo add checks: ## 1) call validMsFeatureData msg <- c(msg, validateMsFeatureData(object@msFeatureData)) if (length(msg)) return(msg) @@ -2513,6 +2620,9 @@ setClass("XCMSnExp", "peaks in the 'chromPeaks' ", "element of the msFeatureData ", "slot!")) + if (!any(ls(object@msFeatureData) == "chromPeakData")) + return(paste0("Missing 'chromPeakData'. Please update", + " the object with 'updateObject'")) } ## 3) Check that the length of the adjustedRtime matches! if (any(ls(object@msFeatureData) == "adjustedRtime")) { @@ -2542,46 +2652,28 @@ setClass("XCMSnExp", } ) -.CPEAKS_CHROMPEAKS_REQ_NAMES <- c("row", "col", "rt", "rtmin", "rtmax", "into", - "maxo", "sn") -## Info: (issue #281) -## Store peaks per chromatogram in a matrix with the same dimension than -## Chromatograms. This makes subetting etc much easier. chromPeaks,CPeaks -## could then return a matrix in the format as chromPeaks,XCMSnExp does, -## and a chromPeaksMatrix would return the matrix with the same dimensions. -setClass("CPeaks", - slots = c( - .processHistory = "list", - chromPeaks = "matrix" - ), +.CHROMPEAKS_REQ_NAMES <- c("rt", "rtmin", "rtmax", "into", "maxo", "sn") +.CHROMPEAKDATA_REQ_NAMES <- c("ms_level", "is_filled") +setClass("XChromatogram", + slots = c(chromPeaks = "matrix", + chromPeakData = "DataFrame"), prototype = prototype( - .processHistory = list(), - chromPeaks = matrix(nrow = 0, - ncol = 0) + chromPeaks = matrix(nrow = 0, ncol = length(.CHROMPEAKS_REQ_NAMES), + dimnames = list(character(), + .CHROMPEAKS_REQ_NAMES)), + chromPeakData = DataFrame(ms_level = integer(), + is_filled = logical()) ), - contains = c("Chromatograms"), - validity = function(object) { - ## TODO @jo - ## 1) chromPeaks has to have the required columns. - ## 2) if processHistory not empty -> has to extend ProcessHistory - ## 3) if nrow(chromPeaks) > 0 row and column have to match the - ## dimension of object - msg <- character() - ## if (length(object@.processHistory) > 0) { - ## isOK <- unlist(lapply(object@.processHistory, function(z) { - ## return(inherits(z, "ProcessHistory")) - ## })) - ## if (!all(isOK)) - ## msg <- c(msg, paste0("Only 'ProcessHistory' ", - ## "objects are allowed in slot ", - ## ".processHistory!")) - ## } - if (length(msg)) - msg - else TRUE - } -) + contains = "Chromatogram", + validity = .validXChromatogram) +setClass("XChromatograms", + slots = c(.processHistory = "list", + featureDefinitions = "DataFrame"), + prototype = prototype(.processHistory = list(), + featureDefinitions = DataFrame()), + contains = "MChromatograms", + validity = .validXChromatograms) #' @aliases mz,CalibrantMassParam #' @@ -2610,7 +2702,7 @@ setClass("CPeaks", #' @param method `character(1)` defining the method that should be used to #' estimate the calibration function. Can be `"shift"`, `"linear"` (default) #' or `"edgeshift"`. -#' +#' #' @details The method does first identify peaks that are close to the provided #' mz values and, given that there difference to the calibrants is smaller #' than the user provided cut off (based on arguments `mzabs` and `mzppm`), @@ -2619,7 +2711,7 @@ setClass("CPeaks", #' or estimated by a linear model through all calibrants. #' Peaks are considered close to a calibrant mz if the difference between #' the calibrant and its mz is `<= mzabs + mz * mzppm /1e6`. -#' +#' #' **Adjustment methods**: adjustment function/factor is estimated using #' the difference between calibrant and peak mz values only for peaks #' that are close enough to the calibrants. The availabel methods are: @@ -2633,7 +2725,7 @@ setClass("CPeaks", #' #' For more information, details and examples refer to the #' *xcms-direct-injection* vignette. -#' +#' #' @note `CalibrantMassParam` classes don't have exported getter or setter #' methods. #' @@ -2641,11 +2733,11 @@ setClass("CPeaks", #' For `calibrate`: an [XCMSnExp] object with chromatographic peaks being #' calibrated. **Be aware** that the actual raw mz values are not (yet) #' calibrated, but **only** the identified chromatographic peaks. -#' +#' #' @author Joachim Bargsten, Johannes Rainer #' #' @md -#' +#' #' @rdname calibrate-calibrant-mass setClass("CalibrantMassParam", slots = c( @@ -2697,3 +2789,75 @@ setClass("CalibrantMassParam", else TRUE }) + +setClass("CleanPeaksParam", + slots = c(maxPeakwidth = "numeric"), + contains = "Param", + prototype = prototype( + maxPeakwidth = 10), + validity = function(object) { + msg <- character() + if (length(object@maxPeakwidth) > 1 || object@maxPeakwidth < 0) + msg <- c(msg, paste0("'maxPeakwidth' has to be a positive ", + "number of length 1")) + if (length(msg)) + msg + else TRUE + }) + +setClass("MergeNeighboringPeaksParam", + slots = c(expandRt = "numeric", + expandMz = "numeric", + ppm = "numeric", + minProp = "numeric"), + contains = "Param", + prototype = prototype( + expandRt = 2.0, + expandMz = 0.0, + ppm = 10.0, + minProp = 0.75), + validity = function(object) { + msg <- character() + if (length(object@expandRt) > 1 || !is.finite(object@expandRt)) + msg <- c(msg, paste0("'expandRt' has to be a (defined) ", + "numeric of length 1")) + if (length(object@expandMz) > 1 || !is.finite(object@expandMz)) + msg <- c(msg, paste0("'expandMz' has to be a (defined) ", + "numeric of length 1")) + if (length(object@ppm) > 1 || !is.finite(object@ppm) || + object@ppm < 0) + msg <- c(msg, paste0("'ppm' has to be a positive numeric ", + "of length 1")) + if (length(object@minProp) > 1 || !is.finite(object@minProp) || + object@minProp < 0) + msg <- c(msg, paste0("'minProp' has to be a positive ", + "number of length 1")) + if (length(msg)) + msg + else TRUE + }) + +setClass("FilterIntensityParam", + slots = c(threshold = "numeric", + nValues = "integer", + value = "character"), + contains = "Param", + prototype = prototype( + threshold = 0, + nValues = 1L, + value = "maxo"), + validity = function(object) { + msg <- character() + if (length(object@threshold) > 1 || object@threshold < 0) + msg <- c(msg, paste0("'threshold' has to be a positive ", + "number of length 1")) + if (length(object@nValues) > 1 || object@nValues < 1) + msg <- c(msg, paste0("'nValues' has to be a positive ", + "number of length 1")) + if (length(object@value) > 1) + msg <- c(msg, paste0("'value' has to be a character ", + "of length 1")) + if (length(msg)) + msg + else TRUE + }) diff --git a/R/Deprecated.R b/R/Deprecated.R index 69ba6bbd9..f331fbd12 100644 --- a/R/Deprecated.R +++ b/R/Deprecated.R @@ -1,5 +1,5 @@ -## Put all deprecated methods/functions in here, so they will be -## easier to defunct/remove later. +## ## Put all deprecated methods/functions in here, so they will be +## ## easier to defunct/remove later. xcmsParallelSetup <- function(nSlaves) { .Deprecated(msg = "Use of 'xcmsParallelSetup' is deprecated! Use 'BPPARAM' arguments instead.") @@ -333,105 +333,104 @@ xcmsClusterApply <- function(cl, x, fun, msgfun=NULL, ...) { } -setMethod("extractChromatograms", - signature(object = "OnDiskMSnExp"), - function(object, rt, mz, aggregationFun = "sum", missing = NA_real_) { - .Deprecated(msg = paste0("Use of 'extractChromatograms' is ", - "deprecated, please use 'chromatogram' ", - "instead.")) - chromatogram(object, rt = rt, mz = mz, - aggregationFun = aggregationFun, missing = missing) - }) - -plotChromatogram <- function(x, rt, col = "#00000060", - lty = 1, type = "l", xlab = "retention time", - ylab = "intensity", main = NULL, ...) { - .Deprecated(msg = paste0("Use of 'plotChromatogram' is deprecated, please ", - "use 'plot' instead.")) - if (!is.list(x) & !is(x, "Chromatogram")) - stop("'x' should be a Chromatogram object or a list of Chromatogram", - " objects.") - if (is(x, "Chromatogram")) - x <- list(x) - isOK <- lapply(x, function(z) { - if (is(z, "Chromatogram")) { - return(TRUE) - } else { - if (is.na(z)) - return(TRUE) - } - FALSE - }) - if (any(!unlist(isOK))) - stop("if 'x' is a list it should only contain Chromatogram objects") - ## Subset the Chromatogram objects if rt provided. - if (!missing(rt)) { - rt <- range(rt) - x <- lapply(x, function(z) { - if (is(z, "Chromatogram")) - filterRt(z, rt = rt) - }) - } - if (length(col) != length(x)) { - col <- rep(col[1], length(x)) - } - ## If main is NULL use the mz range. - if (is.null(main)) { - mzr <- range(lapply(x, mz), na.rm = TRUE, finite = TRUE) - main <- paste0(format(mzr, digits = 7), collapse = " - ") - } - ## Number of measurements we've got per chromatogram. This can be different - ## between samples, from none (if not a single measurement in the rt/mz) - ## to the number of data points that were actually measured. - lens <- unique(lengths(x)) - max_len <- max(lens) - max_len_vec <- rep_len(NA, max_len) - ## Generate the matrix of rt values, columns are samples, rows retention - ## time values. Fill each column with NAs up to the maximum number of values - ## we've got in a sample/file. - rts <- do.call(cbind, lapply(x, function(z) { - cur_len <- length(z) - if (cur_len == 0) - max_len_vec - else { - ## max_len_vec[,] <- NA ## don't need that. get's copied. - max_len_vec[seq_len(cur_len)] <- rtime(z) - max_len_vec - } - })) - ## Same for the intensities. - ints <- do.call(cbind, lapply(x, function(z) { - cur_len <- length(z) - if (length(z) == 0) - max_len_vec - else { - ## max_len_vec[,] <- NA ## don't need that. get's copied. - max_len_vec[seq_len(cur_len)] <- intensity(z) - max_len_vec - } - })) - ## Define the x and y limits - x_lim <- c(0, 1) - y_lim <- c(0, 1) - if (all(is.na(rts))) - if (!missing(rt)) - x_lim <- range(rt) - else - x_lim <- range(rts, na.rm = TRUE, finite = TRUE) - if (!all(is.na(ints))) - y_lim <- range(ints, na.rm = TRUE, finite = TRUE) - ## Identify columns that have only NAs in either intensity or rt - these - ## will not be plotted. - keepCol <- which(apply(ints, MARGIN = 2, function(z) any(!is.na(z))) | - apply(rts, MARGIN = 2, function(z) any(!is.na(z)))) - ## Finally plot the data. - if (length(keepCol)) { - matplot(x = rts[, keepCol, drop = FALSE], - y = ints[, keepCol, drop = FALSE], type = type, lty = lty, - col = col[keepCol], xlab = xlab, ylab = ylab, main = main, - ...) - } else - plot(x = 3, y = 3, pch = NA, xlab = xlab, ylab = ylab, main = main, - xlim = x_lim, ylim = y_lim) -} - +## setMethod("extractChromatograms", +## signature(object = "OnDiskMSnExp"), +## function(object, rt, mz, aggregationFun = "sum", missing = NA_real_) { +## .Deprecated(msg = paste0("Use of 'extractChromatograms' is ", +## "deprecated, please use 'chromatogram' ", +## "instead.")) +## chromatogram(object, rt = rt, mz = mz, +## aggregationFun = aggregationFun, missing = missing) +## }) + +## plotChromatogram <- function(x, rt, col = "#00000060", +## lty = 1, type = "l", xlab = "retention time", +## ylab = "intensity", main = NULL, ...) { +## .Deprecated(msg = paste0("Use of 'plotChromatogram' is deprecated, please ", +## "use 'plot' instead.")) +## if (!is.list(x) & !is(x, "Chromatogram")) +## stop("'x' should be a Chromatogram object or a list of Chromatogram", +## " objects.") +## if (is(x, "Chromatogram")) +## x <- list(x) +## isOK <- lapply(x, function(z) { +## if (is(z, "Chromatogram")) { +## return(TRUE) +## } else { +## if (is.na(z)) +## return(TRUE) +## } +## FALSE +## }) +## if (any(!unlist(isOK))) +## stop("if 'x' is a list it should only contain Chromatogram objects") +## ## Subset the Chromatogram objects if rt provided. +## if (!missing(rt)) { +## rt <- range(rt) +## x <- lapply(x, function(z) { +## if (is(z, "Chromatogram")) +## filterRt(z, rt = rt) +## }) +## } +## if (length(col) != length(x)) { +## col <- rep(col[1], length(x)) +## } +## ## If main is NULL use the mz range. +## if (is.null(main)) { +## mzr <- range(lapply(x, mz), na.rm = TRUE, finite = TRUE) +## main <- paste0(format(mzr, digits = 7), collapse = " - ") +## } +## ## Number of measurements we've got per chromatogram. This can be different +## ## between samples, from none (if not a single measurement in the rt/mz) +## ## to the number of data points that were actually measured. +## lens <- unique(lengths(x)) +## max_len <- max(lens) +## max_len_vec <- rep_len(NA, max_len) +## ## Generate the matrix of rt values, columns are samples, rows retention +## ## time values. Fill each column with NAs up to the maximum number of values +## ## we've got in a sample/file. +## rts <- do.call(cbind, lapply(x, function(z) { +## cur_len <- length(z) +## if (cur_len == 0) +## max_len_vec +## else { +## ## max_len_vec[,] <- NA ## don't need that. get's copied. +## max_len_vec[seq_len(cur_len)] <- rtime(z) +## max_len_vec +## } +## })) +## ## Same for the intensities. +## ints <- do.call(cbind, lapply(x, function(z) { +## cur_len <- length(z) +## if (length(z) == 0) +## max_len_vec +## else { +## ## max_len_vec[,] <- NA ## don't need that. get's copied. +## max_len_vec[seq_len(cur_len)] <- intensity(z) +## max_len_vec +## } +## })) +## ## Define the x and y limits +## x_lim <- c(0, 1) +## y_lim <- c(0, 1) +## if (all(is.na(rts))) +## if (!missing(rt)) +## x_lim <- range(rt) +## else +## x_lim <- range(rts, na.rm = TRUE, finite = TRUE) +## if (!all(is.na(ints))) +## y_lim <- range(ints, na.rm = TRUE, finite = TRUE) +## ## Identify columns that have only NAs in either intensity or rt - these +## ## will not be plotted. +## keepCol <- which(apply(ints, MARGIN = 2, function(z) any(!is.na(z))) | +## apply(rts, MARGIN = 2, function(z) any(!is.na(z)))) +## ## Finally plot the data. +## if (length(keepCol)) { +## matplot(x = rts[, keepCol, drop = FALSE], +## y = ints[, keepCol, drop = FALSE], type = type, lty = lty, +## col = col[keepCol], xlab = xlab, ylab = ylab, main = main, +## ...) +## } else +## plot(x = 3, y = 3, pch = NA, xlab = xlab, ylab = ylab, main = main, +## xlim = x_lim, ylim = y_lim) +## } diff --git a/R/cwTools.R b/R/cwTools.R index 527818e76..371d1fbcf 100644 --- a/R/cwTools.R +++ b/R/cwTools.R @@ -1,4 +1,4 @@ -MSW.cwt <- function (ms, scales = 1, wavelet = "mexh") +MSW.cwt <- function (ms, scales = 1, wavelet = "mexh", extendLengthMSW = FALSE) { ## modified from package MassSpecWavelet if (wavelet == "mexh") { psi_xval <- seq(-6, 6, length = 256) @@ -22,7 +22,20 @@ MSW.cwt <- function (ms, scales = 1, wavelet = "mexh") stop("Unsupported wavelet!") } oldLen <- length(ms) - ms <- MSW.extendNBase(ms, nLevel = NULL, base = 2) + # IF extendLengthMSW is TRUE: + # The new length is determined by the scales argument, so a larger peakwidth + # will ensure more scales are run, but may slow it down. See + # https://github.com/sneumann/xcms/issues/445 for more information about + # a change from using extendNBase to extendLength. + if(extendLengthMSW){ + newLen <- 2^(ceiling(log2(max(scales)*12))) + ms <- MSW.extendLength(x = ms, addLength = (newLen-length(ms)), + method = "open") + } else { + ms <- MSW.extendNBase(ms, nLevel = NULL, base = 2) + } + + len <- length(ms) nbscales <- length(scales) wCoefs <- NULL @@ -53,6 +66,9 @@ MSW.cwt <- function (ms, scales = 1, wavelet = "mexh") wCoefs } +# This function is no longer used by MSW.cwt(): see above note about the +# switch from extendNBase to calling extendLength directly. +# Possibly now unecessary? MSW.extendNBase <- function(x, nLevel=1, base=2, ...) { ## from package MassSpecWavelet if (!is.matrix(x)) x <- matrix(x, ncol=1) diff --git a/R/do_adjustRtime-functions.R b/R/do_adjustRtime-functions.R index 956732f99..7a6a7e4e0 100644 --- a/R/do_adjustRtime-functions.R +++ b/R/do_adjustRtime-functions.R @@ -4,34 +4,45 @@ #' @title Align spectrum retention times across samples using peak groups #' found in most samples #' -#' @description The function performs retention time correction by assessing -#' the retention time deviation across all samples using peak groups -#' (features) containg chromatographic peaks present in most/all samples. -#' The retention time deviation for these features in each sample is -#' described by fitting either a polynomial (\code{smooth = "loess"}) or -#' a linear (\code{smooth = "linear"}) model to the data points. The -#' models are subsequently used to adjust the retention time for each -#' spectrum in each sample. +#' @description +#' +#' The function performs retention time correction by assessing +#' the retention time deviation across all samples using peak groups +#' (features) containg chromatographic peaks present in most/all samples. +#' The retention time deviation for these features in each sample is +#' described by fitting either a polynomial (\code{smooth = "loess"}) or +#' a linear (\code{smooth = "linear"}) model to the data points. The +#' models are subsequently used to adjust the retention time for each +#' spectrum in each sample. #' #' @note The method ensures that returned adjusted retention times are #' increasingly ordered, just as the raw retention times. -#' -#' @details The alignment bases on the presence of compounds that can be found -#' in all/most samples of an experiment. The retention times of individual -#' spectra are then adjusted based on the alignment of the features -#' corresponding to these \emph{house keeping compounds}. The paraneters -#' \code{minFraction} and \code{extraPeaks} can be used to fine tune which -#' features should be used for the alignment (i.e. which features -#' most likely correspond to the above mentioned house keeping compounds). +#' +#' @details +#' +#' The alignment bases on the presence of compounds that can be found +#' in all/most samples of an experiment. The retention times of individual +#' spectra are then adjusted based on the alignment of the features +#' corresponding to these \emph{house keeping compounds}. The paraneters +#' \code{minFraction} and \code{extraPeaks} can be used to fine tune which +#' features should be used for the alignment (i.e. which features +#' most likely correspond to the above mentioned house keeping compounds). +#' +#' Parameter \code{subset} allows to define a subset of samples within the +#' experiment that should be aligned. All samples not being part of the subset +#' will be aligned based on the adjustment of the closest sample within the +#' subset. This allows to e.g. exclude blank samples from the alignment process +#' with their retention times being still adjusted based on the alignment +#' results of the \emph{real} samples. #' #' @inheritParams adjustRtime-peakGroups -#' +#' #' @param peaks a \code{matrix} or \code{data.frame} with the identified #' chromatographic peaks in the samples. #' #' @param peakIndex a \code{list} of indices that provides the grouping #' information of the chromatographic peaks (across and within samples). -#' +#' #' @param rtime a \code{list} of \code{numeric} vectors with the retention #' times per file/sample. #' @@ -41,7 +52,7 @@ #' this matrix will be determined depending on parameters #' \code{minFraction} and \code{extraPeaks}. If provided, #' \code{minFraction} and \code{extraPeaks} will be ignored. -#' +#' #' @return A \code{list} with \code{numeric} vectors with the adjusted #' retention times grouped by sample. #' @@ -58,8 +69,10 @@ do_adjustRtime_peakGroups <- function(peaks, peakIndex, rtime, minFraction = 0.9, extraPeaks = 1, smooth = c("loess", "linear"), span = 0.2, family = c("gaussian", "symmetric"), - peakGroupsMatrix = matrix(ncol = 0, nrow = 0)) + peakGroupsMatrix = matrix(ncol = 0, nrow = 0), + subset = integer(), subsetAdjust = c("average", "previous")) { + subsetAdjust <- match.arg(subsetAdjust) ## Check input. if (missing(peaks) | missing(peakIndex) | missing(rtime)) stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!") @@ -69,15 +82,14 @@ do_adjustRtime_peakGroups <- if (any(minFraction > 1) | any(minFraction < 0)) stop("'minFraction' has to be between 0 and 1!") ## Check peaks: - OK <- .validChromPeaksMatrix(peaks) + OK <- xcms:::.validChromPeaksMatrix(peaks) if (is.character(OK)) stop(OK) ## Check peakIndex: if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks))))) stop("Some indices listed in 'peakIndex' are outside of ", "1:nrow(peaks)!") - ## Check rtime: in line with the total number of samples we've got in - ## peaks? + ## Check rtime: if (!is.list(rtime)) stop("'rtime' should be a list of numeric vectors with the retention ", "times of the spectra per sample!") @@ -86,10 +98,23 @@ do_adjustRtime_peakGroups <- "times of the spectra per sample!") if (length(rtime) != max(peaks[, "sample"])) stop("The length of 'rtime' does not match with the total number of ", - "samples according to the 'peaks' matrix!") - nSamples <- length(rtime) + "samples according to the 'peaks' matrix!") + total_samples <- length(rtime) + if (length(subset)) { + if (!is.numeric(subset)) + stop("If provided, 'subset' is expected to be an integer") + if (!all(subset %in% seq_len(total_samples))) + stop("One or more indices in 'subset' are out of range.") + if (length(subset) < 2) + stop("Length of 'subset' too small: minimum required samples for ", + "alignment is 2.") + } else subset <- seq_len(total_samples) ## Translate minFraction to number of allowed missing samples. + nSamples <- length(subset) missingSample <- nSamples - (nSamples * minFraction) + ## Remove peaks not present in "subset" from the peakIndex + peaks_in_subset <- which(peaks[, "sample"] %in% subset) + peakIndex <- lapply(peakIndex, function(z) z[z %in% peaks_in_subset]) ## Check if we've got a valid peakGroupsMatrix ## o Same number of samples. ## o range of rt values is within the rtime. @@ -104,44 +129,14 @@ do_adjustRtime_peakGroups <- " the retention time range of the experiment!") rt <- peakGroupsMatrix } else - rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, nSamples, + rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, subset, missingSample, extraPeaks) ## Fix for issue #175 if (length(rt) == 0) stop("No peak groups found in the data for the provided settings") - ## ## Check if we have peak groups with almost the same retention time. If yes - ## ## select the best matching peaks among these. - ## rtmeds <- rowMedians(rt, na.rm = TRUE) - ## sim_rt <- which(diff(rtmeds) < 1e-6) - ## if (length(sim_rt)) { - ## pk_grps <- list() - ## current_idxs <- NULL - ## last_idx <- -1 - ## for (current_idx in sim_rt) { - ## if ((current_idx - last_idx) > 1) { - ## if (!is.null(current_idxs)) - ## pk_grps <- c(pk_grps, list(current_idxs)) - ## current_idxs <- c(current_idx - 1, current_idx) - ## } else { - ## ## Just add the index. - ## current_idxs <- c(current_idxs, current_idx) - ## } - ## last_idx <- current_idx - ## } - ## pk_grps <- c(pk_grps, list(current_idxs)) - ## ## Now, for each of these select one present in most samples. - ## sel_idx <- unlist(lapply(pk_grps, function(z) { - ## tmp <- rt[z, , drop = FALSE] - ## z[which.max(apply(tmp, MARGIN = 1, function(zz) sum(!is.na(zz))))] - ## })) - ## ## Define the other peaks that we can keep as.is - ## if (any(!(1:nrow(rt) %in% unique(unlist(pk_grps))))) - ## spec_idx <- (1:nrow(rt))[-unique(unlist(pk_grps))] - ## else spec_idx <- NULL - ## sel_idx <- sort(c(spec_idx, sel_idx)) - ## rt <- rt[sel_idx, , drop = FALSE] - ## } - + if (ncol(rt) != length(subset)) + stop("Length of 'subset' and number of columns of the peak group ", + "matrix do not match.") message("Performing retention time correction using ", nrow(rt), " peak groups.") @@ -168,8 +163,11 @@ do_adjustRtime_peakGroups <- rtdevrange <- range(rtdev, na.rm = TRUE) warn.overcorrect <- FALSE warn.tweak.rt <- FALSE - - for (i in 1:nSamples) { + + rtime_adj <- rtime + ## Adjust samples in subset. + for (i in seq_along(subset)) { + i_all <- subset[i] # Index of sample in whole dataset. pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i])) ## order the data.frame such that rt and rtdev are increasingly ordered. @@ -178,27 +176,28 @@ do_adjustRtime_peakGroups <- if (smooth == "loess") { lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, degree = 1, family = family)) - - rtdevsmo[[i]] <- na.flatfill(predict(lo, data.frame(rt = rtime[[i]]))) + + rtdevsmo[[i]] <- xcms:::na.flatfill( + predict(lo, data.frame(rt = rtime[[i_all]]))) ## Remove singularities from the loess function rtdevsmo[[i]][abs(rtdevsmo[[i]]) > quantile(abs(rtdevsmo[[i]]), 0.9, na.rm = TRUE) * 2] <- NA if (length(naidx <- which(is.na(rtdevsmo[[i]])))) rtdevsmo[[i]][naidx] <- suppressWarnings( - approx(na.omit(data.frame(rtime[[i]], rtdevsmo[[i]])), - xout = rtime[[i]][naidx], rule = 2)$y + approx(na.omit(data.frame(rtime[[i_all]], rtdevsmo[[i]])), + xout = rtime[[i_all]][naidx], rule = 2)$y ) ## Check if there are adjusted retention times that are not ordered ## increasingly. If there are, search for each first unordered rt ## the next rt that is larger and linearly interpolate the values ## in between (see issue #146 for an illustration). - while (length(decidx <- which(diff(rtime[[i]] - rtdevsmo[[i]]) < 0))) { + while (length(decidx <- which(diff(rtime[[i_all]] - rtdevsmo[[i]]) < 0))) { warn.tweak.rt <- TRUE ## Warn that we had to tweak the rts. - rtadj <- rtime[[i]] - rtdevsmo[[i]] + rtadj <- rtime[[i_all]] - rtdevsmo[[i]] rtadj_start <- rtadj[decidx[1]] ## start interpolating from here - ## Define the + ## Define the next_larger <- which(rtadj > rtadj[decidx[1]]) if (length(next_larger) == 0) { ## Fix if there is no larger adjusted rt up to the end. @@ -211,10 +210,10 @@ do_adjustRtime_peakGroups <- ## linearly interpolate the values in between. adj_idxs <- (decidx[1] + 1):(next_larger - 1) incr <- (rtadj_end - rtadj_start) / length(adj_idxs) - rtdevsmo[[i]][adj_idxs] <- rtime[[i]][adj_idxs] - + rtdevsmo[[i]][adj_idxs] <- rtime[[i_all]][adj_idxs] - (rtadj_start + (1:length(adj_idxs)) * incr) } - + rtdevsmorange <- range(rtdevsmo[[i]]) if (any(rtdevsmorange / rtdevrange > 2)) warn.overcorrect <- TRUE @@ -225,17 +224,19 @@ do_adjustRtime_peakGroups <- } ## Use lm instead? fit <- lsfit(pts$rt, pts$rtdev) - rtdevsmo[[i]] <- rtime[[i]] * fit$coef[2] + fit$coef[1] + rtdevsmo[[i]] <- rtime[[i_all]] * fit$coef[2] + fit$coef[1] ptsrange <- range(pts$rt) - minidx <- rtime[[i]] < ptsrange[1] - maxidx <- rtime[[i]] > ptsrange[2] + minidx <- rtime[[i_all]] < ptsrange[1] + maxidx <- rtime[[i_all]] > ptsrange[2] rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)] rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)] } ## Finally applying the correction - rtime[[i]] <- rtime[[i]] - rtdevsmo[[i]] + rtime_adj[[i_all]] <- rtime[[i_all]] - rtdevsmo[[i]] } - + ## Adjust the remaining samples. + rtime_adj <- adjustRtimeSubset(rtime, rtime_adj, subset = subset, + method = subsetAdjust) if (warn.overcorrect) { warning("Fitted retention time deviation curves exceed points by more", " than 2x. This is dangerous and the algorithm is probably ", @@ -252,8 +253,7 @@ do_adjustRtime_peakGroups <- "Eventually consider to increase the value of the 'span' ", "parameter.") } - - return(rtime) + rtime_adj } ## That's the original code that fails to fix unsorted adjusted retention times ## (see issue #146). @@ -269,7 +269,7 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, ## minFraction if (any(minFraction > 1) | any(minFraction < 0)) stop("'minFraction' has to be between 0 and 1!") - + ## Check peaks: OK <- .validChromPeaksMatrix(peaks) if (is.character(OK)) @@ -289,12 +289,12 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, if (length(rtime) != max(peaks[, "sample"])) stop("The length of 'rtime' does not match with the total number of ", "samples according to the 'peaks' matrix!") - + nSamples <- length(rtime) ## Translate minFraction to number of allowed missing samples. missingSample <- nSamples - (nSamples * minFraction) - - rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, nSamples, + + rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, seq_len(nSamples), missingSample, extraPeaks) message("Performing retention time correction using ", nrow(rt), @@ -329,7 +329,7 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, if (smooth == "loess") { lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, degree = 1, family = family)) - + rtdevsmo[[i]] <- na.flatfill(predict(lo, data.frame(rt = rtime[[i]]))) ## Remove singularities from the loess function rtdevsmo[[i]][abs(rtdevsmo[[i]]) > @@ -385,20 +385,35 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, return(rtime) } -#' This function adjusts retentin times in the vector/matrix \code{x} given the -#' provided \code{numeric} vectors \code{rtraw} and \code{rtadj}. +#' This function adjusts retentin times in the vector/matrix `x` given the +#' provided `numeric` vectors `rtraw` and `rtadj`. +#' +#' @note #' -#' @details The function uses the \code{stepfun} to adjust \code{x} and adjusts -#' it given \code{rtraw} towards \code{rtadj}. Hence it is possible to -#' perform or to revert retention time correction in \code{x} depending -#' on what is provided with parameters \code{rtraw} and \code{rtadj}. -#' See examples for details. -#' -#' @param x A numeric or matrix with retention time values that should be +#' Values in `x` that are outside of the range of `rtraw` are linearly shifted +#' by the difference of the first/last adjusted value. +#' +#' @details +#' +#' The function uses `stepfun` to adjust `x` given adjustment from `rtraw` +#' to `rtadj`. It is possible to perform or to revert retention time +#' correction in `x` depending on whether raw or adjusted retention times are +#' provided with `rtraw` or `rtadj`, respectively. +#' See examples for details. +#' +#' @param x A `numeric` or `matrix` with retention time values that should be #' adjusted. -#' +#' +#' @param rtraw `numeric` with raw retention times. +#' +#' @param rtadj `numeric` with adjusted retention times. +#' #' @noRd -#' +#' +#' @author Johannes Rainer +#' +#' @md +#' #' @examples #' #' ## Perform retention time correction: @@ -413,21 +428,31 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, ## re-order everything if rtraw is not sorted; issue #146 if (is.unsorted(rtraw)) { idx <- order(rtraw) - adjFun <- stepfun(rtraw[idx][-1] - diff(rtraw[idx]) / 2, rtadj[idx]) - ## if (!is.null(dim(x))) - ## return(adjFun(x[idx, ])) - ## else - ## return(adjFun(x[idx])) - } else { - adjFun <- stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj) + rtraw <- rtraw[idx] + rtadj <- rtadj[idx] + } + adjFun <- stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj) + res <- adjFun(x) + ## Fix margins. + idx_low <- which(x < rtraw[1]) + if (length(idx_low)) { + first_adj <- idx_low[length(idx_low)] + 1 + res[idx_low] <- x[idx_low] + res[first_adj] - x[first_adj] + } + idx_high <- which(x > rtraw[length(rtraw)]) + if (length(idx_high)) { + last_adj <- idx_high[1] - 1 + res[idx_high] <- x[idx_high] + res[last_adj] - x[last_adj] } - adjFun(x) + if (is.null(dim(res))) + names(res) <- names(x) + res } #' Helper function to apply retention time adjustment to already identified #' peaks in the peaks matrix of an XCMSnExp (or peaks matrix of an #' xcmsSet). -#' +#' #' @noRd .applyRtAdjToChromPeaks <- function(x, rtraw, rtadj) { if (!is.list(rtraw) | !is.list(rtadj)) @@ -453,15 +478,16 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, #' #' @details This function is called internally by the #' do_adjustRtime_peakGroups function and the retcor.peakgroups method. +#' #' @noRd -.getPeakGroupsRtMatrix <- function(peaks, peakIndex, nSamples, +.getPeakGroupsRtMatrix <- function(peaks, peakIndex, sampleIndex, missingSample, extraPeaks) { ## For each feature: ## o extract the retention time of the peak with the highest intensity. ## o skip peak groups if they are not assigned a peak in at least a ## minimum number of samples OR if have too many peaks from the same ## sample assigned to it. - seq_samp <- seq_len(nSamples) + nSamples <- length(sampleIndex) rt <- lapply(peakIndex, function(z) { cur_fts <- peaks[z, c("rt", "into", "sample"), drop = FALSE] ## Return NULL if we've got less samples that required or is the total @@ -472,8 +498,8 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, if (nsamp < (nSamples - missingSample) | nrow(cur_fts) > (nsamp + extraPeaks)) return(NULL) - cur_fts[] <- cur_fts[order(cur_fts[, 2], decreasing = TRUE), ] - cur_fts[match(seq_samp, cur_fts[, 3]), 1] + cur_fts[] <- cur_fts[order(cur_fts[, 2], decreasing = TRUE), , drop = FALSE] + cur_fts[match(sampleIndex, cur_fts[, 3]), 1] }) rt <- do.call(rbind, rt) ## Order them by median retention time. NOTE: this is different from the @@ -487,3 +513,154 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, } rt } + +#' For a given index `x` return one from `idx` that is the *closest*, can be +#' either simply the `"next"`, or the `"closet"` (smallest difference to `x`). +#' For `"next"`: if there is no *next* index, it takes the previous. +#' +#' @noRd +#' +#' @author Johannes Rainer +#' +#' @examples +#' +#' .get_closest_index(3, c(2, 4, 6, 8)) +.get_closest_index <- function(x, idx, method = c("next", "previous", + "closest")) { + method <- match.arg(method) + switch(method, + `next` = { + nxt <- idx > x + if (any(nxt)) + idx[nxt][1] + else idx[!nxt][sum(!nxt)] + }, + `previous` = { + prv <- idx < x + if (any(prv)) + idx[prv][sum(prv)] + else idx[!prv][1] + }, + closest = { + dst <- abs(idx - x) + idx[which.min(dst)] + }) +} + +#' *align* two vectors with each other. If they have a different length they +#' will be trimmed to have the same length starting from the end with the +#' smaller average difference (between 5 datapoints). +#' +#' @param x `list` of two `numeric` vectors with potentially different length. +#' +#' @return `list` of two `numeric` vectors with equal length. +#' +#' @noRd +#' +#' @author Johannes Rainer +#' +#' @examples +#' +#' x <- list(a = 1:10, b = 3:10) +#' .match_trim_vectors(x) +#' +#' x <- list(a = 1:10, b = 2:15) +#' .match_trim_vectors(x) +#' +#' x <- list(a = 1:20, b = 1:5) +#' .match_trim_vectors(x) +.match_trim_vector_index <- function(x, n = 5) { + lens <- lengths(x) + min_len <- min(lens) + if (length(unique(lens)) == 1) + replicate(n = length(lens), seq_len(min_len)) + hd <- vapply(x, function(z) mean(head(z, n = n)), numeric(1)) + tl <- vapply(x, function(z) mean(tail(z, n = n)), numeric(1)) + if (diff(range(hd)) <= diff(range(tl))) + replicate(n = length(x), 1:min_len, FALSE) + else + lapply(lens, function(z) (z - min_len + 1):z) +} + +.match_trim_vectors <- function(x, n = 5, idxs) { + if (missing(idxs)) + idxs <- .match_trim_vector_index(x, n = n) + mapply(x, idxs, FUN = function(z, idx) z[idx], SIMPLIFY = FALSE) +} + +#' @title Adjust retention times based on alignment result from subset +#' +#' @description +#' +#' This function adjusts retention times based on the alignment results on a +#' subset of samples from an experiment. Specifically, the samples **not** part +#' of `subset` are adjusted based on the adjusted retention times of the +#' *closest* `subset` samples: +#' +#' How the retention times will be adjusted depends on `method`: +#' - `"previous"`: adjusted retention times will match the adjusted retention +#' times of the closest previous subset-sample. +#' - `"average"`: adjusted retention times for a non-subset sample is calculated +#' based on the average retention times of the closest previous and following +#' subset-sample. The average is calculated based on an weighted average +#' with weights representing the distance of the non-subset sample to the +#' closest subset samples. +#' +#' @param rtraw `list` of raw retention times, one element/vector per sample. +#' +#' @param rtadj `list` of adjusted retention times, one element/vector per +#' sample. Has to have the same length than `rtraw`. +#' +#' @param subset `integer` with the indices of the `subset` on which the +#' alignment has been performed. +#' +#' @param method `character` specifying the method with which the non-subset +#' samples are adjusted: either `"previous"` or `"average"`. See details. +#' +#' @return `list` of adjusted retention times. +#' +#' @author Johannes Rainer +#' +#' @noRd +#' +#' @md +adjustRtimeSubset <- function(rtraw, rtadj, subset, + method = c("average", "previous")) { + method <- match.arg(method) + if (length(rtraw) != length(rtadj)) + stop("Lengths of 'rtraw' and 'rtadj' have to match.") + if (missing(subset)) + subset <- seq_along(rtraw) + if (!all(subset %in% seq_along(rtraw))) + stop("'subset' is out of bounds.") + ## if (length(subset) == length(rtraw)) { + ## cat("return rtadj\n") + ## return(rtadj) + ## } + no_subset <- seq_len(length(rtraw))[-subset] + for (i in no_subset) { + message("Aligning sample number ", i, " against subset ... ", + appendLF = FALSE) + if (method == "previous") { + i_adj <- xcms:::.get_closest_index(i, subset, method = "previous") + rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rtraw[[i_adj]], + rtadj[[i_adj]]) + } + if (method == "average") { + i_ref <- c(xcms:::.get_closest_index(i, subset, method = "previous"), + xcms:::.get_closest_index(i, subset, method = "next")) + trim_idx <- .match_trim_vector_index(rtraw[i_ref]) + rt_raw_ref <- do.call( + cbind, .match_trim_vectors(rtraw[i_ref], idxs = trim_idx)) + rt_adj_ref <- do.call( + cbind, .match_trim_vectors(rtadj[i_ref], idxs = trim_idx)) + wghts <- 1 / abs(i_ref - i) # weights depending on distance to i + rt_raw_ref <- apply(rt_raw_ref, 1, weighted.mean, w = wghts) + rt_adj_ref <- apply(rt_adj_ref, 1, weighted.mean, w = wghts) + rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rt_raw_ref, + rt_adj_ref) + } + message("OK") + } + rtadj +} diff --git a/R/do_findChromPeaks-functions.R b/R/do_findChromPeaks-functions.R index 9f4ed488f..b6d8d5e22 100644 --- a/R/do_findChromPeaks-functions.R +++ b/R/do_findChromPeaks-functions.R @@ -47,13 +47,13 @@ #' #' @param mz Numeric vector with the individual m/z values from all scans/ #' spectra of one file/sample. -#' +#' #' @param int Numeric vector with the individual intensity values from all #' scans/spectra of one file/sample. -#' +#' #' @param scantime Numeric vector of length equal to the number of #' spectra/scans of the data representing the retention time of each scan. -#' +#' #' @param valsPerSpect Numeric vector with the number of values for each #' spectrum. #' @@ -62,7 +62,7 @@ #' generated visualizing the identified chromatographic peak. Note: this #' argument is for backward compatibility only and will be removed in #' future. -#' +#' #' @inheritParams findChromPeaks-centWave #' #' @family core peak detection functions @@ -76,7 +76,7 @@ #' A matrix, each row representing an identified chromatographic peak, #' with columns: #' \describe{ -#' +#' #' \item{mz}{Intensity weighted mean of m/z values of the peak across #' scans.} #' \item{mzmin}{Minimum m/z of the peak.} @@ -93,7 +93,7 @@ #' } #' Additional columns for \code{verboseColumns = TRUE}: #' \describe{ -#' +#' #' \item{mu}{Gaussian parameter mu.} #' \item{sigma}{Gaussian parameter sigma.} #' \item{h}{Gaussian parameter h.} @@ -104,28 +104,33 @@ #' \item{scmin}{Left peak limit found by wavelet analysis (scan number).} #' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} #' } -#' +#' #' @author Ralf Tautenhahn, Johannes Rainer #' #' @seealso \code{\link{centWave}} for the standard user interface method. #' #' @examples #' ## Load the test file -#' library(faahKO) -#' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -#' xr <- xcmsRaw(fs, profstep = 0) +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Subset to one file and restrict to a certain retention time range +#' data <- filterRt(filterFile(faahko_sub, 1), c(2500, 3000)) +#' +#' ## Get m/z and intensity values +#' mzs <- mz(data) +#' ints <- intensity(data) #' -#' ## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave -#' mzVals <- xr@env$mz -#' intVals <- xr@env$intensity #' ## Define the values per spectrum: -#' valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) +#' valsPerSpect <- lengths(mzs) #' -#' ## Calling the function. We're using a large value for noise to speed up -#' ## the call in the example performance - in a real use case we would either +#' ## Calling the function. We're using a large value for noise and prefilter +#' ## to speed up the call in the example - in a real use case we would either #' ## set the value to a reasonable value or use the default value. -#' res <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, -#' scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 10000) +#' res <- do_findChromPeaks_centWave(mz = unlist(mzs), int = unlist(ints), +#' scantime = rtime(data), valsPerSpect = valsPerSpect, noise = 10000, +#' prefilter = c(3, 10000)) #' head(res) do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ppm = 25, @@ -141,7 +146,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL, - sleep = 0) { + sleep = 0, + extendLengthMSW = FALSE) { if (getOption("originalCentWave", default = TRUE)) { ## message("DEBUG: using original centWave.") .centWave_orig(mz = mz, int = int, scantime = scantime, @@ -151,7 +157,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, verboseColumns = verboseColumns, roiList = roiList, firstBaselineCheck = firstBaselineCheck, - roiScales = roiScales, sleep = sleep) + roiScales = roiScales, sleep = sleep, + extendLengthMSW = extendLengthMSW) } else { ## message("DEBUG: using modified centWave.") .centWave_new(mz = mz, int = int, scantime = scantime, @@ -172,9 +179,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, integrate = 1, mzdiff = -0.001, fitgauss = FALSE, noise = 0, ## noise.local=TRUE, sleep = 0, verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = NULL) { - ## TODO @jo Ensure in upstream method that data is in centroided mode! - ## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange + firstBaselineCheck = TRUE, roiScales = NULL, + extendLengthMSW = FALSE) { ## Input argument checking. if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", @@ -368,7 +374,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, idxs <- which(eic$scan %in% seq(scrange[1], scrange[2])) mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]) ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange) - omz <- .Call("getMZ", mz, int, scanindex, as.double(mzrange), + omz <- .Call("getWeightedMZ", mz, int, scanindex, as.double(mzrange), as.integer(scrange), as.integer(length(scantime)), PACKAGE = 'xcms') ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange) @@ -404,7 +410,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, noise <- estimateChromNoise(noised, trim = 0.05, minPts = 3 * minPeakWidth) ## any continuous data above 1st baseline ? - if (firstBaselineCheck & + if (firstBaselineCheck && !continuousPtsAboveThreshold(fd, threshold = noise, num = minPtsAboveBaseLine)) next @@ -419,7 +425,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ## is there any data above S/N * threshold ? if (!(any(fd - baseline >= sdthr))) next - wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh') + wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh', + extendLengthMSW = extendLengthMSW) if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr))) next if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise @@ -491,7 +498,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, mzmean <- do.call(mzCenterFun, list(mz = mz.value, intensity = mz.int)) - + ## Compute dppm only if needed dppm <- NA if (verboseColumns) { @@ -556,7 +563,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, lm <- .narrow_rt_boundaries(lm, d) lm_seq <- lm[1]:lm[2] pd <- d[lm_seq] - + peakrange <- td[lm] peaks[p, "rtmin"] <- scantime[peakrange[1]] peaks[p, "rtmax"] <- scantime[peakrange[2]] @@ -704,7 +711,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ## measured intensity. This avoids mz ranges from 0 to max mz of the peak, ## with the mz=0 corresponding actually to scans in which no intensity was ## measured. Search for "@MOD1" to jump to the respective code. -## +## ## 2) The intensities for the peak are reloaded with the refined mz range during ## the postprocessing. Search for "@MOD2" to jump to the respective code. ## @@ -723,8 +730,6 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, firstBaselineCheck = TRUE, roiScales = NULL) { if (sleep) warning("Parameter 'sleep' is defunct") - ## TODO @jo Ensure in upstream method that data is in centroided mode! - ## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange ## Input argument checking. if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", @@ -903,7 +908,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ## original mzROI range idxs <- which(eic$scan %in% seq(scrange[1], scrange[2])) mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]) - omz <- .Call("getMZ", mz, int, scanindex, as.double(mzrange), + omz <- .Call("getWeightedMZ", mz, int, scanindex, as.double(mzrange), as.integer(scrange), as.integer(length(scantime)), PACKAGE = 'xcms') if (all(omz == 0)) { @@ -936,7 +941,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, noise <- estimateChromNoise(noised, trim = 0.05, minPts = 3 * minPeakWidth) ## any continuous data above 1st baseline ? - if (firstBaselineCheck & + if (firstBaselineCheck && !continuousPtsAboveThreshold(fd, threshold = noise, num = minPtsAboveBaseLine)) next @@ -1126,7 +1131,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, lm <- .narrow_rt_boundaries(lm, d) lm_seq <- lm[1]:lm[2] pd <- current_ints[lm_seq] - + peakrange <- td[lm] peaks[p, "rtmin"] <- scantime[peakrange[1]] peaks[p, "rtmax"] <- scantime[peakrange[2]] @@ -1203,7 +1208,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, if (!verboseColumns) p <- p[, basenames, drop = FALSE] return(p) - + uorder <- order(p[, "into"], decreasing = TRUE) pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE]) uindex <- rectUnique(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks @@ -1243,7 +1248,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, #' The \code{ppm} and \code{checkBack} parameters have shown less influence #' than the other parameters and exist to give users flexibility and #' better accuracy. -#' +#' #' @inheritParams do_findChromPeaks_centWave #' #' @inheritParams findChromPeaks-centWave @@ -1264,12 +1269,12 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, #' \item{into}{Integrated (original) intensity of the peak.} #' \item{maxo}{Maximum intensity of the peak.} #' } -#' +#' #' If \code{withWave} is set to \code{TRUE}, the result is the same as #' returned by the \code{\link{do_findChromPeaks_centWave}} method. -#' +#' #' @family core peak detection functions -#' +#' #' @seealso \code{\link{massifquant}} for the standard user interface method. #' #' @references @@ -1280,22 +1285,27 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, #' @author Christopher Conley #' #' @examples -#' library(faahKO) -#' library(xcms) -#' cdfpath <- system.file("cdf", package = "faahKO") -#' cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) -#' -#' ## Read the first file -#' xraw <- xcmsRaw(cdffiles[1]) -#' ## Extract the required data -#' mzVals <- xraw@env$mz -#' intVals <- xraw@env$intensity +#' +#' ## Load the test file +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Subset to one file and restrict to a certain retention time range +#' data <- filterRt(filterFile(faahko_sub, 1), c(2500, 3000)) +#' +#' ## Get m/z and intensity values +#' mzs <- mz(data) +#' ints <- intensity(data) +#' #' ## Define the values per spectrum: -#' valsPerSpect <- diff(c(xraw@scanindex, length(mzVals))) +#' valsPerSpect <- lengths(mzs) #' -#' ## Perform the peak detection using massifquant -#' res <- do_findChromPeaks_massifquant(mz = mzVals, int = intVals, -#' scantime = xraw@scantime, valsPerSpect = valsPerSpect) +#' ## Perform the peak detection using massifquant - setting prefilter to +#' ## a high value to speed up the call for the example +#' res <- do_findChromPeaks_massifquant(mz = unlist(mzs), int = unlist(ints), +#' scantime = rtime(data), valsPerSpect = valsPerSpect, +#' prefilter = c(3, 10000)) #' head(res) do_findChromPeaks_massifquant <- function(mz, int, @@ -1473,11 +1483,11 @@ do_findChromPeaks_massifquant <- function(mz, #' method). #' #' @inheritParams do_findChromPeaks_centWave -#' +#' #' @inheritParams findChromPeaks-centWave -#' +#' #' @inheritParams imputeLinInterpol -#' +#' #' @inheritParams findChromPeaks-matchedFilter #' #' @return A matrix, each row representing an identified chromatographic peak, @@ -1496,7 +1506,7 @@ do_findChromPeaks_massifquant <- function(mz, #' \item{i}{Rank of peak in merged EIC (\code{<= max}).} #' \item{sn}{Signal to noise ratio of the peak} #' } -#' +#' #' @references #' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and #' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite @@ -1510,21 +1520,26 @@ do_findChromPeaks_massifquant <- function(mz, #' @seealso \code{\link{binYonX}} for a binning function, #' \code{\link{imputeLinInterpol}} for the interpolation of missing values. #' \code{\link{matchedFilter}} for the standard user interface method. -#' +#' #' @examples +#' #' ## Load the test file -#' library(faahKO) -#' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -#' xr <- xcmsRaw(fs) +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Subset to one file and restrict to a certain retention time range +#' data <- filterRt(filterFile(faahko_sub, 1), c(2500, 3000)) +#' +#' ## Get m/z and intensity values +#' mzs <- mz(data) +#' ints <- intensity(data) #' -#' ## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave -#' mzVals <- xr@env$mz -#' intVals <- xr@env$intensity #' ## Define the values per spectrum: -#' valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) +#' valsPerSpect <- lengths(mzs) #' -#' res <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, -#' scantime = xr@scantime, valsPerSpect = valsPerSpect) +#' res <- do_findChromPeaks_matchedFilter(mz = unlist(mzs), int = unlist(ints), +#' scantime = rtime(data), valsPerSpect = valsPerSpect) #' head(res) do_findChromPeaks_matchedFilter <- function(mz, int, @@ -1707,7 +1722,7 @@ do_findChromPeaks_matchedFilter <- function(mz, Sys.sleep(sleep) } ## -- end sleep plot - + yfilt[peakrange[1]:peakrange[2]] <- 0 num <- num + 1 ## Double the size of the output matrix if it's full @@ -1778,7 +1793,7 @@ do_findChromPeaks_matchedFilter <- function(mz, " 'sum(valsPerSpect)'.") ## Generate the 'profile' matrix, i.e. perform the binning: - mrange <- range(mz) + mrange <- range(mz[mz > 0]) mass <- seq(floor(mrange[1] / binSize) * binSize, ceiling(mrange[2] / binSize) * binSize, by = binSize) @@ -1910,7 +1925,7 @@ do_findChromPeaks_matchedFilter <- function(mz, Sys.sleep(sleep) } ## end sleep/plot - + yfilt[peakrange[1]:peakrange[2]] <- 0 num <- num + 1 ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy, @@ -1964,7 +1979,7 @@ do_findChromPeaks_matchedFilter <- function(mz, #' \emph{xcmsDirect} vignette for more information. #' #' @inheritParams do_findChromPeaks_centWave -#' +#' #' @inheritParams findChromPeaks-centWave #' #' @param ... Additional parameters to be passed to the @@ -1990,7 +2005,7 @@ do_findChromPeaks_matchedFilter <- function(mz, #' @seealso \code{\link{MSW}} for the standard user interface #' method. \code{\link{peakDetectionCWT}} from the #' \code{MassSpecWavelet} package. -#' +#' #' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer do_findPeaks_MSW <- function(mz, int, snthresh = 3, verboseColumns = FALSE, ...) { @@ -2333,10 +2348,21 @@ do_define_isotopes <- function(peaks., maxCharge = 3, maxIso = 5, } #' @param peaks. see do_define_isotopes -#' +#' #' @param polarity character(1) defining the polarity, either \code{"positive"} #' or \code{"negative"}. -#' +#' +#' @note +#' +#' Reference for considered adduct distances: +#' Huang N.; Siegel M.M.1; Kruppa G.H.; Laukien F.H.; J Am Soc Mass Spectrom +#' 1999, 10, 1166-1173. +#' +#' Reference for contaminants: +#' Interferences and comtaminants encountered in modern mass spectrometry +#' Bernd O. Keller, Jie Sui, Alex B. Young and Randy M. Whittal, ANALYTICA +#' CHIMICA ACTA, 627 (1): 71-81) +#' #' @return see do_define_isotopes. #' #' @noRd @@ -2351,16 +2377,6 @@ do_define_adducts <- function(peaks., polarity = "positive") { } if (is.data.frame(peaks.)) peaks. <- as.matrix(peaks.) - ## considered adduct distances - ## reference: Huang N.; Siegel M.M.1; Kruppa G.H.; Laukien F.H.; J Am Soc - ## Mass Spectrom 1999, 10, 1166–1173; Automation of a Fourier transform ion - ## cyclotron resonance mass spectrometer for acquisition, analysis, and - ## e-mailing of high-resolution exact-mass electrospray ionization mass - ## spectral data - ## see also for contaminants: Interferences and contaminants encountered - ## in modern mass spectrometry (Bernd O. Keller, Jie Sui, Alex B. Young - ## and Randy M. Whittal, ANALYTICA CHIMICA ACTA, 627 (1): 71-81) - mH <- 1.0078250322 mNa <- 22.98976928 mK <- 38.96370649 @@ -2569,13 +2585,13 @@ do_findKalmanROI <- function(mz, int, scantime, valsPerSpect, #' \code{\link{centWave}}. #' #' @inheritParams findChromPeaks-centWave -#' +#' #' @inheritParams findChromPeaks-centWaveWithPredIsoROIs -#' +#' #' @inheritParams do_findChromPeaks_centWave #' #' @family core peak detection functions -#' +#' #' @return A matrix, each row representing an identified chromatographic peak. #' All non-overlapping peaks identified in both centWave runs are reported. #' The matrix columns are: @@ -2605,7 +2621,7 @@ do_findKalmanROI <- function(mz, int, scantime, valsPerSpect, #' \item{scmin}{Left peak limit found by wavelet analysis (scan number).} #' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} #' } -#' +#' #' @rdname do_findChromPeaks_centWaveWithPredIsoROIs #' #' @author Hendrik Treutler, Johannes Rainer @@ -2616,7 +2632,7 @@ do_findChromPeaks_centWaveWithPredIsoROIs <- verboseColumns = FALSE, roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL, snthreshIsoROIs = 6.25, maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, - polarity = "unknown") { + polarity = "unknown", extendLengthMSW = FALSE) { ## Input argument checking: most of it will be done in ## do_findChromPeaks_centWave polarity <- match.arg(polarity, c("positive", "negative", "unknown")) @@ -2636,7 +2652,8 @@ do_findChromPeaks_centWaveWithPredIsoROIs <- verboseColumns = TRUE, roiList = roiList, firstBaselineCheck = firstBaselineCheck, - roiScales = roiScales) + roiScales = roiScales, + extendLengthMSW = extendLengthMSW) return(do_findChromPeaks_addPredIsoROIs(mz = mz, int = int, scantime = scantime, valsPerSpect = valsPerSpect, @@ -2678,7 +2695,7 @@ do_findChromPeaks_centWaveWithPredIsoROIs <- #' (first) centWave run. #' #' @inheritParams findChromPeaks-centWave -#' +#' #' @inheritParams do_findChromPeaks_centWave #' #' @rdname do_findChromPeaks_centWaveWithPredIsoROIs @@ -2689,8 +2706,6 @@ do_findChromPeaks_addPredIsoROIs <- verboseColumns = FALSE, peaks. = NULL, maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, polarity = "unknown") { - ## Input argument checking: most of it will be done in - ## do_findChromPeaks_centWave polarity <- match.arg(polarity, c("positive", "negative", "unknown")) ## These variables might at some point be added as function args. @@ -2706,6 +2721,8 @@ do_findChromPeaks_addPredIsoROIs <- tittle[expand_mz] f_mod[expand_mz, "mzmax"] <- peaks.[expand_mz, "mz"] + tittle[expand_mz] } + ## issue #545: with fitgauss = TRUE the scmin and scmax can be -1 + f_mod <- f_mod[f_mod[, "scmin"] < f_mod[, "scmax"], , drop = FALSE] ## Add predicted ROIs if (addNewIsotopeROIs) { iso_ROIs <- do_define_isotopes(peaks. = f_mod, @@ -2749,7 +2766,7 @@ do_findChromPeaks_addPredIsoROIs <- "valid signal found!") return(peaks.) } - + ## 3) centWave using the identified ROIs. roiL <- split(as.data.frame(newROIs), f = 1:nrow(newROIs)) feats_2 <- do_findChromPeaks_centWave(mz = mz, int = int, @@ -2775,19 +2792,18 @@ do_findChromPeaks_addPredIsoROIs <- feats_2 <- feats_2[!any_na, , drop = FALSE] no_mz_width <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 no_rt_width <- (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 - ## remove empty area - ## no_area <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 || - ## (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 - no_area <- no_mz_width || no_rt_width + no_area <- no_mz_width | no_rt_width if (any(no_area)) feats_2 <- feats_2[!no_area, , drop = FALSE] } ## 4) Check and remove ROIs overlapping with peaks. if (nrow(feats_2) > 0) { - ## Comparing each ROI with each peak; slightly modified from the original - ## code in which we prevent calling apply followed by two lapply. + ## Comparing each ROI with each peak; slightly modified from the + ## original code in which we prevent calling apply followed by + ## two lapply. + ## Update: we're no longer removing original peaks, as they are + ## themselfs overlapping, thus we would remove too many. removeROIs <- rep(FALSE, nrow(feats_2)) - removeFeats <- rep(FALSE, nrow(peaks.)) overlapProportionThreshold <- 0.01 for (i in 1:nrow(feats_2)) { ## Compare ROI i with all peaks (peaks) and check if its @@ -2807,26 +2823,23 @@ do_findChromPeaks_addPredIsoROIs <- overlappingRt <- abs(peakRtCenter - roiRtCenter) <= (roiRtRadius + peakRtRadius) is_overlapping <- overlappingMz & overlappingRt - ## Now determine whether we remove the ROI or the peak, depending - ## on the raw signal intensity. + ## Now determine whether we remove the ROI or the peak, + ## depending on the raw signal intensity. if (any(is_overlapping)) { - if (any(peaks.[is_overlapping, "into"] > feats_2[i, "into"])) { + if (any(peaks.[is_overlapping, "into"] > + feats_2[i, "into"])) removeROIs[i] <- TRUE - } else { - removeFeats[is_overlapping] <- TRUE - } } } feats_2 <- feats_2[!removeROIs, , drop = FALSE] - peaks. <- peaks.[!removeFeats, , drop = FALSE] } if (!verboseColumns) peaks. <- peaks.[ , c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intb", "maxo", "sn")] - if (nrow(feats_2) == 0) - return(peaks.) + if (nrow(feats_2)) + rbind(peaks., feats_2) else - return(rbind(peaks., feats_2)) + peaks. } do_findChromPeaks_addPredIsoROIs_mod <- @@ -2897,7 +2910,7 @@ do_findChromPeaks_addPredIsoROIs_mod <- return(peaks.) } cat("No. of input peaks: ", nrow(peaks.), "\n") - + ## 3) centWave using the identified ROIs. roiL <- split(as.data.frame(newROIs), f = 1:nrow(newROIs)) cat("Identified iso ROIs: ", length(roiL), "\n") @@ -3015,7 +3028,7 @@ do_findChromPeaks_addPredIsoROIs_mod <- #' in the peak detection step. #' #' @param ... currently ignored. -#' +#' #' @family peak detection functions for chromatographic data #' #' @seealso [matchedFilter] for a detailed description of the peak detection @@ -3024,10 +3037,10 @@ do_findChromPeaks_addPredIsoROIs_mod <- #' @author Johannes Rainer #' #' @return -#' +#' #' A matrix, each row representing an identified chromatographic peak, with #' columns: -#' +#' #' - `"rt"`: retention time of the peak's midpoint (time of the maximum signal). #' - `"rtmin"`: minimum retention time of the peak. #' - `"rtmax"`: maximum retention time of the peak. @@ -3041,12 +3054,16 @@ do_findChromPeaks_addPredIsoROIs_mod <- #' #' @examples #' -#' ## Read one file from the faahKO package -#' od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -#' mode = "onDisk") +#' ## Load the test file +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Subset to one file and drop identified chromatographic peaks +#' data <- dropChromPeaks(filterFile(faahko_sub, 1)) #' #' ## Extract chromatographic data for a small m/z range -#' chr <- chromatogram(od, mz = c(272.1, 272.3))[1, 1] +#' chr <- chromatogram(data, mz = c(272.1, 272.3), rt = c(3000, 3200))[1, 1] #' #' pks <- peaksWithMatchedFilter(intensity(chr), rtime(chr)) #' pks @@ -3065,7 +3082,7 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, ## Replace NAs with 0 - that's how the original code handled it. nas <- is.na(int) int[nas] <- 0 - + n_vals <- length(int) N <- nextn(n_vals) rtrange <- range(rt) @@ -3117,7 +3134,7 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' @title Identify peaks in chromatographic data using centWave #' #' @description -#' +#' #' `peaksWithCentWave` identifies (chromatographic) peaks in purely #' chromatographic data, i.e. based on intensity and retention time values #' without m/z values. @@ -3131,7 +3148,7 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' defines the corresponding positions +/- `peakwidth[2]` as the ROIs. Noise #' estimation bases also on these ROIs and can thus be different from [centWave] #' resulting in different signal to noise ratios. -#' +#' #' @param int `numeric` with intensity values. #' #' @param rt `numeric` with the retention time for the intensities. Length has @@ -3142,7 +3159,7 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' #' @param snthresh `numeric(1)` defining the signal to noise ratio cutoff. #' Peaks with a signal to noise ratio < `snthresh` are omitted. -#' +#' #' @param prefilter `numeric(2)` (`c(k, I)`): only regions of interest with at #' least `k` centroids with signal `>= I` are returned in the first #' step. @@ -3155,7 +3172,7 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' #' @param fitgauss `logical(1)` whether or not a Gaussian should be fitted #' to each peak. -#' +#' #' @param noise `numeric(1)` defining the minimum required intensity for #' centroids to be considered in the first analysis step (definition of #' the *regions of interest*). @@ -3164,10 +3181,16 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' columns should be returned. #' #' @param firstBaselineCheck `logical(1)`. If `TRUE` continuous data within -#' regions of interest is checked to be above the first baseline. +#' regions of interest is checked to be above the first baseline. In detail, +#' a first *rough* estimate of the noise is calculated and peak detection +#' is performed only in regions in which multiple sequential signals are +#' higher than this first estimated baseline/noise level. +#' +#' @param extendLengthMSW `logical(1)`. If `TRUE` the "open" method of EIC +#' extension is used, rather than the default "reflect" method. #' #' @param ... currently ignored. -#' +#' #' @family peak detection functions for chromatographic data #' #' @seealso [centWave] for a detailed description of the peak detection @@ -3176,10 +3199,10 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' @author Johannes Rainer #' #' @return -#' +#' #' A matrix, each row representing an identified chromatographic peak, with #' columns: -#' +#' #' - `"rt"`: retention time of the peak's midpoint (time of the maximum signal). #' - `"rtmin"`: minimum retention time of the peak. #' - `"rtmax"`: maximum retention time of the peak. @@ -3187,11 +3210,11 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' - `"intb"`: per-peak baseline corrected integrated peak intensity. #' - `"maxo"`: maximum (original) intensity of the peak. #' - `"sn"`: signal to noise ratio of the peak defined as -#' `(maxo - baseline)/sd` with `sd` being the standard defiatio of the local +#' `(maxo - baseline)/sd` with `sd` being the standard deviation of the local #' chromatographic noise. #' #' Additional columns for `verboseColumns = TRUE`: -#' +#' #' - `"mu"`: gaussian parameter mu. #' - `"sigma"`: gaussian parameter sigma. #' - `"h"`: gaussian parameter h. @@ -3213,14 +3236,14 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' #' ## Extract chromatographic data for a small m/z range #' mzr <- c(272.1, 272.2) -#' chr <- chromatogram(od, mz = mzr)[1, 1] +#' chr <- chromatogram(od, mz = mzr, rt = c(3000, 3300))[1, 1] #' #' int <- intensity(chr) #' rt <- rtime(chr) #' #' ## Plot the region #' plot(chr, type = "h") -#' +#' #' ## Identify peaks in the chromatographic data #' pks <- peaksWithCentWave(intensity(chr), rtime(chr)) #' pks @@ -3238,6 +3261,7 @@ peaksWithCentWave <- function(int, rt, noise = 0, ## noise.local=TRUE, verboseColumns = FALSE, firstBaselineCheck = TRUE, + extendLengthMSW = FALSE, ... ) { if (length(peakwidth) != 2) @@ -3246,7 +3270,7 @@ peaksWithCentWave <- function(int, rt, int[is.na(int)] <- 0 rois <- .getRtROI(int, rt, peakwidth = peakwidth, noise = noise, prefilter = prefilter) - + basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intb", "maxo", "sn") verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale", @@ -3264,8 +3288,9 @@ peaksWithCentWave <- function(int, rt, warning("No scales? Please check peak width!") if (verboseColumns) basenames <- c(basenames, verbosenames) - return(invisible(matrix(nrow = 0, ncol = length(basenames), - dimnames = list(character(), basenames)))) + return(invisible( + matrix(nrow = 0, ncol = length(basenames), + dimnames = list(character(), basenames))[, -(1:3), drop = FALSE])) } if (length(scalerange) > 1) scales <- seq(from = scalerange[1], to = scalerange[2], by = 2) @@ -3288,7 +3313,7 @@ peaksWithCentWave <- function(int, rt, for (i in seq_len(nrow(rois))) { scmin <- rois[i, "scmin"] scmax <- rois[i, "scmax"] - + N <- scmax - scmin + 1 peaks <- matrix(ncol = peaks_ncols, nrow = 0, dimnames = list(character(), peaks_names)) @@ -3331,7 +3356,7 @@ peaksWithCentWave <- function(int, rt, noise <- xcms:::estimateChromNoise(noised, trim = 0.05, minPts = 3 * minPeakWidth) ## any continuous data above 1st baseline ? - if (firstBaselineCheck & + if (firstBaselineCheck && !continuousPtsAboveThreshold(fd, threshold = noise, num = minPtsAboveBaseLine)) next @@ -3346,7 +3371,8 @@ peaksWithCentWave <- function(int, rt, ## is there any data above S/N * threshold ? if (!(any(fd - baseline >= sdthr))) next - wCoefs <- xcms:::MSW.cwt(d, scales = scales, wavelet = 'mexh') + wCoefs <- xcms:::MSW.cwt(d, scales = scales, wavelet = 'mexh', + extendLengthMSW = extendLengthMSW) if (!(!is.null(dim(wCoefs)) && any((wCoefs - baseline) >= sdthr))) next if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise @@ -3409,7 +3435,7 @@ peaksWithCentWave <- function(int, rt, if (is.na(p1)) p1 <- 1 if (is.na(p2)) p2 <- N maxint <- max(od[p1:p2]) - + peaks <- rbind( peaks, c(1, 1, 1, # mz, mzmin, mzmax, @@ -3472,7 +3498,7 @@ peaksWithCentWave <- function(int, rt, peaks[p, "intb"] <- pwid * sum(db[db>0]) peaks[p, "lmin"] <- lm[1] peaks[p, "lmax"] <- lm[2] - + if (fitgauss) { ## perform gaussian fits, use wavelets for inital parameters td_lm <- td[lm_range] @@ -3508,11 +3534,11 @@ peaksWithCentWave <- function(int, rt, peaks <- joinOverlappingPeaks(td, d, otd, rep(1, length(otd)), od, rt, scan.range, peaks, maxGaussOverlap, mzCenterFun = mzCenter.wMean) - + if (!is.null(peaks)) peaklist[[length(peaklist) + 1]] <- peaks } # end of for (i in seq_len(nrow(rois))) - + if (length(peaklist) == 0) { warning("No peaks found!") if (verboseColumns) @@ -3521,7 +3547,7 @@ peaksWithCentWave <- function(int, rt, else nopeaks <- matrix(nrow = 0, ncol = length(basenames), dimnames = list(character(), basenames)) - return(nopeaks) + return(nopeaks[, -(1:3), drop = FALSE]) } p <- do.call(rbind, peaklist) if (!verboseColumns) @@ -3549,7 +3575,7 @@ peaksWithCentWave <- function(int, rt, #' #' @param peakwidth `numeric(2)` with the lowe and upper bound for the expected #' peak widths. -#' +#' #' @param prefilter `numeric(2)` (`c(k, I)`): only regions of interest with at #' least `k` centroids with signal `>= I` are returned. #' @@ -3559,7 +3585,7 @@ peaksWithCentWave <- function(int, rt, #' @return `matrix` with two columns `"scmin"`, `"scmax"` and `"sccent"` with #' the index of (lower and upper) bound defining the region of interest and #' the position of the center. -#' +#' #' @md #' #' @noRd @@ -3598,7 +3624,7 @@ peaksWithCentWave <- function(int, rt, scmin <- sapply(pk_idx - up_bound, max, y = 1) scmax <- sapply(pk_idx + up_bound, min, y = int_len) ## Second filter: at least k values larger I - roi_idxs <- mapply(scmin, scmax, FUN = seq) + roi_idxs <- mapply(scmin, scmax, FUN = seq, SIMPLIFY = FALSE) ok <- vapply(roi_idxs, FUN = function(x, k, I) { sum(int[x] >= I) >= k @@ -3642,4 +3668,3 @@ peaksWithCentWave <- function(int, rt, } lm } - diff --git a/R/do_groupChromPeaks-functions.R b/R/do_groupChromPeaks-functions.R index d87c101d0..c8ad0e6ff 100644 --- a/R/do_groupChromPeaks-functions.R +++ b/R/do_groupChromPeaks-functions.R @@ -1,77 +1,83 @@ ## Correspondence functions. #' @include functions-Params.R -##' @title Core API function for peak density based chromatographic peak -##' grouping -##' -##' @description The \code{do_groupChromPeaks_density} function performs -##' chromatographic peak grouping based on the density (distribution) of peaks, -##' found in different samples, along the retention time axis in slices of -##' overlapping mz ranges. -##' -##' @details For overlapping slices along the mz dimension, the function -##' calculates the density distribution of identified peaks along the -##' retention time axis and groups peaks from the same or different samples -##' that are close to each other. See [Smith 2006] for more details. -##' -##' @note The default settings might not be appropriate for all LC/GC-MS setups, -##' especially the \code{bw} and \code{binSize} parameter should be adjusted -##' accordingly. -##' -##' @param peaks A \code{matrix} or \code{data.frame} with the mz values and -##' retention times of the identified chromatographic peaks in all samples of an -##' experiment. Required columns are \code{"mz"}, \code{"rt"} and -##' \code{"sample"}. The latter should contain \code{numeric} values representing -##' the index of the sample in which the peak was found. -##' -##' @inheritParams groupChromPeaks-density -##' -##' @param sleep \code{numeric(1)} defining the time to \emph{sleep} between -##' iterations and plot the result from the current iteration. -##' -##' @return A \code{list} with elements \code{"featureDefinitions"} and -##' \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row -##' representing a (mz-rt) feature (i.e. a peak group) with columns: -##' \describe{ -##' \item{"mzmed"}{median of the peaks' apex mz values.} -##' \item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} -##' \item{"mzmax"}{largest mz value of all peaks' apex within the feature.} -##' \item{"rtmed"}{the median of the peaks' retention times.} -##' \item{"rtmin"}{the smallest retention time of the peaks in the group.} -##' \item{"rtmax"}{the largest retention time of the peaks in the group.} -##' \item{"npeaks"}{the total number of peaks assigned to the feature. -##' Note that this number can be larger than the total number of samples, since -##' multiple peaks from the same sample could be assigned to a feature.} -##' } -##' \code{"peakIndex"} is a \code{list} with the indices of all peaks in a -##' feature in the \code{peaks} input matrix. -##' -##' @family core peak grouping algorithms -##' -##' @references -##' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and -##' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite -##' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -##' \emph{Anal. Chem.} 2006, 78:779-787. -##' -##' @author Colin Smith, Johannes Rainer -##' -##' @examples -##' ## Load the test data set -##' library(faahKO) -##' data(faahko) -##' -##' ## Extract the matrix with the identified peaks from the xcmsSet: -##' fts <- peaks(faahko) -##' -##' ## Perform the peak grouping with default settings: -##' res <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko)) -##' -##' ## The feature definitions: -##' head(res$featureDefinitions) -##' -##' ## The assignment of peaks from the input matrix to the features -##' head(res$peakIndex) +#' @title Core API function for peak density based chromatographic peak +#' grouping +#' +#' @description +#' +#' The `do_groupChromPeaks_density` function performs chromatographic peak +#' grouping based on the density (distribution) of peaks, found in different +#' samples, along the retention time axis in slices of overlapping mz ranges. +#' +#' @details For overlapping slices along the mz dimension, the function +#' calculates the density distribution of identified peaks along the +#' retention time axis and groups peaks from the same or different samples +#' that are close to each other. See (Smith 2006) for more details. +#' +#' @note The default settings might not be appropriate for all LC/GC-MS setups, +#' especially the `bw` and `binSize` parameter should be adjusted +#' accordingly. +#' +#' @param peaks A `matrix` or `data.frame` with the mz values and +#' retention times of the identified chromatographic peaks in all samples of an +#' experiment. Required columns are `"mz"`, `"rt"` and +#' `"sample"`. The latter should contain `numeric` values representing +#' the index of the sample in which the peak was found. +#' +#' @inheritParams groupChromPeaks-density +#' +#' @param sleep `numeric(1)` defining the time to *sleep* between +#' iterations and plot the result from the current iteration. +#' +#' @return +#' +#' A `data.frame`, each row representing a (mz-rt) feature (i.e. a peak group) +#' with columns: +#' +#' - `"mzmed"`: median of the peaks' apex mz values. +#' - `"mzmin"`: smallest mz value of all peaks' apex within the feature. +#' - `"mzmax"`:largest mz value of all peaks' apex within the feature. +#' - `"rtmed"`: the median of the peaks' retention times. +#' - `"rtmin"`: the smallest retention time of the peaks in the group. +#' - `"rtmax"`: the largest retention time of the peaks in the group. +#' - `"npeaks"`: the total number of peaks assigned to the feature. +#' - `"peakidx"`: a `list` with the indices of all peaks in a feature in the +#' `peaks` input matrix. +#' +#' Note that this number can be larger than the total number of samples, since +#' multiple peaks from the same sample could be assigned to a feature. +#' +#' @references +#' +#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +#' Anal. Chem. 2006, 78:779-787. +#' +#' @author Colin Smith, Johannes Rainer +#' +#' @family core peak grouping algorithms +#' +#' @md +#' +#' @examples +#' ## Load the test file +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Extract the matrix with the identified peaks from the xcmsSet: +#' pks <- chromPeaks(faahko_sub) +#' +#' ## Perform the peak grouping with default settings: +#' res <- do_groupChromPeaks_density(pks, sampleGroups = rep(1, 3)) +#' +#' ## The feature definitions: +#' head(res) do_groupChromPeaks_density <- function(peaks, sampleGroups, bw = 30, minFraction = 0.5, minSamples = 1, binSize = 0.25, maxFeatures = 50, @@ -82,7 +88,7 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, "assignment of the samples.") if (missing(peaks)) stop("Parameter 'peaks' is missing!") - if (!is.matrix(peaks) | is.data.frame(peaks)) + if (!(is.matrix(peaks) | is.data.frame(peaks))) stop("'peaks' has to be a 'matrix' or a 'data.frame'!") ## Check that we've got all required columns .reqCols <- c("mz", "rt", "sample") @@ -102,10 +108,12 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, if (max(peaks[, "sample"]) > length(sampleGroups)) stop("Sample indices in 'peaks' are larger than there are sample", " groups specified with 'sampleGroups'!") - + + peaks <- cbind(peaks[, .reqCols, drop = FALSE], + index = seq_len(nrow(peaks))) + ## Order peaks matrix by mz - peakOrder <- order(peaks[, "mz"]) - peaks <- peaks[peakOrder, .reqCols, drop = FALSE] + peaks <- peaks[order(peaks[, "mz"]), , drop = FALSE] rownames(peaks) <- NULL rtRange <- range(peaks[, "rt"]) @@ -113,256 +121,88 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, ## value >= mass[i]. mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + binSize, by = binSize / 2) - masspos <- findEqualGreaterM(peaks[,"mz"], mass) - - groupmat <- matrix(nrow = 512, ncol = 7 + nSampleGroups) - groupindex <- vector("list", 512) + masspos <- findEqualGreaterM(peaks[, "mz"], mass) densFrom <- rtRange[1] - 3 * bw densTo <- rtRange[2] + 3 * bw ## Increase the number of sampling points for the density distribution. densN <- max(512, 2 * 2^(ceiling(log2(diff(rtRange) / (bw / 2))))) endIdx <- 0 - num <- 0 - gcount <- integer(nSampleGroups) - message("Processing ", length(mass) - 1, " mz slices ... ", appendLF = FALSE) + message("Processing ", length(mass) - 1, " mz slices ... ", + appendLF = FALSE) + resL <- vector("list", (length(mass) - 2)) for (i in seq_len(length(mass)-2)) { ## That's identifying overlapping mz slices. startIdx <- masspos[i] endIdx <- masspos[i + 2] - 1 if (endIdx - startIdx < 0) next - curMat <- peaks[startIdx:endIdx, , drop = FALSE] - den <- density(curMat[, "rt"], bw = bw, from = densFrom, to = densTo, - n = densN) - maxden <- max(den$y) - deny <- den$y - ## gmat <- matrix(nrow = 5, ncol = 2 + gcount) - snum <- 0 - ## What's that 20 there? - while (deny[maxy <- which.max(deny)] > maxden / 20 && snum < maxFeatures) { - grange <- descendMin(deny, maxy) - deny[grange[1]:grange[2]] <- 0 - gidx <- which(curMat[,"rt"] >= den$x[grange[1]] & - curMat[,"rt"] <= den$x[grange[2]]) - ## Determine the sample group of the samples in which the peaks - ## were detected and check if they correspond to the required limits. - tt <- table(sampleGroups[unique(curMat[gidx, "sample"])]) - if (!any(tt / sampleGroupTable[names(tt)] >= minFraction & - tt >= minSamples)) - next - snum <- snum + 1 - num <- num + 1 - ## Double the size of the output containers if they're full - if (num > nrow(groupmat)) { - groupmat <- rbind(groupmat, - matrix(nrow = nrow(groupmat), - ncol = ncol(groupmat))) - groupindex <- c(groupindex, vector("list", length(groupindex))) - } - gcount <- rep(0, length(sampleGroupNames)) - names(gcount) <- sampleGroupNames - gcount[names(tt)] <- as.numeric(tt) - groupmat[num, 1] <- median(curMat[gidx, "mz"]) - groupmat[num, 2:3] <- range(curMat[gidx, "mz"]) - groupmat[num, 4] <- median(curMat[gidx, "rt"]) - groupmat[num, 5:6] <- range(curMat[gidx, "rt"]) - groupmat[num, 7] <- length(gidx) - groupmat[num, 7 + seq(along = gcount)] <- gcount - groupindex[[num]] <- sort(peakOrder[(startIdx:endIdx)[gidx]]) - } - if (sleep > 0) { - ## Plot the density - plot(den, main = paste(round(min(curMat[,"mz"]), 2), "-", - round(max(curMat[,"mz"]), 2))) - ## Highlight peaks per sample group. - for (j in 1:nSampleGroups) { - ## Which peaks belong to this sample group. - cur_group_samples <- which(sampleGroups == sampleGroupNames[j]) - idx <- curMat[, "sample"] %in% cur_group_samples - points(curMat[idx, "rt"], curMat[idx, "into"] / - max(curMat[, "into"]) * maxden, - col = j, pch=20) - } - for (j in seq(length = snum)) - abline(v = groupmat[num - snum + j, 5:6], lty = "dashed", col = j) - Sys.sleep(sleep) - } + resL[[i]] <- .group_peaks_density(peaks[startIdx:endIdx, , drop = FALSE], + bw = bw, densFrom = densFrom, + densTo = densTo, densN = densN, + sampleGroups = sampleGroups, + sampleGroupTable = sampleGroupTable, + minFraction = minFraction, + minSamples = minSamples, + maxFeatures = maxFeatures, + sleep = sleep) } message("OK") - - colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", - "npeaks", sampleGroupNames) - - groupmat <- groupmat[seq_len(num), , drop = FALSE] - groupindex <- groupindex[seq_len(num)] - - ## Remove groups that overlap with more "well-behaved" groups - numsamp <- rowSums(groupmat[, (match("npeaks", - colnames(groupmat))+1):ncol(groupmat), - drop = FALSE]) - uorder <- order(-numsamp, groupmat[, "npeaks"]) - - uindex <- rectUnique(groupmat[, c("mzmin","mzmax","rtmin","rtmax"), - drop = FALSE], - uorder) - - return(list(featureDefinitions = groupmat[uindex, , drop = FALSE], - peakIndex = groupindex[uindex])) -} - -## Just to check if we could squeeze a little bit more out using parallel -## processing... -do_groupChromPeaks_density_par <- function(peaks, sampleGroups, - bw = 30, minFraction = 0.5, - minSamples = 1, binSize = 0.25, - maxFeatures = 50) { - if (missing(sampleGroups)) - stop("Parameter 'sampleGroups' is missing! This should be a vector of ", - "length equal to the number of samples specifying the group ", - "assignment of the samples.") - if (missing(peaks)) - stop("Parameter 'peaks' is missing!") - if (!is.matrix(peaks) | is.data.frame(peaks)) - stop("Peaks has to be a 'matrix' or a 'data.frame'!") - ## Check that we've got all required columns - .reqCols <- c("mz", "rt", "sample") - if (!all(.reqCols %in% colnames(peaks))) - stop("Required columns ", - paste0("'", .reqCols[!.reqCols %in% colnames(peaks)],"'", - collapse = ", "), " not found in 'peaks' parameter") - - sampleGroups <- as.character(sampleGroups) - sampleGroupNames <- unique(sampleGroups) - sampleGroupTable <- table(sampleGroups) - nSampleGroups <- length(sampleGroupTable) - - ## Order peaks matrix by mz - peakOrder <- order(peaks[, "mz"]) - peaks <- peaks[peakOrder, .reqCols, drop = FALSE] - rownames(peaks) <- NULL - rtRange <- range(peaks[, "rt"]) - - ## Define the mass slices and the index in the peaks matrix with an mz - ## value >= mass[i]. - mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + binSize, - by = binSize / 2) - masspos <- findEqualGreaterM(peaks[,"mz"], mass) - - groupmat <- matrix(nrow = 512, ncol = 7 + nSampleGroups) - groupindex <- vector("list", 512) - - ## Create the list of peak data subsets. - ftsL <- vector("list", length(mass)) - for (i in seq_len(length(mass) - 2)) { - startIdx <- masspos[i] - endIdx <- masspos[i + 2] - 1 - ftsL[[i]] <- cbind(peaks[startIdx:endIdx, , drop = FALSE], - idx = startIdx:endIdx) + res <- do.call(rbind, resL) + + if (nrow(res)) { + ## Remove groups that overlap with more "well-behaved" groups + numsamp <- rowSums( + as.matrix(res[, (match("npeaks", colnames(res)) +1):(ncol(res) -1), + drop = FALSE])) + uorder <- order(-numsamp, res[, "npeaks"]) + + uindex <- rectUnique( + as.matrix(res[, c("mzmin", "mzmax", "rtmin", "rtmax"), + drop = FALSE]), uorder) + res <- res[uindex, , drop = FALSE] + rownames(res) <- NULL } - ftsL <- ftsL[lengths(ftsL) > 0] - ## Here we can run bplapply: - res <- bplapply(ftsL, function(z, rtr, bw, maxF, sampleGrps, - sampleGroupTbl, minFr, minSmpls, - sampleGroupNms, peakOrdr) { - den <- density(z[, "rt"], bw = bw, from = rtr[1] - 3 * bw, - to = rtr[2] + 3 * bw, - n = max(512, 2^(ceiling(log2(diff(rtr) / (bw / 2)))))) - maxden <- max(den$y) - deny <- den$y - snum <- 0 - tmpL <- vector("list", maxF) - tmpL2 <- tmpL - while (deny[maxy <- which.max(deny)] > maxden / 20 && snum < maxF) { - grange <- xcms:::descendMin(deny, maxy) - deny[grange[1]:grange[2]] <- 0 - gidx <- which(z[,"rt"] >= den$x[grange[1]] & - z[,"rt"] <= den$x[grange[2]]) - ## Determine the sample group of the samples in which the peaks - ## were detected and check if they correspond to the required limits. - tt <- table(sampleGrps[unique(z[gidx, "sample"])]) - if (!any(tt / sampleGroupTbl[names(tt)] >= minFr & - tt >= minSmpls)) - next - snum <- snum + 1 - gcount <- rep(0, length(sampleGroupNms)) - names(gcount) <- sampleGroupNms - gcount[names(tt)] <- as.numeric(tt) - - tmpL[[snum]] <- c(median(z[gidx, "mz"]), - range(z[gidx, "mz"]), - median(z[gidx, "rt"]), - range(z[gidx, "rt"]), - length(gidx), - gcount) - tmpL2[[snum]] <- sort(peakOrdr[z[, "idx"][gidx]]) - } - tmpL <- tmpL[lengths(tmpL) > 0] - tmpL2 <- tmpL2[lengths(tmpL2) > 0] - if (length(tmpL)) - return(list(grps = do.call(rbind, tmpL), idx = tmpL2)) - }, rtr = rtRange, bw = bw, maxF = maxFeatures, sampleGrps = sampleGroups, - sampleGroupTbl = sampleGroupTable, minFr = minFraction, - minSmpls = minSamples, sampleGroupNms = sampleGroupNames, - peakOrdr = peakOrder) - - res <- res[lengths(res) > 0] - ## Now we have to process that list of results. - groupmat <- do.call(rbind, lapply(res, function(z) z[["grps"]])) - groupidx <- unlist(lapply(res, function(z) z[["idx"]]), recursive = FALSE) - - colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", - "npeaks", sampleGroupNames) - - ## groupmat <- groupmat[seq_len(num), , drop = FALSE] - ## groupindex <- groupindex[seq_len(num)] - - ## Remove groups that overlap with more "well-behaved" groups - numsamp <- rowSums(groupmat[, (match("npeaks", - colnames(groupmat))+1):ncol(groupmat), - drop = FALSE]) - uorder <- order(-numsamp, groupmat[, "npeaks"]) - - uindex <- rectUnique(groupmat[, c("mzmin","mzmax","rtmin","rtmax"), - drop = FALSE], - uorder) - - return(list(featureDefinitions = groupmat[uindex, , drop = FALSE], - peakIndex = groupidx[uindex])) + res } -##' @title Core API function for peak grouping using mzClust -##' -##' @description The \code{do_groupPeaks_mzClust} function performs high -##' resolution correspondence on single spectra samples. -##' -##' @inheritParams groupChromPeaks-density -##' @inheritParams do_groupChromPeaks_density -##' @inheritParams groupChromPeaks-mzClust -##' -##' @return A \code{list} with elements \code{"featureDefinitions"} and -##' \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row -##' representing an (mz-rt) feature (i.e. peak group) with columns: -##' \describe{ -##' \item{"mzmed"}{median of the peaks' apex mz values.} -##' \item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} -##' \item{"mzmax"}{largest mz value of all peaks' apex within the feature.} -##' \item{"rtmed"}{always \code{-1}.} -##' \item{"rtmin"}{always \code{-1}.} -##' \item{"rtmax"}{always \code{-1}.} -##' \item{"npeaks"}{the total number of peaks assigned to the feature. -##' Note that this number can be larger than the total number of samples, since -##' multiple peaks from the same sample could be assigned to a group.} -##' } -##' \code{"peakIndex"} is a \code{list} with the indices of all peaks in a -##' peak group in the \code{peaks} input matrix. -##' -##' @family core peak grouping algorithms -##' -##' @references Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill -##' and David F. Grant\cr \emph{Alignment of high resolution mass spectra: -##' development of a heuristic approach for metabolomics}.\cr Metabolomics, -##' Vol. 2, No. 2, 75-83 (2006) +#' @title Core API function for peak grouping using mzClust +#' +#' @description +#' +#' The `do_groupPeaks_mzClust` function performs high resolution +#' correspondence on single spectra samples. +#' +#' @inheritParams groupChromPeaks-density +#' +#' @inheritParams do_groupChromPeaks_density +#' +#' @inheritParams groupChromPeaks-mzClust +#' +#' @return A `list` with elements `"featureDefinitions"` and +#' `"peakIndex"`. `"featureDefinitions"` is a `matrix`, each row +#' representing an (mz-rt) feature (i.e. peak group) with columns: +#' - `"mzmed"`: median of the peaks' apex mz values. +#' - `"mzmin"`: smallest mz value of all peaks' apex within the feature. +#' - `"mzmax"`: largest mz value of all peaks' apex within the feature. +#' - `"rtmed"`: always `-1`. +#' - `"rtmin"`: always `-1`. +#' - `"rtmax"`: always `-1`. +#' - `"npeaks"`: the total number of peaks assigned to the feature. Note that +#' this number can be larger than the total number of samples, since +#' multiple peaks from the same sample could be assigned to a group. +#' +#' `"peakIndex"` is a `list` with the indices of all peaks in a peak group in +#' the `peaks` input matrix. +#' +#' @md +#' +#' @family core peak grouping algorithms +#' +#' @references Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill +#' and David F. Grant\cr \emph{Alignment of high resolution mass spectra: +#' development of a heuristic approach for metabolomics}.\cr Metabolomics, +#' Vol. 2, No. 2, 75-83 (2006) do_groupPeaks_mzClust <- function(peaks, sampleGroups, ppm = 20, absMz = 0, minFraction = 0.5, minSamples = 1) { @@ -372,7 +212,7 @@ do_groupPeaks_mzClust <- function(peaks, sampleGroups, ppm = 20, "assignment of the samples.") if (missing(peaks)) stop("Parameter 'peaks' is missing!") - if (!is.matrix(peaks) | is.data.frame(peaks)) + if (!(is.matrix(peaks) | is.data.frame(peaks))) stop("Peaks has to be a 'matrix' or a 'data.frame'!") ## Check that we've got all required columns .reqCols <- c("mz", "sample") @@ -386,13 +226,12 @@ do_groupPeaks_mzClust <- function(peaks, sampleGroups, ppm = 20, sampleGroupTable <- table(sampleGroups) nSampleGroups <- length(sampleGroupTable) ##sampleGroups <- as.numeric(sampleGroups) - + ## Check that sample groups matches with sample column. if (max(peaks[, "sample"]) > length(sampleGroups)) stop("Sample indices in 'peaks' are larger than there are sample", " groups specified with 'sampleGroups'!") - - ##peaks <- peaks[, .reqCols, drop = FALSE] + peaks <- .fix_mz_clust_peaks(peaks) grps <- mzClustGeneric(peaks[, .reqCols, drop = FALSE], sampclass = sampleGroups, mzppm = ppm, @@ -411,40 +250,44 @@ do_groupPeaks_mzClust <- function(peaks, sampleGroups, ppm = 20, grpmat[, 4:ncol(grpmat), drop = FALSE]) colnames(grpmat) <- c(cns[1:3], c("rtmed", "rtmin", "rtmax"), cns[4:length(cns)]) - return(list(featureDefinitions = grpmat, peakIndex = grps$idx)) + return(list(featureDefinitions = grpmat, peakIndex = grps$idx)) } -##' @title Core API function for chromatic peak grouping using a nearest -##' neighbor approach -##' -##' @description The \code{do_groupChromPeaks_nearest} function groups peaks -##' across samples by creating a master peak list and assigning corresponding -##' peaks from all samples to each peak group (i.e. feature). The method is -##' inspired by the correspondence algorithm of mzMine [Katajamaa 2006]. -##' -##' @inheritParams do_groupChromPeaks_density -##' @inheritParams groupChromPeaks-nearest -##' -##' @return A \code{list} with elements \code{"featureDefinitions"} and -##' \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row -##' representing an (mz-rt) feature (i.e. peak group) with columns: -##' \describe{ -##' \item{"mzmed"}{median of the peaks' apex mz values.} -##' \item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} -##' \item{"mzmax"}{largest mz value of all peaks' apex within the feature.} -##' \item{"rtmed"}{the median of the peaks' retention times.} -##' \item{"rtmin"}{the smallest retention time of the peaks in the feature.} -##' \item{"rtmax"}{the largest retention time of the peaks in the feature.} -##' \item{"npeaks"}{the total number of peaks assigned to the feature.} -##' } -##' \code{"peakIndex"} is a \code{list} with the indices of all peaks in a -##' feature in the \code{peaks} input matrix. -##' -##' @family core peak grouping algorithms -##' -##' @references Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for -##' processing and visualization of mass spectrometry based molecular profile -##' data. \emph{Bioinformatics} 2006, 22:634-636. +#' @title Core API function for chromatic peak grouping using a nearest +#' neighbor approach +#' +#' @description +#' +#' The `do_groupChromPeaks_nearest` function groups peaks across samples by +#' creating a master peak list and assigning corresponding peaks from all +#' samples to each peak group (i.e. feature). The method is inspired by the +#' correspondence algorithm of mzMine (Katajamaa 2006). +#' +#' @inheritParams do_groupChromPeaks_density +#' @inheritParams groupChromPeaks-nearest +#' +#' @return A `list` with elements `"featureDefinitions"` and +#' `"peakIndex"`. `"featureDefinitions"` is a `matrix`, each row +#' representing an (mz-rt) feature (i.e. peak group) with columns: +#' +#' - `"mzmed"`: median of the peaks' apex mz values. +#' - `"mzmin"`: smallest mz value of all peaks' apex within the feature. +#' - `"mzmax"`:largest mz value of all peaks' apex within the feature. +#' - `"rtmed"`: the median of the peaks' retention times. +#' - `"rtmin"`: the smallest retention time of the peaks in the feature. +#' - `"rtmax"`: the largest retention time of the peaks in the feature. +#' - `"npeaks"`: the total number of peaks assigned to the feature. +#' +#' `"peakIndex"` is a `list` with the indices of all peaks in a feature in the +#' `peaks` input matrix. +#' +#' @family core peak grouping algorithms +#' +#' @md +#' +#' @references Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for +#' processing and visualization of mass spectrometry based molecular profile +#' data. Bioinformatics 2006, 22:634-636. do_groupChromPeaks_nearest <- function(peaks, sampleGroups, mzVsRtBalance = 10, absMz = 0.2, absRt = 15, kNN = 10) { if (missing(sampleGroups)) @@ -453,7 +296,7 @@ do_groupChromPeaks_nearest <- function(peaks, sampleGroups, mzVsRtBalance = 10, "assignment of the samples.") if (missing(peaks)) stop("Parameter 'peaks' is missing!") - if (!is.matrix(peaks) | is.data.frame(peaks)) + if (!(is.matrix(peaks) | is.data.frame(peaks))) stop("Peaks has to be a 'matrix' or a 'data.frame'!") ## Check that we've got all required columns .reqCols <- c("mz", "rt", "sample") @@ -472,7 +315,7 @@ do_groupChromPeaks_nearest <- function(peaks, sampleGroups, mzVsRtBalance = 10, ## peaks == peakmat peaks <- peaks[, .reqCols, drop = FALSE] - + parameters <- list(mzVsRTBalance = mzVsRtBalance, mzcheck = absMz, rtcheck = absRt, knn = kNN) @@ -514,7 +357,7 @@ do_groupChromPeaks_nearest <- function(peaks, sampleGroups, mzVsRtBalance = 10, ) if (length(mplenv$peakIdxList$peakidx) == 0) message("Warning: No peaks in sample number ", sample) - + ## this really doesn't take a long time not worth parallel version here. ## but make an apply loop now faster even with rearranging the data :D : PB scoreList <- sapply(mplenv$peakIdxList$peakidx, @@ -553,7 +396,7 @@ do_groupChromPeaks_nearest <- function(peaks, sampleGroups, mzVsRtBalance = 10, } } notJoinedPeaks <- mplenv$peakIdxList[which(mplenv$peakIdxList$isJoinedPeak == FALSE), "peakidx"] - + for (notJoinedPeak in notJoinedPeaks) { mplenv$mplist <- rbind(mplenv$mplist, matrix(0, 1, dim(mplenv$mplist)[2])) @@ -593,6 +436,109 @@ do_groupChromPeaks_nearest <- function(peaks, sampleGroups, mzVsRtBalance = 10, ## groupmat[i, 7 + seq(along = gcount)] <- gcount groupindex[[i]] <- mplenv$mplist[i, (which(mplenv$mplist[i,]>0))] } - - return(list(featureDefinitions = groupmat, peakIndex = groupindex)) + + return(list(featureDefinitions = groupmat, peakIndex = groupindex)) +} + +#' Low level function to group chromatographic peaks within a m/z slice. +#' +#' @param x `matrix` such as the one returned by `chromPeaks,XCMSnExp`, just +#' with the peaks within one m/z slice. Note that we require in addition +#' a column `"index"` with the index of the peak within the full peak table. +#' +#' @param return `data.frame` +#' +#' @author Johannes Rainer +#' +#' @noRd +.group_peaks_density <- function(x, bw, densFrom, densTo, densN, sampleGroups, + sampleGroupTable, minFraction, + minSamples, maxFeatures, sleep = 0) { + den <- density(x[, "rt"], bw = bw, from = densFrom, to = densTo, + n = densN) + maxden <- max(den$y) + deny <- den$y + sampleGroupNames <- names(sampleGroupTable) + nSampleGroups <- length(sampleGroupNames) + col_nms <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", + "npeaks", sampleGroupNames) + res_mat <- matrix(nrow = 0, ncol = length(col_nms), + dimnames = list(character(), col_nms)) + res_idx <- list() + while (deny[maxy <- which.max(deny)] > maxden / 20 && nrow(res_mat) < + maxFeatures) { + grange <- descendMin(deny, maxy) + deny[grange[1]:grange[2]] <- 0 + gidx <- which(x[,"rt"] >= den$x[grange[1]] & + x[,"rt"] <= den$x[grange[2]]) + ## Determine the sample group of the samples in which the peaks + ## were detected and check if they correspond to the required limits. + tt <- table(sampleGroups[unique(x[gidx, "sample"])]) + if (!any(tt / sampleGroupTable[names(tt)] >= minFraction & + tt >= minSamples)) + next + gcount <- rep(0, length(sampleGroupNames)) + names(gcount) <- sampleGroupNames + gcount[names(tt)] <- as.numeric(tt) + res_mat <- rbind(res_mat, + c(median(x[gidx, "mz"]), + range(x[gidx, "mz"]), + median(x[gidx, "rt"]), + range(x[gidx, "rt"]), + length(gidx), + gcount) + ) + res_idx <- c(res_idx, list(unname(sort(x[gidx, "index"])))) + } + if (sleep > 0) { + ## Plot the density + plot(den, main = paste(round(min(x[,"mz"]), 2), "-", + round(max(x[,"mz"]), 2))) + ## Highlight peaks per sample group. + for (j in seq_len(nSampleGroups)) { + ## Which peaks belong to this sample group. + cur_group_samples <- which(sampleGroups == sampleGroupNames[j]) + idx <- x[, "sample"] %in% cur_group_samples + points(x[idx, "rt"], x[idx, "into"] / + max(x[, "into"]) * maxden, + col = j, pch=20) + } + for (j in seq_len(nrow(res_mat))) + abline(v = res_mat[j, 5:6], lty = "dashed", col = j) + Sys.sleep(sleep) + } + res <- as.data.frame(res_mat) + res$peakidx <- res_idx + res +} + +#' @description +#' +#' Check the input peaks table eventually replacing `NA` values in column `"mz"` +#' with the mean of columns `"mzmin"` and `"mzmax"` (if present). +#' This fixes issue #416. +#' +#' @param x peaks `matrix`. +#' +#' @return peaks `matrix` +#' +#' @noRd +#' +#' @md +.fix_mz_clust_peaks <- function(x) { + ## Issue #416: fix for peaks with an m/z of NA. + nas <- is.na(x[, "mz"]) + if (any(nas)) { + ## if we have mzmin and mzmax use mean of them. + if (all(c("mzmin", "mzmax") %in% colnames(x)) && + !any(is.na(x[nas, c("mzmin", "mzmax")]))) { + warning("Got ", sum(nas), " peaks with missing values in column ", + "'mz'. Replaced them with the mean of values in columns ", + "'mzmin' and 'mzmax' values.") + x[nas, "mz"] <- rowMeans(x[nas, c("mzmin", "mzmax")]) + } else { + stop("Got ", sum(nas), " peaks with missing values in column 'mz'.") + } + } + x } diff --git a/R/functions-Chromatogram.R b/R/functions-Chromatogram.R new file mode 100644 index 000000000..efd02f196 --- /dev/null +++ b/R/functions-Chromatogram.R @@ -0,0 +1,157 @@ +#' @title Merge neighboring peaks in chromatogram +#' +#' @description +#' +#' Peak detection sometimes fails to identify a chromatographic peak correctly, +#' especially for broad peaks and if the peak shape is irregular (mostly for +#' HILIC data). In such cases several smaller peaks are reported. This function +#' tries to combine such peaks again considering their distance in retention +#' time dimension and the measured intensity between them. +#' +#' In detail, the function evaluates if the peaks are close enough, i.e. checks +#' if the difference between the `"rtmax"` and `"rtmin"` of consecutive peaks +#' is smaller than `diffRt`. If so, the average intensity of the 3 data points +#' at half way between them is calculated. If this average is larger than +#' `minProp` of the smaller `"maxo"` value of both, the peaks are merged. +#' In other words, if the (average) intensity between the two peaks is larger +#' than `minProp` times the smaller maximal peak intensity of both peaks, they +#' are joined. Note that peaks are **not** joined if all 3 data points in the +#' middle between the peaks are `NA`. +#' The joined peaks get the `"mz"`, `"rt"`, `"sn"` and `"maxo"` values from +#' the peak with the largest signal (`"maxo"`) as well as its row in the +#' *chrom peak data* `pkd`. The `"rtmin"`, `"rtmax"` are updated and +#' `"into"` is recalculated based on all the signal between `"rtmin"` and +#' `"rtmax"` of the new merged peak. The smallest and largest m/z value of all +#' data points within the provided extracted ion chromatogram is used as +#' `"mzmin"` and `"mzmax`" of the merged peak. +#' +#' Note that the `"maxo"` of the merged peak is updated (the maximum of the two +#' merged peaks is used) and used in any further merging. If for example two +#' peaks were merged, the maxo of these merged peak is used in the evaluation +#' of any additional peak that should be merged. +#' +#' @param x `Chromatogram` object with the extracted ion chromatogram containing +#' the signal for the `pks`. +#' +#' @param pks `matrix` representing a peaks matrix. Columns `"rtmin"`, +#' `"rtmax"` and `"maxo"` are required. It is supposed that these peaks are +#' close enough on retention time to potentially represent signal from the +#' same compound. +#' +#' @param pkd `DataFrame` representing the peak data (as returned by the +#' [chromPeakData()] function. +#' +#' @param minProp `numeric(1)` representing the proportion of intensity to +#' be required for peaks to be joined. See description for more details. +#' +#' @param diffRt `numeric(1)` representing the maximal difference between +#' `"rtmax"` and `"rtmin"` of consecutive peaks to be considered for +#' merging. +#' +#' @return `list` with element `"chromPeaks"`, that contains the peaks `matrix` +#' containing newly merged peaks and original peaks if they could not be +#' merged and `"chromPeakData"` that represents the `DataFrame` with the +#' corresponding metadata information. The merged peaks will have a row +#' name of `NA`. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +#' +#' @examples +#' +#' xd <- readMSData(system.file('cdf/KO/ko15.CDF', package = "faahKO"), +#' mode = "onDisk") +#' chr <- chromatogram(xd, mz = c(-0.5, 0.5) + 453.2) +#' xchr <- findChromPeaks(chr, param = CentWaveParam(snthresh = 0)) +#' plot(xchr) +#' +#' res <- .chrom_merge_neighboring_peaks(chr[[1]], chromPeaks(xchr[[1]]), +#' chromPeakData(xchr[[1]]), diffRt = 2) +#' res +#' rect(res[, "rtmin"], 0, res[, "rtmax"], res[, "maxo"], border = "red") +.chrom_merge_neighboring_peaks <- function(x, pks, pkd, minProp = 0.75, + diffRt = 0) { + if (nrow(pks) < 2 || all(is.na(intensity(x)))) + return(list(chromPeaks = pks, chromPeakData = pkd)) + ## x <- clean(x, all = TRUE) + idx <- order(pks[, "rtmin"]) + pks <- pks[idx, , drop = FALSE] + pkd <- extractROWS(pkd, idx) + cns <- colnames(pks) + if (!any(cns == "mz")) + pks <- cbind(pks, mz = NA_real_, mzmin = NA_real_, mzmax = NA_real_) + if (is.null(rownames(pks))) + rownames(pks) <- rownames(pkd) <- seq_len(nrow(pks)) + pks_new <- pks + pks_new[ , ] <- NA_real_ + rownames(pks_new) <- rep(NA_character_, nrow(pks)) + pks_new[1, ] <- pks[1, ] + rownames(pks_new)[1] <- rownames(pks)[1] + current_peak <- 1 # point always to the current *new* (merged) peak. + drop_cols <- !(colnames(pks_new) %in% c("mz", "mzmin", "mzmax", "rt", + "rtmin", "rtmax", "into", + "maxo", "sn", "sample")) + for (i in 2:nrow(pks)) { + if ((pks[i, "rtmin"] - pks_new[current_peak, "rtmax"]) < diffRt) { + ## skip if second peak contained within first + if (pks[i, "rtmin"] >= pks_new[current_peak, "rtmin"] & + pks[i, "rtmax"] <= pks_new[current_peak, "rtmax"]) + next + rt_mid <- (pks[i, "rtmin"] + pks_new[current_peak, "rtmax"]) / 2 + ## If rt_mid is NOT between the peaks, take the midpoint between + ## the apexes instead. + apexes <- range(c(pks[i, "rt"], pks[current_peak, "rt"])) + if (rt_mid < apexes[1] || rt_mid > apexes[2]) + rt_mid <- sum(apexes) / 2 + ## Calculate the mean of the 3 data points closest to rt_mid. Skip + ## if all of them are `NA`. + mid_vals <- intensity(x)[order(abs(rtime(x) - rt_mid))[1:3]] + if (!all(is.na(mid_vals)) && + mean(mid_vals, na.rm = TRUE) > + min(pks_new[current_peak, "maxo"], pks[i, "maxo"]) * minProp) { + rownames(pks_new)[current_peak] <- NA_character_ + pks_new[current_peak, drop_cols] <- NA_real_ + if (pks[i, "rtmax"] > pks_new[current_peak, "rtmax"]) + pks_new[current_peak, "rtmax"] <- pks[i, "rtmax"] + if (!is.na(pks[i, "mzmin"])) { + ## Use the mzmin and mzmax of the actual EIC - since we are + ## also integrating the intensities from there. + pks_new[current_peak, c("mzmin", "mzmax")] <- + mz(x, filter = FALSE) + } + idx_min <- which.min( + abs(rtime(x) - pks_new[current_peak, "rtmin"])) + idx_max <- which.min( + abs(rtime(x) - pks_new[current_peak, "rtmax"])) + peak_width <- (pks_new[current_peak, "rtmax"] - + pks_new[current_peak, "rtmin"]) / (idx_max - idx_min) + ## Calculate into as done in centWave. + pks_new[current_peak, "into"] <- + sum(intensity(x)[rtime(x) >= pks_new[current_peak, "rtmin"] & + rtime(x) <= pks_new[current_peak, "rtmax"]], + na.rm = TRUE) * peak_width + if (pks[i, "maxo"] > pks_new[current_peak, "maxo"]) { + pks_new[current_peak, c("mz", "rt", "maxo", "sn")] <- + pks[i, c("mz", "rt", "maxo", "sn")] + pkd[current_peak, ] <- extractROWS(pkd, i) # replace peak data with new + } + } else { + current_peak <- current_peak + 1 + pks_new[current_peak, ] <- pks[i, ] + rownames(pks_new)[current_peak] <- rownames(pks)[i] + pkd[current_peak, ] <- extractROWS(pkd, i) + } + } else { + current_peak <- current_peak + 1 + pks_new[current_peak, ] <- pks[i, ] + rownames(pks_new)[current_peak] <- rownames(pks)[i] + pkd[current_peak, ] <- extractROWS(pkd, i) + } + } + keep <- which(!is.na(pks_new[, "rt"])) + list(chromPeaks = pks_new[keep, cns, drop = FALSE], + chromPeakData = extractROWS(pkd, keep)) +} diff --git a/R/functions-IO.R b/R/functions-IO.R index befb32ba6..588919ced 100644 --- a/R/functions-IO.R +++ b/R/functions-IO.R @@ -30,7 +30,7 @@ readRawData <- function(x, includeMSn = FALSE, dropEmptyScans = TRUE, ## def_backend <- "Ramp" ## Eventually use pwiz... header_cols <- c("retentionTime", "acquisitionNum", "totIonCurrent") msd <- mzR::openMSfile(x, backend = backend) - on.exit(if(!is.null(msd)) mzR::close(msd)) + on.exit(mzR::close(msd)) ## That's due to issue https://github.com/lgatto/MSnbase/issues/151 on.exit(rm(msd), add = TRUE) on.exit(gc(), add = TRUE) @@ -101,8 +101,5 @@ readRawData <- function(x, includeMSn = FALSE, dropEmptyScans = TRUE, warning("MSn spectra requested but none present in the file.") } } - mzR::close(msd) - mzR <- NULL - gc() resList } diff --git a/R/functions-MsFeatureData.R b/R/functions-MsFeatureData.R index 8c7094cc7..427908255 100644 --- a/R/functions-MsFeatureData.R +++ b/R/functions-MsFeatureData.R @@ -3,9 +3,9 @@ #' Validates a 'chromPeaks' matrix or data.frame and ensures that it contains all #' required columns and that all columns are of numeric data type. -#' +#' #' @return \code{TRUE} or a \code{character} with the error message. -#' +#' #' @noRd .validChromPeaksMatrix <- function(x) { msg <- character() @@ -23,28 +23,54 @@ is.numeric) if (any(!typeOK)) return(paste0("Values in column(s) ", - paste0("'", names(typeOK)[!typeOK], "'", collapse = ", ")), - " of the 'chromPeaks' matrix are not numeric!") + paste0("'", names(typeOK)[!typeOK], "'", + collapse = ", "), + " of the 'chromPeaks' matrix are not numeric!")) } return(TRUE) } +.validChromPeakData <- function(x) { + msg <- character() + if (!inherits(x$chromPeakData, "DataFrame")) + return("'chromPeakData' is supposed to be a 'DataFrame'") + if (!is.null(x$chromPeaks)) { + if (nrow(x$chromPeakData) != nrow(x$chromPeaks)) { + msg <- "number of rows of chromPeaks and chromPeakData does not match" + } else if (any(rownames(x$chromPeakData) != rownames(x$chromPeaks))) + msg <- "rownames differ between 'chromPeaks' and 'chromPeakData'" + req_cols <- .CHROMPEAKDATA_REQ_NAMES + if (!all(req_cols %in% colnames(x$chromPeakData))) + msg <- c(msg, paste0("one or more required columns (", + paste0(req_cols, collapse = ", "), + ") are missing")) + else { + if (!is.integer(x$chromPeakData$ms_level)) + msg <- c(msg, paste0("column 'ms_level' should contain only ", + "integer values")) + if (!is.logical(x$chromPeakData$is_filled)) + msg <- c(msg, paste0("column 'is_filled' should contain only ", + "logical values")) + } + } else msg <- "'chromPeakData' present but 'chromPeaks' is missing" + msg +} #' @description Performs a validation check of all elements within the object: #' 1) Allowed are: chromPeaks (matrix), featureDefinitions (DataFrame) and #' adjustedRtime (list). -#' +#' #' @author Johannes Rainer -#' +#' #' @return \code{TRUE} if object is valid, or a message with the error message. -#' +#' #' @noRd validateMsFeatureData <- function(x) { msg <- character() ks <- ls(x) if (length(ks)) { validKeys <- ks %in% c("chromPeaks", "featureDefinitions", - "adjustedRtime") + "adjustedRtime", "chromPeakData") if (!all(validKeys)) { msg <- c(msg, paste0("Only elements named 'chromPeaks', ", "'featureDefinitions' and 'adjustedRtime' ", @@ -57,6 +83,8 @@ validateMsFeatureData <- function(x) { if (is.character(OK)) msg <- c(msg, OK) } + if (.has_chrom_peak_data(x)) + msg <- c(msg, .validChromPeakData(x)) haveFGs <- any(ks == "featureDefinitions") if (haveFGs) { if (is(x$featureDefinitions, "DataFrame")) { @@ -123,58 +151,62 @@ validateMsFeatureData <- function(x) { } } } - ## if (length(msg) == 0) - ## return(TRUE) - ## else return(msg) - return(msg) + msg } #' @description Filter chromPeaks and sync them with with the present -#' filterGroups, i.e. update their peakidx column or remove them. +#' featureDefinitions, i.e. update their peakidx column or remove them. #' #' @param x A \code{MsFeatureData} or an \code{XCMSnExp} object. -#' +#' #' @param idx \code{numeric} with the indices of the chromatographic peaks to #' keep. #' #' @return A \code{MsFeatureData}. -#' +#' #' @author Johannes Rainer -#' +#' #' @noRd .filterChromPeaks <- function(x, idx) { if (missing(idx)) return(x) if (!hasChromPeaks(x)) return(x) - fts <- chromPeaks(x) - idx <- sort(idx) - if (!all(idx %in% 1:nrow(fts))) - stop("All indices in 'idx' have to be within 1 and nrow of the peak", - " matrix.") new_e <- new("MsFeatureData") - chromPeaks(new_e) <- fts[idx, , drop = FALSE] + if (!length(idx)) + return(new_e) + pks <- chromPeaks(x) + idx <- sort(idx) + ## if (!all(idx %in% 1:nrow(pks))) + ## stop("All indices in 'idx' have to be within 1 and nrow of the peak", + ## " matrix.") + chromPeaks(new_e) <- pks[idx, , drop = FALSE] + if (.has_chrom_peak_data(x)) + chromPeakData(new_e) <- extractROWS(chromPeakData(x), idx) if (hasFeatures(x)) { - af <- featureDefinitions(x) - af <- split(af, 1:nrow(af)) - afL <- lapply(af, function(z) { - if(all(z$peakidx[[1]] %in% idx)) { - z$peakidx <- list(match(z$peakidx[[1]], idx)) - return(z) - } else { - return(NULL) - } - }) - af <- do.call(rbind, afL) - if (length(af) > 0) - featureDefinitions(new_e) <- af + if (nrow(chromPeaks(new_e)) != nrow(chromPeaks(x))) + featureDefinitions(new_e) <- .update_feature_definitions( + featureDefinitions(x), rownames(chromPeaks(x)), + rownames(chromPeaks(new_e))) + else featureDefinitions(new_e) <- featureDefinitions(x) + if (nrow(featureDefinitions(new_e)) == 0) + rm(list = "featureDefinitions", envir = new_e) } if (hasAdjustedRtime(x)) { if (is(x, "XCMSnExp")) adjustedRtime(new_e) <- adjustedRtime(x, bySample = TRUE) else adjustedRtime(new_e) <- adjustedRtime(x) - } - return(new_e) + new_e +} + +.has_chrom_peak_data <- function(x) { + if (is(x, "XCMSnExp")) + x <- x@msFeatureData + !is.null(x$chromPeakData) +} + +.chrom_peak_data <- function(x) { + x$chromPeakData } diff --git a/R/functions-OnDiskMSnExp.R b/R/functions-OnDiskMSnExp.R index 5abf044d0..5ea881f6f 100644 --- a/R/functions-OnDiskMSnExp.R +++ b/R/functions-OnDiskMSnExp.R @@ -3,29 +3,29 @@ #' @param x an OnDiskMSnExp representing the whole experiment. -#' +#' #' @param method The (chromatographic) peak detection method to be used. Can be #' "centWave" etc. -#' +#' #' @param param A class extending Param containing all parameters for the #' peak detection method. #' #' @return a list of length 2, \code{peaks} containing a matrix with the #' identified peaks and \code{date} the time stamp when the peak detection #' was started. -#' +#' #' @noRd findChromPeaks_OnDiskMSnExp <- function(object, method = "centWave", param) { + require("xcms", quietly = TRUE, character.only = TRUE) if (missing(param)) stop("'param' has to be specified!") ## pass the spectra to the _Spectrum_list function ## Since we're calling this function already with bplapply ensure that ## the spectra call is not firing its own parallel processing! - findChromPeaks_Spectrum_list(x = spectra(object, - BPPARAM = SerialParam()), - method = method, - param = param, rt = rtime(object)) + findChromPeaks_Spectrum_list(x = spectra(object, BPPARAM = SerialParam()), + method = method, param = param, + rt = rtime(object)) } @@ -33,19 +33,19 @@ findChromPeaks_OnDiskMSnExp <- function(object, method = "centWave", #' file #' #' @param x A list of Spectrum1 objects of a sample. -#' +#' #' @param method The peak detection method to be used. Can be "centWave" etc. #' #' @param param A class extending Param containing all parameters for the #' peak detection method. -#' +#' #' @param rt Numeric with the retention times for the spectra. If not provided #' it is extracted from the spectra. -#' +#' #' @return a list of length 2, \code{peaks} containing a matrix with the #' identified peaks and \code{date} the time stamp when the peak detection #' was started. -#' +#' #' @author Johannes Rainer #' #' @noRd @@ -53,17 +53,29 @@ findChromPeaks_Spectrum_list <- function(x, method = "centWave", param, rt) { method <- match.arg(method, c("centWave", "massifquant", "matchedFilter", "MSW", "centWaveWithPredIsoROIs")) method <- paste0("do_findChromPeaks_", method) - if (method == "MSW") - method <- paste0("do_findPeaks_", method) + if (method == "do_findChromPeaks_MSW") + method <- "do_findPeaks_MSW" + if (method == "do_findChromPeaks_matchedFilter") { + ## Issue #325: empty spectra is not supported + x <- lapply(x, function(z) { + if (!length(z@mz)) { + z@mz <- 0.0 + z@intensity <- 0.0 + } + z + }) + } if (missing(param)) stop("'param' has to be specified!") - ## Check if the spectra are orderd by rt. if (missing(rt)) rt <- unlist(lapply(x, rtime), use.names = FALSE) if (is.unsorted(rt)) stop("Spectra are not ordered by retention time!") mzs <- lapply(x, mz) vals_per_spect <- lengths(mzs, FALSE) + if (any(vals_per_spect == 0)) + warning("Found empty spectra. Please run 'filterEmptySpectra' first.", + call. = FALSE) procDat <- date() res <- do.call( method, args = c(list(mz = unlist(mzs, use.names = FALSE), @@ -82,6 +94,7 @@ findChromPeaks_Spectrum_list <- function(x, method = "centWave", param, rt) { ## That's a special case since we don't expect to have rt available for this. findPeaks_MSW_OnDiskMSnExp <- function(object, method = "MSW", param) { + require("xcms", quietly = TRUE, character.only = TRUE) if (missing(param)) stop("'param' has to be specified!") ## pass the spectra to the _Spectrum_list function @@ -102,7 +115,170 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { as(param, "list"))), date = procDat) } +#' Fast way to split an `XCMSnExp` object by file: +#' - subsets the object to MS level specified with `msLevel`. +#' - subsets feature data to the smallest possible set of columns (if +#' `selectFeatureData = TRUE`. +#' - returns by default an `OnDiskMSnExp`, unless `to_class = "XCMSnExp"`, in +#' which case also potentially present chromatographic peaks are preserved. +#' +#' @param keep_sample_idx if column "sample" should be kept as it is. +#' +#' @note +#' +#' This function needs a considerable amount of memory if +#' `to_class = "XCMSnExp"` because, for efficiency reasons, it first splits +#' the `chromPeaks` and `chromPeakData` per file. +#' +#' @noRd +.split_by_file <- function(x, msLevel. = unique(msLevel(x)), + subsetFeatureData = TRUE, + to_class = "OnDiskMSnExp", + keep_sample_idx = FALSE) { + if (is(x, "XCMSnExp") && hasAdjustedRtime(x)) + x@featureData$retentionTime <- adjustedRtime(x) + if (subsetFeatureData) { + fcs <- intersect(c(MSnbase:::.MSnExpReqFvarLabels, "centroided", + "polarity", "seqNum"), colnames(.fdata(x))) + x <- selectFeatureData(x, fcol = fcs) + } + procd <- x@processingData + expd <- new( + "MIAPE", + instrumentManufacturer = x@experimentData@instrumentManufacturer[1], + instrumentModel = x@experimentData@instrumentModel[1], + ionSource = x@experimentData@ionSource[1], + analyser = x@experimentData@analyser[1], + detectorType = x@experimentData@detectorType[1]) + create_object <- function(x, i, to_class) { + a <- new(to_class) + slot(procd, "files", check = FALSE) <- x@processingData@files[i] + slot(a, "processingData", check = FALSE) <- procd + slot(a, "featureData", check = FALSE) <- extractROWS( + x@featureData, which(x@featureData$msLevel %in% msLevel. & + x@featureData$fileIdx == i)) + if (!nrow(a@featureData)) + stop("No MS level ", msLevel., " spectra present.", call. = FALSE) + a@featureData$fileIdx <- 1L + slot(a, "experimentData", check = FALSE) <- expd + slot(a, "spectraProcessingQueue", check = FALSE) <- + x@spectraProcessingQueue + slot(a, "phenoData", check = FALSE) <- x@phenoData[i, , drop = FALSE] + a + } + if (to_class == "XCMSnExp" && is(x, "XCMSnExp") && hasChromPeaks(x)) { + if (any(colnames(.chrom_peak_data(x@msFeatureData)) == "ms_level")) { + pk_idx <- which( + .chrom_peak_data(x@msFeatureData)$ms_level %in% msLevel.) + } else pk_idx <- seq_len(nrow(chromPeaks(x@msFeatureData))) + fct <- as.factor( + as.integer(chromPeaks(x@msFeatureData)[pk_idx, "sample"])) + pksl <- split.data.frame( + chromPeaks(x@msFeatureData)[pk_idx, , drop = FALSE], fct) + pkdl <- split.data.frame( + extractROWS(.chrom_peak_data(x@msFeatureData), pk_idx), fct) + res <- vector("list", length(fileNames(x))) + for (i in seq_along(res)) { + a <- create_object(x, i, to_class) + newFd <- new("MsFeatureData") + pks <- pksl[[as.character(i)]] + if (!is.null(pks) && nrow(pks)) { + if (!keep_sample_idx) + pks[, "sample"] <- 1 + chromPeaks(newFd) <- pks + chromPeakData(newFd) <- pkdl[[as.character(i)]] + } else { + chromPeaks(newFd) <- chromPeaks(x@msFeatureData)[0, ] + chromPeakData(newFd) <- .chrom_peak_data(x@msFeatureData)[0, ] + } + lockEnvironment(newFd, bindings = TRUE) + slot(a, "msFeatureData", check = FALSE) <- newFd + res[[i]] <- a + } + res + } else { + lapply(seq_along(fileNames(x)), function(z) { + create_object(x, z, to_class) + }) + } +} +#' Same as `.split_by_file` but *faster* because it splits the chrom peaks +#' matrix too - and requires thus more memory. +#' +#' @author Johannes Rainer +#' +#' @noRd +.split_by_file2 <- function(x, msLevel. = unique(msLevel(x)), + subsetFeatureData = FALSE, + to_class = "OnDiskMSnExp", + keep_sample_idx = FALSE) { + if (is(x, "XCMSnExp") && hasAdjustedRtime(x)) + x@featureData$retentionTime <- adjustedRtime(x) + if (subsetFeatureData) { + fcs <- intersect(c(MSnbase:::.MSnExpReqFvarLabels, "centroided", + "polarity", "seqNum"), colnames(.fdata(x))) + x <- selectFeatureData(x, fcol = fcs) + } + fdl <- split.data.frame(x@featureData, as.factor(fromFile(x))) + procd <- x@processingData + expd <- new( + "MIAPE", + instrumentManufacturer = x@experimentData@instrumentManufacturer[1], + instrumentModel = x@experimentData@instrumentModel[1], + ionSource = x@experimentData@ionSource[1], + analyser = x@experimentData@analyser[1], + detectorType = x@experimentData@detectorType[1]) + create_object <- function(i, fd, x, to_class) { + a <- new(to_class) + slot(procd, "files", check = FALSE) <- x@processingData@files[i] + slot(a, "processingData", check = FALSE) <- procd + slot(a, "featureData", check = FALSE) <- + extractROWS(fd, which(fd$msLevel %in% msLevel.)) + if (!nrow(a@featureData)) + warning("No MS level ", msLevel., " spectra present in file ", + basename(x@processingData@files[i]), call. = FALSE) + else a@featureData$fileIdx <- 1L + slot(a, "experimentData", check = FALSE) <- expd + slot(a, "spectraProcessingQueue", check = FALSE) <- + x@spectraProcessingQueue + slot(a, "phenoData", check = FALSE) <- x@phenoData[i, , drop = FALSE] + a + } + if (to_class == "XCMSnExp" && is(x, "XCMSnExp") && hasChromPeaks(x)) { + if (any(colnames(.chrom_peak_data(x@msFeatureData)) == "ms_level")) { + pk_idx <- which( + .chrom_peak_data(x@msFeatureData)$ms_level %in% msLevel.) + } else pk_idx <- seq_len(nrow(chromPeaks(x@msFeatureData))) + fct <- as.factor( + as.integer(chromPeaks(x@msFeatureData)[pk_idx, "sample"])) + pksl <- split.data.frame( + chromPeaks(x@msFeatureData)[pk_idx, , drop = FALSE], fct) + pkdl <- split.data.frame( + extractROWS(.chrom_peak_data(x@msFeatureData), pk_idx), fct) + res <- vector("list", length(fileNames(x))) + for (i in seq_along(res)) { + a <- create_object(i, fdl[[i]], x, to_class) + newFd <- new("MsFeatureData") + pks <- pksl[[as.character(i)]] + if (!is.null(pks) && nrow(pks)) { + if (!keep_sample_idx) + pks[, "sample"] <- 1 + chromPeaks(newFd) <- pks + chromPeakData(newFd) <- pkdl[[as.character(i)]] + } else { + chromPeaks(newFd) <- chromPeaks(x@msFeatureData)[0, ] + chromPeakData(newFd) <- .chrom_peak_data(x@msFeatureData)[0, ] + } + lockEnvironment(newFd, bindings = TRUE) + slot(a, "msFeatureData", check = FALSE) <- newFd + res[[i]] <- a + } + res + } else + mapply(seq_along(fileNames(x)), fdl, FUN = create_object, + MoreArgs = list(x = x, to_class = to_class)) +} ############################################################ @@ -114,7 +290,7 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { #' o mslevel #' o scanrange: this should enable to subset the raw data again by scan index #' (which should be equivalent to acquisitionNum/scanIndex) -#' +#' #' @param pset The pSet from which data should be extracted #' #' @noRd @@ -123,7 +299,7 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { filepaths(object) <- fileNames(pset) phenoData(object) <- pData(pset) ## rt - rt <- split(unname(rtime(pset)), f = fromFile(pset)) + rt <- split(unname(rtime(pset)), f = as.factor(fromFile(pset))) object@rt <- list(raw = rt, corrected = rt) ## mslevel mslevel(object) <- unique(msLevel(pset)) @@ -141,7 +317,7 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { #' list with two elements: \code{$peaks} the peaks matrix of identified #' peaks and \code{$procHist} a list of ProcessHistory objects (empty if #' \code{getProcHist = FALSE}). -#' +#' #' @param x See description above. #' #' @param getProcHist Wheter ProcessHistory objects should be returned too. @@ -186,38 +362,43 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { #' #' @note Adjustment should be performed only on spectra from the same MS level! #' It's up to the calling function to ensure that. -#' +#' #' @param object An \code{OnDiskMSnExp}. #' #' @param param An \code{ObiwarpParam}. #' #' @param msLevel \code{integer} defining the MS level on which the adjustment #' should be performed. -#' +#' #' @return The function returns a \code{list} of adjusted retention times #' grouped by file. -#' +#' #' @noRd .obiwarp <- function(object, param) { if (missing(object)) stop("'object' is mandatory!") if (missing(param)) - param <- ObiwarpParam() - nSamples <- length(fileNames(object)) + param <- ObiwarpParam() + subs <- subset(param) + if (!length(subs)) + subs <- seq_along(fileNames(object)) + total_samples <- length(fileNames(object)) + nSamples <- length(subs) if (nSamples <= 1) - stop("Can not perform a retention time correction on less than to", + stop("Can not perform a retention time correction on less than two", " files.") - + ## centerSample if (length(centerSample(param))) { if (!(centerSample(param) %in% 1:nSamples)) stop("'centerSample' has to be a single integer between 1 and ", nSamples, "!") - } else { + } else centerSample(param) <- floor(median(1:nSamples)) - } message("Sample number ", centerSample(param), " used as center sample.") + rtraw <- split(rtime(object), as.factor(fromFile(object))) + object <- filterFile(object, file = subs) ## Get the profile matrix of the center sample: ## Using the (hidden) parameter returnBreaks to return also the breaks of ## the bins of the profile matrix. I can use them to align the matrices @@ -225,12 +406,13 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { ## NOTE: it might be event better to just re-use the breaks from the center ## sample for the profile matrix generation of all following samples. suppressMessages( - profCtr <- profMat(object, method = "bin", step = binSize(param), + profCtr <- profMat(object, method = "bin", + step = binSize(param), fileIndex = centerSample(param), returnBreaks = TRUE)[[1]] ) ## Now split the object by file - objL <- splitByFile(object, f = factor(seq_len(nSamples))) + objL <- .split_by_file2(object, msLevel. = 1) objL <- objL[-centerSample(param)] centerObject <- filterFile(object, file = centerSample(param)) ## Now we can bplapply here! @@ -246,30 +428,32 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { ## 1)Check the scan times of both objects: scantime1 <- unname(rtime(cntr)) scantime2 <- unname(rtime(z)) + scantime1_diff <- diff(scantime1) + scantime2_diff <- diff(scantime2) ## median difference between spectras' scan time. - mstdiff <- median(c(diff(scantime1), diff(scantime2))) + mstdiff <- median(c(scantime1_diff, scantime2_diff), na.rm = TRUE) ## rtup1 <- seq_along(scantime1) ## rtup2 <- seq_along(scantime2) - mst1 <- which(diff(scantime1) > 5 * mstdiff)[1] + mst1 <- which(scantime1_diff > 5 * mstdiff)[1] if (!is.na(mst1)) { - scantime1 <- scantime1[seq_len((mst1 - 1))] message("Found gaps in scan times of the center sample: cut ", "scantime-vector at ", scantime1[mst1]," seconds.") + scantime1 <- scantime1[seq_len(max(2, (mst1 - 1)))] } - mst2 <- which(diff(scantime2) > 5 * mstdiff)[1] - if(!is.na(mst2)) { - scantime2 <- scantime2[seq_len((mst2 - 1))] + mst2 <- which(scantime2_diff > 5 * mstdiff)[1] + if (!is.na(mst2)) { message("Found gaps in scan time of file ", basename(fileNames(z)), ": cut scantime-vector at ", scantime2[mst2]," seconds.") + scantime2 <- scantime2[seq_len(max(2, (mst2 - 1)))] } ## Drift of measured scan times - expected to be largest at the end. rtmaxdiff <- abs(diff(c(scantime1[length(scantime1)], scantime2[length(scantime2)]))) ## If the drift is larger than the threshold, cut the matrix up to the ## max allowed difference. - if(rtmaxdiff > (5 * mstdiff)){ + if (rtmaxdiff > (5 * mstdiff)) { rtmax <- min(scantime1[length(scantime1)], scantime2[length(scantime2)]) scantime1 <- scantime1[scantime1 <= rtmax] @@ -277,14 +461,33 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { } valscantime1 <- length(scantime1) valscantime2 <- length(scantime2) - ## Finally, restrict the profile matrix to columns 1:valscantime - if (ncol(cntrPr$profMat) > valscantime1) { - cntrPr$profMat <- cntrPr$profMat[, -c((valscantime1 + 1): - ncol(cntrPr$profMat))] + ## Ensure we have the same number of scans. + if (valscantime1 != valscantime2) { + min_number <- min(valscantime1, valscantime2) + diffs <- abs(range(scantime1) - range(scantime2)) + ## Cut at the start or at the end, depending on where we have the + ## larger difference + if (diffs[2] > diffs[1]) { + scantime1 <- scantime1[1:min_number] + scantime2 <- scantime2[1:min_number] + } else { + scantime1 <- rev(rev(scantime1)[1:min_number]) + scantime2 <- rev(rev(scantime2)[1:min_number]) + } + valscantime1 <- length(scantime1) + valscantime2 <- length(scantime2) + } + ## Finally, restrict the profile matrix to the restricted data + if (ncol(cntrPr$profMat) != valscantime1) { + ## Find out whether we were cutting at the start or end. + start_idx <- which(scantime1[1] == rtime(cntr)) + end_idx <- which(scantime1[length(scantime1)] == rtime(cntr)) + cntrPr$profMat <- cntrPr$profMat[, start_idx:end_idx] } - if(ncol(curP$profMat) > valscantime2) { - curP$profMat <- curP$profMat[, -c((valscantime2 + 1): - ncol(curP$profMat))] + if(ncol(curP$profMat) != valscantime2) { + start_idx <- which(scantime2[1] == rtime(z)) + end_idx <- which(scantime2[length(scantime2)] == rtime(z)) + curP$profMat <- curP$profMat[, start_idx:end_idx] } ## --------------------------------- ## 2) Now match the breaks/mz range. @@ -325,9 +528,6 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { cntrVals <- length(cntrPr$profMat) curVals <- length(curP$profMat) if ((mzvals * valscantime1) != cntrVals | (mzvals * valscantime2) != curVals) - ## Here the question is if we REALLY need to have the same numbers - ## of values in both. This caused the problems in issue #196 - ## | cntrVals != curVals) stop("Dimensions of profile matrices of files ", basename(fileNames(cntr)), " and ", basename(fileNames(z)), " do not match!") @@ -338,28 +538,28 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { gapInit(parms), gapExtend(parms), factorDiag(parms), factorGap(parms), as.numeric(localAlignment(parms)), initPenalty(parms)) - if (length(rtime(z)) > valscantime2) { - ## Adding the raw retention times if we were unable to align all of - ## them. - rtadj <- c(rtadj, rtime(z)[(valscantime2 + 1):length(rtime(z))]) - warning(basename(fileNames(z)), " :could only align up to a ", - "retention time of ", rtime(z)[valscantime2], " seconds. ", - "After that raw retention times are reported.") + if (length(rtime(z)) != valscantime2) { + nrt <- length(rtime(z)) + adj_starts_at <- which(rtime(z) == scantime2[1]) + adj_ends_at <- which(rtime(z) == scantime2[length(scantime2)]) + if (adj_ends_at < nrt) + rtadj <- c(rtadj, rtadj[length(rtadj)] + + cumsum(diff(rtime(z)[adj_ends_at:nrt]))) + if (adj_starts_at > 1) + rtadj <- c(rtadj[1] + + rev(cumsum(diff(rtime(z)[adj_starts_at:1]))), rtadj) } message("OK") - return(rtadj) + return(unname(rtadj)) ## Related to issue #122: try to resemble the rounding done in the ## recor.obiwarp method. ## return(round(rtadj, 2)) }, cntr = centerObject, cntrPr = profCtr, parms = param) - ## Add also the rtime of the center sample: - adjRt <- vector("list", nSamples) - adjRt[centerSample(param)] <- list(unname(rtime(centerObject))) - ## Add the result. - idxs <- 1:nSamples - idxs <- idxs[idxs != centerSample(param)] - adjRt[idxs] <- res - return(adjRt) + ## Create result + adjRt <- vector("list", total_samples) + adjRt[subs[centerSample(param)]] <- list(unname(rtime(centerObject))) + adjRt[subs[-centerSample(param)]] <- res + adjustRtimeSubset(rtraw, adjRt, subset = subs, method = subsetAdjust(param)) } .concatenate_OnDiskMSnExp <- function(...) { @@ -416,7 +616,7 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { ionSource = expdata$ionSource, analyser = expdata$analyser, detectorType = expdata$detectorType) - + ## protocolData protodata <- lapply(x, function(z) z@protocolData) if (any(unlist(lapply(protodata, nrow)) > 0)) @@ -436,3 +636,225 @@ findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { if (validObject(res)) res } + +#' @title Change the file path of an `OnDiskMSnExp` object +#' +#' @aliases dirname dirname,OnDiskMSnExp-method +#' +#' @name dirname +#' +#' @description +#' +#' `dirname` allows to get and set the path to the directory containing the +#' source files of the [OnDiskMSnExp-class] (or [XCMSnExp-class]) object. +#' +#' @param path [OnDiskMSnExp-class]. +#' +#' @param value `character` of length 1 or length equal to the number of files +#' defining the new path to the files. +#' +#' @md +#' +#' @author Johannes Rainer +setMethod("dirname", "OnDiskMSnExp", function(path) { + dirname(fileNames(path)) +}) +#' @rdname dirname +setReplaceMethod("dirname", "OnDiskMSnExp", function(path, value) { + flnms <- fileNames(path) + if (length(value) == 1) + value <- rep(value, length(flnms)) + new_flnms <- normalizePath(paste0(value, .Platform$file.sep, + basename(flnms))) + do_exist <- file.exists(new_flnms) + if (any(!do_exist)) + stop("The following files do not exist: ", + paste(new_flnms[!do_exist], ", ")) + path@processingData@files <- new_flnms + validObject(path) + path +}) + +#' @description +#' +#' Estimate the precursor intensity for an MS2 spectrum based on interpolation +#' using the intensity of the respective m/z peak from the previous and +#' following MS1 spectrum. +#' +#' @param x `OnDiskMSnExp` for a single file +#' +#' @param ppm `numeric(1)` with acceptable difference to MS1 m/z. +#' +#' @param method `character(1)` specifying how the precursor intensity should +#' be estimated, either based on the previous MS1 scan +#' (`method = "previous"`, the default) or using an interpolation between +#' the previous and the subsequent MS1 scan (`method = "interpolation"`) +#' considering also their retention time. +#' +#' @return `numeric` same length than `x` with the estimated precursor intensity +#' or `NA` for MS1 spectra. +#' +#' @author Johannes Rainer +#' +#' @noRd +#' +#' @examples +#' +#' fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", package = "msdata") +#' pest_dda <- readMSData(fl, mode = "onDisk") +#' res <- .estimate_prec_intensity(pest_dda) +#' fData(pest_dda)$precursorIntensity <- res +#' +#' ms2 <- filterMsLevel(pest_dda, msLevel = 2) +#' tic <- vapply(intensity(ms2), function(z) sum(z, na.rm = TRUE), numeric(1)) +#' plot(tic, precursorIntensity(ms2)) # not that nice... +#' +#' fl <- proteomics(full.names = TRUE)[4] +#' tmt <- readMSData(fl, mode = "onDisk") +#' +#' res <- .estimate_prec_intensity(tmt, method = "interpolation") +#' res_2 <- .estimate_prec_intensity(tmt, method = "previous") +#' +#' par(mfrow = c(1, 2)) +#' plot(res, precursorIntensity(tmt)) +#' plot(res_2, precursorIntensity(tmt)) +.estimate_prec_intensity <- function(x, ppm = 10, + method = c("previous", "interpolation")) { + method <- match.arg(method) + pmz <- precursorMz(x) + pmi <- rep(NA_real_, length(pmz)) + idx <- which(!is.na(pmz)) + x_ms1 <- filterMsLevel(x, msLevel = 1L) + ms1_rt <- rtime(x_ms1) + sps <- spectra(x_ms1) + if (method == "previous") { + for (i in idx) { + ms2_rt <- .fdata(x)$retentionTime[i] + ## Find the closest rtime before and the closest rtime after. + before_idx <- which(ms1_rt < ms2_rt) + before_int <- numeric() + if (length(before_idx)) { + sp <- sps[[before_idx[length(before_idx)]]] + before_idx <- closest(pmz[i], sp@mz, ppm = ppm, tolerance = 0, + duplicates = "closest") + if (!is.na(before_idx)) { + before_rt <- sp@rt + before_int <- sp@intensity[before_idx] + before_int <- before_int[!is.na(before_int)] + } + } + if (length(before_int)) + pmi[i] <- before_int + } + } else { + for (i in idx) { + ms2_rt <- .fdata(x)$retentionTime[i] + ## Find the closest rtime before and the closest rtime after. + before_idx <- which(ms1_rt < ms2_rt) + before_int <- numeric() + if (length(before_idx)) { + sp <- sps[[before_idx[length(before_idx)]]] + before_idx <- closest(pmz[i], sp@mz, ppm = ppm, tolerance = 0, + duplicates = "closest") + if (!is.na(before_idx)) { + before_rt <- sp@rt + before_int <- sp@intensity[before_idx] + before_int <- before_int[!is.na(before_int)] + } + } + after_idx <- which(ms1_rt > ms2_rt) + after_int <- numeric() + if (length(after_idx)) { + sp <- sps[[after_idx[1L]]] + after_idx <- closest(pmz[i], sp@mz, ppm = ppm, tolerance = 0, + duplicates = "closest") + if (!is.na(after_idx)) { + after_rt <- sp@rt + after_int <- sp@intensity[after_idx] + after_int <- after_int[!is.na(after_int)] + } + } + ## Check if we have before and after value + if (length(before_int) && length(after_int)) { + pmi[i] <- approx(c(before_rt, after_rt), + c(before_int, after_int), + xout = ms2_rt)$y + } else { + if (length(before_int)) + pmi[i] <- before_int + if (length(after_int)) + pmi[i] <- after_int + } + } + } + pmi +} + +#' @title Estimate precursor intensity for MS level 2 spectra +#' +#' @description +#' +#' `estimatePrecursorIntensity` determines the precursor intensity for a MS 2 +#' spectrum based on the intensity of the respective signal from the +#' neighboring MS 1 spectra (i.e. based on the peak with the m/z matching the +#' precursor m/z of the MS 2 spectrum). Based on parameter `method` either the +#' intensity of the peak from the previous MS 1 scan is used +#' (`method = "previous"`) or an interpolation between the intensity from the +#' previous and subsequent MS1 scan is used (`method = "interpolation"`, which +#' considers also the retention times of the two MS1 scans and the retention +#' time of the MS2 spectrum). +#' +#' @param x `OnDiskMSnExp` or `XCMSnExp` object. +#' +#' @param ppm `numeric(1)` defining the maximal acceptable difference (in ppm) +#' of the precursor m/z and the m/z of the corresponding peak in the MS 1 +#' scan. +#' +#' @param method `character(1)` defining the method how the precursor intensity +#' should be determined (see description above for details). Defaults to +#' `method = "previous"`. +#' +#' @param BPPARAM parallel processing setup. See [bpparam()] for details. +#' +#' @return `numeric` with length equal to the number of spectra in `x`. `NA` is +#' returned for MS 1 spectra or if no matching peak in a MS 1 scan can be +#' found for an MS 2 spectrum +#' +#' @author Johannes Rainer +#' +#' @md +estimatePrecursorIntensity <- function(x, ppm = 10, + method = c("previous", "interpolation"), + BPPARAM = bpparam()) { + method <- match.arg(method) + unlist(bplapply(.split_by_file2(x, subsetFeatureData = FALSE), + .estimate_prec_intensity, ppm = ppm, method = method, + BPPARAM = BPPARAM), use.names = FALSE) +} + +#' Helper function to convert an OnDiskMSnExp to a Spectra object. This will +#' only convert the spectra data, but no sample information. +#' +#' @noRd +.OnDiskMSnExp2MsBackendMzR <- function(x) { + .fData2MsBackendMzR(.fdata(x), fileNames(x)) +} + +.fData2MsBackendMzR <- function(x, filenames, res = new("MsBackendMzR")) { + x$dataStorage <- x$dataOrigin <- filenames[x$fileIdx] + colnames(x)[colnames(x) == "retentionTime"] <- "rtime" + colnames(x)[colnames(x) == "seqNum"] <- "scanIndex" + colnames(x)[colnames(x) == "precursorScanNum"] <- "precScanNum" + colnames(x)[colnames(x) == "precursorMZ"] <- "precursorMz" + colnames(x)[colnames(x) == "isolationWindowTargetMZ"] <- + "isolationWindowTargetMz" + colnames(x)[colnames(x) == "fileIdx"] <- "fromFile" + x$isolationWindowLowerMz <- x$isolationWindowTargetMz - + x$isolationWindowLowerOffset + x$isolationWindowUpperMz <- x$isolationWindowTargetMz + + x$isolationWindowUpperOffset + x$isolationWindowUpperOffset <- NULL + x$isolationWindowLowerOffset <- NULL + slot(res, "spectraData", check = FALSE) <- as(x, "DataFrame") + res +} diff --git a/R/functions-Params.R b/R/functions-Params.R index 823e8274b..eed37c1fd 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -7,9 +7,9 @@ #' appended to the returned list. #' #' @param x A Param class. -#' +#' #' @author Johannes Rainer -#' +#' #' @noRd .param2list <- function(x) { ## Get all slot names, skip those matching the provided pattern. @@ -66,11 +66,11 @@ ## GenericParam #' @return The \code{GenericParam} function returns a \code{GenericParam} #' object. -#' +#' #' @param fun \code{character} representing the name of the function. -#' +#' #' @param args \code{list} (ideally named) with the arguments to the function. -#' +#' #' @rdname GenericParam GenericParam <- function(fun = character(), args = list()) { return(new("GenericParam", fun = fun, args = args)) @@ -85,13 +85,15 @@ CentWaveParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, noise = 0, verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = numeric()) { + firstBaselineCheck = TRUE, roiScales = numeric(), + extendLengthMSW = FALSE) { return(new("CentWaveParam", ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, prefilter = prefilter, mzCenterFun = mzCenterFun, integrate = as.integer(integrate), mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, verboseColumns = verboseColumns, roiList = roiList, - firstBaselineCheck = firstBaselineCheck, roiScales = roiScales)) + firstBaselineCheck = firstBaselineCheck, roiScales = roiScales, + extendLengthMSW = extendLengthMSW)) } #' @return The \code{MatchedFilterParam} function returns a @@ -112,7 +114,7 @@ MatchedFilterParam <- function(binSize = 0.1, impute = "none", } #' Convert the impute method to the old-style method name (e.g. for profMat #' calls) -#' +#' #' @noRd .impute2method <- function(x) { if (impute(x) == "none") @@ -168,15 +170,15 @@ MassifquantParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, #' #' @param peakThr numeric(1) with the minimum absolute intensity #' (above baseline) of peaks to be picked. If provided, the smoothing -#' function \code{\link{sav.gol}} function (in the \code{MassSpecWavelet}) -#' package is called to estimate the local intensity. +#' Savitzky-Golay filter is used (in the \code{MassSpecWavelet}) +#' package to estimate the local intensity. #' #' @param tuneIn logical(1) whther to tune in the parameter #' estimation of the detected peaks. #' #' @param ... Additional parameters to be passed to the -#' \code{\link{identifyMajorPeaks}} and -#' \code{\link{sav.gol}} functions from the +#' \code{\link{peakDetectionCWT}} and +#' \code{\link{identifyMajorPeaks}} functions from the #' \code{MassSpecWavelet} package. #' #' @return The \code{MSWParam} function returns a \code{MSWParam} @@ -224,13 +226,15 @@ CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, mzIntervalExtension = mzIntervalExtension, polarity = polarity)) } -#' @return The \code{PeakDensityParam} function returns a -#' \code{PeakDensityParam} class instance with all of the settings +#' @return The `PeakDensityParam` function returns a +#' `PeakDensityParam` class instance with all of the settings #' specified for chromatographic peak alignment based on peak densities. #' Note that argument `sampleGroups` is mandatory and should represent #' either the sample grouping in the experiment. It's length has to match #' the number of sample in the experiments. -#' +#' +#' @md +#' #' @rdname groupChromPeaks-density PeakDensityParam <- function(sampleGroups = numeric(), bw = 30, minFraction = 0.5, minSamples = 1, @@ -243,10 +247,14 @@ PeakDensityParam <- function(sampleGroups = numeric(), bw = 30, binSize = binSize, maxFeatures = maxFeatures) } -#' @return The \code{MzClustParam} function returns a -#' \code{MzClustParam} class instance with all of the settings -#' specified for high resolution single spectra peak alignment. -#' +#' @return +#' +#' The `MzClustParam` function returns a `MzClustParam` class instance with +#' all of the settings specified for high resolution single spectra peak +#' alignment. +#' +#' @md +#' #' @rdname groupChromPeaks-mzClust MzClustParam <- function(sampleGroups = numeric(), ppm = 20, absMz = 0, minFraction = 0.5, minSamples = 1) { @@ -258,7 +266,7 @@ MzClustParam <- function(sampleGroups = numeric(), ppm = 20, absMz = 0, #' @return The \code{NearestPeaksParam} function returns a #' \code{NearestPeaksParam} class instance with all of the settings #' specified for peak alignment based on peak proximity. -#' +#' #' @rdname groupChromPeaks-nearest NearestPeaksParam <- function(sampleGroups = numeric(), mzVsRtBalance = 10, absMz = 0.2, absRt = 15, kNN = 10) { @@ -271,37 +279,45 @@ NearestPeaksParam <- function(sampleGroups = numeric(), mzVsRtBalance = 10, #' \code{PeakGroupsParam} class instance with all of the settings #' specified for retention time adjustment based on \emph{house keeping} #' features/peak groups. -#' +#' #' @rdname adjustRtime-peakGroups PeakGroupsParam <- function(minFraction = 0.9, extraPeaks = 1, smooth = "loess", span = 0.2, family = "gaussian", - peakGroupsMatrix = matrix(nrow = 0, ncol = 0)) { - return(new("PeakGroupsParam", minFraction = minFraction, - extraPeaks = extraPeaks, smooth = smooth, span = span, - family = family, peakGroupsMatrix = peakGroupsMatrix)) + peakGroupsMatrix = matrix(nrow = 0, ncol = 0), + subset = integer(), + subsetAdjust = c("average", "previous")) { + subsetAdjust <- match.arg(subsetAdjust) + new("PeakGroupsParam", minFraction = minFraction, + extraPeaks = extraPeaks, smooth = smooth, span = span, + family = family, peakGroupsMatrix = peakGroupsMatrix, + subset = as.integer(subset), subsetAdjust = subsetAdjust) } #' @return The \code{ObiwarpParam} function returns a #' \code{ObiwarpParam} class instance with all of the settings #' specified for obiwarp retention time adjustment and alignment. -#' +#' #' @rdname adjustRtime-obiwarp ObiwarpParam <- function(binSize = 1, centerSample = integer(), response = 1L, distFun = "cor_opt", gapInit = numeric(), gapExtend = numeric(), factorDiag = 2, factorGap = 1, - localAlignment = FALSE, initPenalty = 0) { - return(new("ObiwarpParam", binSize = binSize, - centerSample = as.integer(centerSample), - response = as.integer(response), distFun = distFun, - gapInit = gapInit, gapExtend = gapExtend, factorDiag = factorDiag, - factorGap = factorGap, localAlignment = localAlignment, - initPenalty = initPenalty)) + localAlignment = FALSE, initPenalty = 0, + subset = integer(), + subsetAdjust = c("average", "previous")) { + subsetAdjust <- match.arg(subsetAdjust) + new("ObiwarpParam", binSize = binSize, + centerSample = as.integer(centerSample), + response = as.integer(response), distFun = distFun, + gapInit = gapInit, gapExtend = gapExtend, factorDiag = factorDiag, + factorGap = factorGap, localAlignment = localAlignment, + initPenalty = initPenalty, subset = as.integer(subset), + subsetAdjust = subsetAdjust) } #' @return The \code{FillChromPeaksParam} function returns a #' \code{FillChromPeaksParam} object. -#' +#' #' @rdname fillChromPeaks FillChromPeaksParam <- function(expandMz = 0, expandRt = 0, ppm = 0, fixedMz = 0, fixedRt = 0) { @@ -319,7 +335,7 @@ fixedMz <- function(object) object@fixedMz #' the `CalibrantMassParam` class with all settings and properties set. #' #' @md -#' +#' #' @rdname calibrate-calibrant-mass CalibrantMassParam <- function(mz = list(), mzabs = 0.0001, mzppm = 5, neighbors = 3, method = "linear") { @@ -344,3 +360,37 @@ CalibrantMassParam <- function(mz = list(), mzabs = 0.0001, mzppm = 5, .mz <- function(x) x@mz + +#' @rdname refineChromPeaks-clean +#' +#' @md +CleanPeaksParam <- function(maxPeakwidth = 10) { + new("CleanPeaksParam", maxPeakwidth = as.numeric(maxPeakwidth)) +} + +#' @rdname refineChromPeaks-merge +#' +#' @md +MergeNeighboringPeaksParam <- function(expandRt = 2, expandMz = 0, ppm = 10, + minProp = 0.75) { + new("MergeNeighboringPeaksParam", expandRt = as.numeric(expandRt), + expandMz = as.numeric(expandMz), ppm = as.numeric(ppm), + minProp = as.numeric(minProp)) +} + +#' @rdname fillChromPeaks +ChromPeakAreaParam <- function(mzmin = function(z) quantile(z, probs = 0.25), + mzmax = function(z) quantile(z, probs = 0.75), + rtmin = function(z) quantile(z, probs = 0.25), + rtmax = function(z) quantile(z, probs = 0.75)) { + new("ChromPeakAreaParam", mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, + rtmax = rtmax) +} + +#' @rdname refineChromPeaks-filter-intensity +#' +#' @md +FilterIntensityParam <- function(threshold = 0, nValues = 1L, value = "maxo") { + new("FilterIntensityParam", threshold = as.numeric(threshold), + nValues = as.integer(nValues), value = value) +} diff --git a/R/functions-ProcessHistory.R b/R/functions-ProcessHistory.R index 322994426..736c7ad01 100644 --- a/R/functions-ProcessHistory.R +++ b/R/functions-ProcessHistory.R @@ -50,6 +50,26 @@ updateFileIndex <- function(x, old = integer(), new = integer()) { return(x) } +#' For subsetting samples/columns, ensure that both fileIndex and e.g. +#' sampleClass are OK. +#' +#' @noRd +.process_history_subset_samples <- function(x, j = integer()) { + x <- lapply(x, function(z) { + z@fileIndex <- seq_along(j) + if (is(z, "XProcessHistory")) { + prm <- z@param + if (is(prm, "PeakDensityParam") | is(prm, "MzClustParam") | + is (prm, "NearestPeaksParam")) { + prm@sampleGroups <- prm@sampleGroups[j] + } + z@param <- prm + } + z + }) + x +} + ############################################################ ## XProcessHistory XProcessHistory <- function(param = NULL, msLevel = NA_integer_, ...) { @@ -57,7 +77,6 @@ XProcessHistory <- function(param = NULL, msLevel = NA_integer_, ...) { obj <- as(obj, "XProcessHistory") obj@param <- param obj@msLevel <- as.integer(msLevel) - classVersion(obj)["XProcessHistory"] <- "0.0.2" OK <- validObject(obj) if (is.character(OK)) stop(OK) @@ -76,13 +95,13 @@ XProcessHistory <- function(param = NULL, msLevel = NA_integer_, ...) { #' function was called. Usually `1:length(fileNames(xs))`. #' #' @md -#' +#' #' @noRd GenericProcessHistory <- function(fun, args = list(), msLevel = NA_integer_, date. = date(), fileIndex. = NA_integer_) { gp <- new("GenericParam", fun = fun, args = args) - xcms:::XProcessHistory(param = gp, msLevel = msLevel, - date. = date., fileIndex. = fileIndex.) + XProcessHistory(param = gp, msLevel = msLevel, + date. = date., fileIndex. = fileIndex.) } #' Remove a generic process history step based on the name of the function. @@ -129,4 +148,3 @@ dropProcessHistoriesList <- function(x, type, num = -1) { } return(x) } - diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index 66ec72322..bee9cb325 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -7,7 +7,7 @@ #' #' @param num which should be dropped? If \code{-1} all matching will be dropped, #' otherwise just the most recent num. -#' +#' #' @return The XCMSnExp input object with selected ProcessHistory steps dropped. #' #' @noRd @@ -26,7 +26,7 @@ dropGenericProcessHistory <- function(x, fun) { } #' Convert an XCMSnExp to an xcmsSet. -#' +#' #' @noRd .XCMSnExp2xcmsSet <- function(from) { if (any(msLevel(from) > 1)) @@ -39,7 +39,7 @@ dropGenericProcessHistory <- function(x, fun) { ## @groupidx <- featureDefinitions(x)$peakidx if (hasFeatures(from)){ fgs <- featureDefinitions(from) - xs@groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)]) + xs@groups <- S4Vectors::as.matrix(fgs[,names(fgs)!="peakidx"]) rownames(xs@groups) <- NULL xs@groupidx <- fgs$peakidx } @@ -54,7 +54,12 @@ dropGenericProcessHistory <- function(x, fun) { xs@rt <- rts ## @phenoData - xs@phenoData <- pData(from) + pd <- pData(from) + if (nrow(pd) != length(fileNames(from))) { + pd <- data.frame(file_name = basename(fileNames(from))) + rownames(pd) <- pd$file_name + } + xs@phenoData <- pd ## @filepaths xs@filepaths <- fileNames(from) @@ -96,8 +101,8 @@ dropGenericProcessHistory <- function(x, fun) { ## and if this is used at all. ## @filled ... not yet. - if (any(chromPeaks(from)[, "is_filled"] == 1)) { - fld <- which(chromPeaks(from)[, "is_filled"] == 1) + if (any(chromPeakData(from)$is_filled)) { + fld <- which(chromPeakData(from)$is_filled) xs@filled <- as.integer(fld) } ## @dataCorrection (numeric) ? in xcmsSet function, if lockMassFreq. @@ -111,6 +116,16 @@ dropGenericProcessHistory <- function(x, fun) { return(xs) } +.XCMSnExp2SummarizedExperiment <- function(x, ...) { + if (!hasFeatures(x)) + stop("No correspondence analysis results present. Please run ", + "groupChromPeaks first.") + SummarizedExperiment(assays = list(raw = featureValues(x, ...)), + rowData = featureDefinitions(x), + colData = pData(x), + metadata = processHistory(x)) +} + #' @description #' #' Extract a \code{data.frame} of retention time, mz and intensity @@ -133,14 +148,14 @@ dropGenericProcessHistory <- function(x, fun) { #' #' @param msLevel \code{integer} defining the MS level(s) to which the data #' should be restricted prior to data extraction. -#' +#' #' @return #' #' A \code{list} with length equal to the number of files and #' each element being a \code{data.frame} with the extracted values. #' #' @noRd -#' +#' #' @author Johannes Rainer .extractMsData <- function(x, rt, mz, msLevel = 1L) { if (!missing(rt)) { @@ -183,120 +198,15 @@ dropGenericProcessHistory <- function(x, fun) { return(res) } - -## #' @description Integrates the intensities for chromatograpic peak(s). This is -## #' supposed to be called by the fillChromPeaks method. -## #' -## #' @note This reads the full data first and does the subsetting later in R. -## #' -## #' @param object An \code{XCMSnExp} object representing a single sample. -## #' -## #' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, -## #' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. -## #' -## #' @noRd -## .getPeakInt2 <- function(object, peakArea) { -## if (length(fileNames(object)) != 1) -## stop("'object' should be an XCMSnExp for a single file!") -## res <- numeric(nrow(peakArea)) -## spctr <- spectra(object, BPPARAM = SerialParam()) -## mzs <- lapply(spctr, mz) -## valsPerSpect <- lengths(mzs) -## ints <- unlist(lapply(spctr, intensity), use.names = FALSE) -## rm(spctr) -## mzs <- unlist(mzs, use.names = FALSE) -## rtim <- rtime(object) -## for (i in 1:length(res)) { -## rtr <- peakArea[i, c("rtmin", "rtmax")] -## mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim, -## valsPerSpect = valsPerSpect, rtrange = rtr, -## mzrange = peakArea[i, c("mzmin", "mzmax")]) -## if (length(mtx)) { -## if (!all(is.na(mtx[, 3]))) { -## ## How to calculate the area: (1)sum of all intensities / (2)by -## ## the number of data points (REAL ones, considering also NAs) -## ## and multiplied with the (3)rt width. -## ## (1) sum(mtx[, 3], na.rm = TRUE) -## ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used -## ## nrow(mtx) here, which would correspond to the non-NA -## ## intensities within the rt range we don't get the same results -## ## as e.g. centWave. -## ## (3) rtr[2] - rtr[1] -## res[i] <- sum(mtx[, 3], na.rm = TRUE) * -## ((rtr[2] - rtr[1]) / -## (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)) -## } else { -## res[i] <- NA_real_ -## } -## } else { -## res[i] <- NA_real_ -## } -## } -## return(unname(res)) -## } - -## #' @description Integrates the intensities for chromatograpic peak(s). This is -## #' supposed to be called by the fillChromPeaks method. -## #' -## #' @note This reads the full data first and does the subsetting later in R. This -## #' function uses the C getEIC function. -## #' -## #' @param object An \code{XCMSnExp} object representing a single sample. -## #' -## #' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, -## #' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. -## #' -## #' @noRd -## .getPeakInt3 <- function(object, peakArea) { -## if (length(fileNames(object)) != 1) -## stop("'object' should be an XCMSnExp for a single file!") -## if (nrow(peakArea) == 0) { -## return(numeric()) -## } -## res <- matrix(ncol = 4, nrow = nrow(peakArea)) -## res <- numeric(nrow(peakArea)) -## spctr <- spectra(object, BPPARAM = SerialParam()) -## mzs <- lapply(spctr, mz) -## valsPerSpect <- lengths(mzs) -## scanindex <- valueCount2ScanIndex(valsPerSpect) ## Index vector for C calls -## ints <- unlist(lapply(spctr, intensity), use.names = FALSE) -## rm(spctr) -## mzs <- unlist(mzs, use.names = FALSE) -## rtim <- rtime(object) -## for (i in 1:length(res)) { -## rtr <- peakArea[i, c("rtmin", "rtmax")] -## sr <- c(min(which(rtim >= rtr[1])), max(which(rtim <= rtr[2]))) -## eic <- .Call("getEIC", mzs, ints, scanindex, -## as.double(peakArea[i, c("mzmin", "mzmax")]), -## as.integer(sr), as.integer(length(scanindex)), -## PACKAGE = "xcms") -## if (length(eic$intensity)) { -## ## How to calculate the area: (1)sum of all intensities / (2)by -## ## the number of data points (REAL ones, considering also NAs) -## ## and multiplied with the (3)rt width. -## if (!all(is.na(eic$intensity)) && !all(eic$intensity == 0)) { -## res[i] <- sum(eic$intensity, na.rm = TRUE) * -## ((rtr[2] - rtr[1]) / (length(eic$intensity) - 1)) -## } else { -## res[i] <- NA_real_ -## } -## } else { -## res[i] <- NA_real_ -## } -## } -## return(unname(res)) -## } - - #' @description #' #' Integrates the intensities for chromatograpic peak(s). This is #' supposed to be called by the fillChromPeaks method. #' #' @note This reads the full data first and does the subsetting later in R. -#' +#' #' @param object An \code{XCMSnExp} object representing a single sample. -#' +#' #' @param peakArea A \code{matrix} with the peak definition, i.e. #' \code{"rtmin"}, \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. #' @@ -305,9 +215,9 @@ dropGenericProcessHistory <- function(x, fun) { #' #' @param mzCenterFun Name of the function to be used to calculate the mz value. #' Defaults to \code{weighted.mean}, i.e. the intensity weighted mean mz. -#' +#' #' @param cn \code{character} with the names of the result matrix. -#' +#' #' @return #' #' A \code{matrix} with at least columns \code{"mz"}, \code{"rt"}, @@ -318,7 +228,8 @@ dropGenericProcessHistory <- function(x, fun) { #' @noRd .getChromPeakData <- function(object, peakArea, sample_idx, mzCenterFun = "weighted.mean", - cn = c("mz", "rt", "into", "maxo", "sample")) { + cn = c("mz", "rt", "into", "maxo", "sample"), + msLevel = 1L) { if (length(fileNames(object)) != 1) stop("'object' should be an XCMSnExp for a single file!") ncols <- length(cn) @@ -327,12 +238,19 @@ dropGenericProcessHistory <- function(x, fun) { res[, "sample"] <- sample_idx res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] ## Load the data - message("Requesting ", nrow(res), " missing peaks from ", - basename(fileNames(object)), " ... ", appendLF = FALSE) + message("Requesting ", nrow(res), " peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + object <- filterRt( + object, rt = range(peakArea[, c("rtmin", "rtmax")]) + c(-2, 2)) + object <- filterMsLevel(object, msLevel) + if (!length(object)) { + message("FAIL: no MS level ", msLevel, " data available.") + return(res) + } spctr <- spectra(object, BPPARAM = SerialParam()) - mzs <- lapply(spctr, mz) + mzs <- lapply(spctr, function(z) z@mz) valsPerSpect <- lengths(mzs) - ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + ints <- unlist(lapply(spctr, function(z) z@intensity), use.names = FALSE) rm(spctr) mzs <- unlist(mzs, use.names = FALSE) mzs_range <- range(mzs) @@ -385,11 +303,11 @@ dropGenericProcessHistory <- function(x, fun) { } } message("got ", sum(!is.na(res[, "into"])), ".") - return(res) + res } #' @description Same as getChromPeakData, just without retention time. -#' +#' #' @note #' #' The mz and maxo are however estimated differently than for the @@ -410,7 +328,7 @@ dropGenericProcessHistory <- function(x, fun) { res[, "rtmax"] <- -1 res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] ## Load the data - message("Requesting ", nrow(res), " missing peaks from ", + message("Requesting ", nrow(res), " peaks from ", basename(fileNames(object)), " ... ", appendLF = FALSE) spctr <- spectra(object, BPPARAM = SerialParam()) mzs <- lapply(spctr, mz) @@ -429,7 +347,7 @@ dropGenericProcessHistory <- function(x, fun) { ## mtx <- xcms:::.rawMat(mz = mzs, int = ints, scantime = rtime(object), ## valsPerSpect = valsPerSpect, ## mzrange = peakArea[i, c("mzmin", "mzmax")]) - if (length(mtx)) { + if (length(mz_area)) { if (!all(is.na(mtx[, 3]))) { ## How to calculate the area: (1)sum of all intensities res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) @@ -469,7 +387,7 @@ dropGenericProcessHistory <- function(x, fun) { res[, "rtmax"] <- -1 res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] ## Load the data - message("Reguesting ", nrow(res), " missing peaks from ", + message("Reguesting ", nrow(res), " peaks from ", basename(fileNames(object)), " ... ", appendLF = FALSE) spctr <- spectra(object, BPPARAM = SerialParam()) mzs <- lapply(spctr, mz) @@ -504,7 +422,7 @@ dropGenericProcessHistory <- function(x, fun) { mzCenterFun = "weighted.mean", param = MatchedFilterParam(), cn = c("mz", "rt", "into", "maxo", - "sample")) { + "sample"), msLevel = 1L) { if (length(fileNames(object)) != 1) stop("'object' should be an XCMSnExp for a single file!") ncols <- length(cn) @@ -514,9 +432,25 @@ dropGenericProcessHistory <- function(x, fun) { res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] ## Load the data - message("Requesting ", nrow(res), " missing peaks from ", + message("Requesting ", nrow(res), " peaks from ", basename(fileNames(object)), " ... ", appendLF = FALSE) + object <- filterRt( + object, rt = range(peakArea[, c("rtmin", "rtmax")]) + c(-2, 2)) + object <- filterMsLevel(object, msLevel) + if (!length(object)) { + message("FAIL: no data available for the requested m/z - rt range.") + return(res) + } spctr <- spectra(object, BPPARAM = SerialParam()) + ## Issue #653: empty spectra is not supported; this is the same fix applied + ## to matchedFilter peak detection. + spctr <- lapply(spctr, function(z) { + if (!length(z@mz)) { + z@mz <- 0.0 + z@intensity <- 0.0 + } + z + }) mzs <- lapply(spctr, mz) vps <- lengths(mzs) ints <- unlist(lapply(spctr, intensity), use.names = FALSE) @@ -550,7 +484,7 @@ dropGenericProcessHistory <- function(x, fun) { mass <- brks[-length(brks)] + bin_half ## midpoint for the breaks mass_range <- range(mass) - for (i in 1:nrow(res)) { + for (i in seq_len(nrow(res))) { rtr <- peakArea[i, c("rtmin", "rtmax")] mzr <- peakArea[i, c("mzmin", "mzmax")] ## Ensure that the rt region is within the rtrange of the data. @@ -608,12 +542,8 @@ dropGenericProcessHistory <- function(x, fun) { return(res) } - .hasFilledPeaks <- function(object) { - if (hasChromPeaks(object)) - if (any(colnames(chromPeaks(object)) == "is_filled")) - return(any(chromPeaks(object)[, "is_filled"] == 1)) - FALSE + hasChromPeaks(object) & any(chromPeakData(object)$is_filled, na.rm = TRUE) } #' @description @@ -621,13 +551,18 @@ dropGenericProcessHistory <- function(x, fun) { #' Simple helper function to extract the peakidx column from the #' featureDefinitions DataFrame. The function ensures that the names of the #' returned list correspond to the rownames of the DataFrame -#' +#' #' @noRd .peakIndex <- function(object) { - if (!hasFeatures(object)) - stop("No feature definitions present. Please run groupChromPeaks first.") - idxs <- featureDefinitions(object)$peakidx - names(idxs) <- rownames(featureDefinitions(object)) + if (inherits(object, "DataFrame")) { + idxs <- object$peakidx + names(idxs) <- rownames(object) + } else { + if (!hasFeatures(object)) + stop("No feature definitions present. Please run groupChromPeaks first.") + idxs <- featureDefinitions(object)$peakidx + names(idxs) <- rownames(featureDefinitions(object)) + } idxs } @@ -649,7 +584,8 @@ dropGenericProcessHistory <- function(x, fun) { #' by the median retention time across columns. #' #' @rdname adjustRtime-peakGroups -adjustRtimePeakGroups <- function(object, param = PeakGroupsParam()) { +adjustRtimePeakGroups <- function(object, param = PeakGroupsParam(), + msLevel = 1L) { if (!is(object, "XCMSnExp")) stop("'object' has to be an 'XCMSnExp' object.") if (!hasFeatures(object)) @@ -657,20 +593,26 @@ adjustRtimePeakGroups <- function(object, param = PeakGroupsParam()) { if (hasAdjustedRtime(object)) warning("Alignment/retention time correction was already performed, ", "returning a matrix with adjusted retention times.") - nSamples <- length(fileNames(object)) + subs <- subset(param) + if (!length(subs)) + subs <- seq_along(fileNames(object)) + nSamples <- length(subs) pkGrp <- .getPeakGroupsRtMatrix( - peaks = chromPeaks(object), - peakIndex = .peakIndex(object), - nSamples = nSamples, + peaks = chromPeaks(object, msLevel = msLevel), + peakIndex = .peakIndex( + .update_feature_definitions( + featureDefinitions(object), rownames(chromPeaks(object)), + rownames(chromPeaks(object, msLevel = msLevel)))), + sampleIndex = subs, missingSample = nSamples - (nSamples * minFraction(param)), extraPeaks = extraPeaks(param) ) - colnames(pkGrp) <- basename(fileNames(object)) + colnames(pkGrp) <- basename(fileNames(object))[subs] pkGrp } #' @title Visualization of alignment results -#' +#' #' @description #' #' Plot the difference between the adjusted and the raw retention @@ -684,6 +626,8 @@ adjustRtimePeakGroups <- function(object, param = PeakGroupsParam()) { #' @param col colors to be used for the lines corresponding to the individual #' samples. #' +#' @param lwd line width to be used for the lines of the individual samples. +#' #' @param lty line type to be used for the lines of the individual samples. #' #' @param type plot type to be used. See help on the \code{par} function for @@ -710,51 +654,39 @@ adjustRtimePeakGroups <- function(object, param = PeakGroupsParam()) { #' #' @param ylim optional \code{numeric(2)} with the upper and lower limits on #' the y-axis. -#' +#' #' @param ... Additional arguments to be passed down to the \code{plot} #' function. -#' +#' #' @seealso \code{\link{adjustRtime}} for all retention time correction/ #' alignment methods. -#' +#' #' @author Johannes Rainer #' #' @examples -#' ## Below we perform first a peak detection (using the matchedFilter -#' ## method) on some of the test files from the faahKO package followed by -#' ## a peak grouping and retention time adjustment using the "peak groups" -#' ## method -#' library(faahKO) -#' library(xcms) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' -#' ## Reading 2 of the KO samples -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") -#' -#' ## Perform the peak detection using the matchedFilter method. -#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -#' res <- findChromPeaks(raw_data, param = mfp) +#' +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' #' ## Performing the peak grouping using the "peak density" method. -#' p <- PeakDensityParam(sampleGroups = c(1, 1)) -#' res <- groupChromPeaks(res, param = p) +#' p <- PeakDensityParam(sampleGroups = c(1, 1, 1)) +#' res <- groupChromPeaks(faahko_sub, param = p) #' #' ## Perform the retention time adjustment using peak groups found in both #' ## files. #' fgp <- PeakGroupsParam(minFraction = 1) #' res <- adjustRtime(res, param = fgp) #' -#' ## Visualize the impact of the alignment. We show both versions of the plot, -#' ## with the raw retention times on the x-axis (top) and with the adjusted -#' ## retention times (bottom). -#' par(mfrow = c(2, 1)) +#' ## Visualize the impact of the alignment. #' plotAdjustedRtime(res, adjusted = FALSE) #' grid() -#' plotAdjustedRtime(res) -#' grid() -plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", - adjustedRtime = TRUE, +plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, lwd = 1, + type = "l", adjustedRtime = TRUE, xlab = ifelse(adjustedRtime, yes = expression(rt[adj]), no = expression(rt[raw])), @@ -775,6 +707,8 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", col <- rep(col, length(diffRt)) if (length(lty) == 1) lty <- rep(lty, length(diffRt)) + if (length(lwd) == 1) + lwd <- rep(lwd, length(diffRt)) if (length(col) != length(diffRt)) { warning("length of 'col' does not match the number of samples! Will ", "use 'col[1]' for all samples.") @@ -785,6 +719,11 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", "use 'lty[1]' for all samples.") lty <- rep(lty[1], length(diffRt)) } + if (length(lwd) != length(lwd)) { + warning("length of 'lwd' does not match the number of samples! Will ", + "use 'lwd[1]' for all samples.") + lwd <- rep(lwd[1], length(diffRt)) + } ## Initialize plot. if (missing(ylim)) ylim <- range(diffRt, na.rm = TRUE) @@ -793,7 +732,7 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", ## Plot all. for (i in 1:length(diffRt)) points(x = xRt[[i]], y = diffRt[[i]], col = col[i], lty = lty[i], - type = type) + type = type, lwd = lwd[i]) ## If alignment was performed using the peak groups method highlight also ## those in the plot. ph <- processHistory(object, type = .PROCSTEP.RTIME.CORRECTION) @@ -808,6 +747,11 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", rawRt <- rtime(object, adjusted = FALSE, bySample = TRUE) adjRt <- rtime(object, adjusted = TRUE, bySample = TRUE) pkGroup <- peakGroupsMatrix(prm) + subs <- subset(prm) + if (!length(subs)) + subs <- 1:ncol(pkGroup) + rawRt <- rawRt[subs] + adjRt <- adjRt[subs] ## Have to "adjust" these: pkGroupAdj <- pkGroup for (i in 1:ncol(pkGroup)) { @@ -832,130 +776,18 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", } } -#' @title Plot chromatographic peak density along the retention time axis -#' -#' @description -#' -#' Plot the density of chromatographic peaks along the retention -#' time axis and indicate which peaks would be (or were) grouped into the -#' same feature based using the *peak density* correspondence method. -#' Settings for the *peak density* method can be passed with an -#' [PeakDensityParam] object to parameter `param`. If the `object` contains -#' correspondence results and the correspondence was performed with the -#' *peak groups* method, the results from that correspondence can be -#' visualized setting `simulate = FALSE`. -#' -#' @details -#' -#' The `plotChromPeakDensity` function allows to evaluate -#' different settings for the *peak density* on an mz slice of -#' interest (e.g. containing chromatographic peaks corresponding to a known -#' metabolite). -#' The plot shows the individual peaks that were detected within the -#' specified `mz` slice at their retention time (x-axis) and sample in -#' which they were detected (y-axis). The density function is plotted as a -#' black line. Parameters for the `density` function are taken from the -#' `param` object. Grey rectangles indicate which chromatographic peaks -#' would be grouped into a feature by the `peak density` correspondence -#' method. Parameters for the algorithm are also taken from `param`. -#' See [groupChromPeaks-density()] for more information about the -#' algorithm and its supported settings. -#' -#' @param object A [XCMSnExp] object with identified -#' chromatographic peaks. -#' -#' @param mz `numeric(2)` defining an mz range for which the peak density -#' should be plotted. -#' -#' @param rt `numeric(2)` defining an optional rt range for which the -#' peak density should be plotted. Defaults to the absolute retention time -#' range of `object`. -#' -#' @param param [PeakDensityParam] from which parameters for the -#' *peak density* correspondence algorithm can be extracted. If not provided -#' and if `object` contains feature definitions with the correspondence/ -#' peak grouping being performed by the *peak density* method, the -#' corresponding parameter class stored in `object` is used. -#' -#' @param simulate `logical(1)` defining whether correspondence should be -#' simulated within the specified m/z / rt region or (with -#' `simulate = FALSE`) whether the results from an already performed -#' correspondence should be shown. -#' -#' @param col Color to be used for the individual samples. Length has to be 1 -#' or equal to the number of samples in `object`. -#' -#' @param xlab `character(1)` with the label for the x-axis. -#' -#' @param ylab `character(1)` with the label for the y-axis. -#' -#' @param xlim `numeric(2)` representing the limits for the x-axis. -#' Defaults to the range of the `rt` parameter. -#' -#' @param main `character(1)` defining the title of the plot. By default -#' (for `main = NULL`) the mz-range is used. -#' -#' @param type `character(1)` specifying how peaks are called to be located -#' within the region defined by `mz` and `rt`. Can be one of `"any"`, -#' `"within"`, and `"apex_within"` for all peaks that are even partially -#' overlapping the region, peaks that are completely within the region, and -#' peaks for which the apex is within the region. This parameter is passed -#' to the [chromPeaks] function. See related documentation for more -#' information and examples. -#' -#' @param ... Additional parameters to be passed to the `plot` function. Data -#' point specific parameters such as `bg` or `pch` have to be of length 1 -#' or equal to the number of samples. -#' -#' @return The function is called for its side effect, i.e. to create a plot. -#' -#' @author Johannes Rainer -#' -#' @seealso [groupChromPeaks-density()] for details on the -#' *peak density* correspondence method and supported settings. -#' -#' @md -#' -#' @examples -#' -#' ## Below we perform first a peak detection (using the centWave -#' ## method) on some of the test files from the faahKO package. -#' library(faahKO) -#' library(xcms) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' -#' ## Reading 2 of the KO samples -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") -#' -#' ## Perform the peak detection using the centWave method (settings are tuned -#' ## to speed up example execution) -#' res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 3000, snthresh = 40)) -#' -#' ## Align the samples using obiwarp -#' res <- adjustRtime(res, param = ObiwarpParam()) -#' -#' ## Plot the chromatographic peak density for a specific mz range to evaluate -#' ## different peak density correspondence settings. -#' mzr <- c(305.05, 305.15) -#' -#' plotChromPeakDensity(res, mz = mzr, pch = 16, -#' param = PeakDensityParam(sampleGroups = rep(1, length(fileNames(res))))) -#' -#' ## Use a larger bandwidth -#' plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(bw = 60, -#' sampleGroups = rep(1, length(fileNames(res)))), pch = 16) -#' ## Neighboring peaks are now fused into one. -#' -#' ## Require the chromatographic peak to be present in all samples of a group -#' plotChromPeakDensity(res, mz = mzr, pch = 16, -#' param = PeakDensityParam(minFraction = 1, -#' sampleGroups = rep(1, length(fileNames(res))))) -plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE, +.plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE, col = "#00000080", xlab = "retention time", ylab = "sample", xlim = range(rt), main = NULL, type = c("any", "within", "apex_within"), ...) { + .Deprecated( + msg = paste0("Use of 'plotChromPeakDensity' on 'XCMSnExp' is", + "discouraged. Please extract chromatographic ", + "data first and call 'plotChromPeakDensity' ", + "directly on the 'XChromatograms' object. See ", + "?XChromatograms, section 'Correspondence ", + "analysis' for more details.")) type <- match.arg(type) if (missing(object)) stop("Required parameter 'object' is missing") @@ -990,7 +822,7 @@ plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE, nsamples <- length(fileNames(object)) if (length(col) != nsamples) col <- rep_len(col[1], nsamples) - pks <- chromPeaks(object, mz = mz, rt = rt, type = type) + pks <- chromPeaks(object, mz = mz, rt = rt, type = type, msLevel = 1L) if (nrow(pks)) { ## Extract parameters from the param object bw = bw(param) @@ -1031,7 +863,7 @@ plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE, dots$pch <- pch[pks[, "sample"]] } do.call("plot", args = c(list(x = pks[, "rt"], - y = ypos[pks[, "sample"]], xlim = xlim, + y = ypos[pks[, "sample"]], xlim = xlim, col = col[pks[, "sample"]], xlab = xlab, yaxt = "n", ylab = ylab, main = main, ylim = yl), dots)) @@ -1076,30 +908,35 @@ plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE, #' @title Add definition of chromatographic peaks to an extracted chromatogram #' plot -#' +#' #' @description #' #' The \code{highlightChromPeaks} function adds chromatographic #' peak definitions to an existing plot, such as one created by the #' \code{plot} method on a \code{\link{Chromatogram}} or -#' \code{\link{Chromatograms}} object. +#' \code{\link{MChromatograms}} object. #' #' @param x For \code{highlightChromPeaks}: \code{XCMSnExp} object with the #' detected peaks. #' #' @param rt For \code{highlightChromPeaks}: \code{numeric(2)} with the #' retention time range from which peaks should be extracted and plotted. -#' +#' #' @param mz \code{numeric(2)} with the mz range from which the peaks should #' be extracted and plotted. #' -#' @param border colors to be used to color the border of the rectangles. Has to -#' be equal to the number of samples in \code{x}. -#' +#' @param peakIds \code{character} defining the IDs (i.e. rownames of the peak +#' in the \code{chromPeaks} table) of the chromatographic peaks to be +#' highlighted in a plot. +#' +#' @param border colors to be used to color the border of the rectangles/peaks. +#' Has to be equal to the number of samples in \code{x}. +#' #' @param lwd \code{numeric(1)} defining the width of the line/border. #' #' @param col For \code{highlightChromPeaks}: color to be used to fill the -#' rectangle. +#' rectangle (if \code{type = "rect"}) or the peak +#' (for \code{type = "polygon"}). #' #' @param type the plotting type. See \code{\link{plot}} in base grapics for #' more details. @@ -1107,7 +944,10 @@ plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE, #' should be highlighted: \code{type = "rect"} draws a rectangle #' representing the peak definition, \code{type = "point"} indicates a #' chromatographic peak with a single point at the position of the peak's -#' \code{"rt"} and \code{"maxo"}. +#' \code{"rt"} and \code{"maxo"} and \code{type = "polygon"} will highlight +#' the peak shape. For \code{type = "polygon"} the color of the border and +#' area can be defined with parameters \code{"border"} and \code{"col"}, +#' respectively. #' #' @param whichPeaks \code{character(1)} specifying how peaks are called to be #' located within the region defined by \code{mz} and \code{rt}. Can be @@ -1117,90 +957,124 @@ plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE, #' the region. This parameter is passed to the \code{type} argument of the #' \code{\link{chromPeaks}} function. See related documentation for more #' information and examples. -#' +#' #' @param ... additional parameters to the \code{\link{matplot}} or \code{plot} #' function. -#' +#' #' @author Johannes Rainer #' #' @examples #' -#' ## Read some files from the faahKO package. -#' library(xcms) -#' library(faahKO) -#' faahko_3_files <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") #' -#' od <- readMSData(faahko_3_files, mode = "onDisk") +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' -#' ## Peak detection using the 'matchedFilter' method. Note that we are using a -#' ## larger binSize to reduce the runtime of the example. -#' xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) -#' #' ## Extract the ion chromatogram for one chromatographic peak in the data. -#' chrs <- chromatogram(xod, rt = c(2700, 2900), mz = 335) +#' chrs <- chromatogram(faahko_sub, rt = c(2700, 2900), mz = 335) #' #' plot(chrs) #' #' ## Extract chromatographic peaks for the mz/rt range (if any). -#' chromPeaks(xod, rt = c(2700, 2900), mz = 335) -#' +#' chromPeaks(faahko_sub, rt = c(2700, 2900), mz = 335) +#' #' ## Highlight the chromatographic peaks in the area -#' highlightChromPeaks(xod, rt = c(2700, 2900), mz = 335) -highlightChromPeaks <- function(x, rt, mz, +#' ## Show the peak definition with a rectangle +#' highlightChromPeaks(faahko_sub, rt = c(2700, 2900), mz = 335) +#' +#' ## Color the actual peak +#' highlightChromPeaks(faahko_sub, rt = c(2700, 2900), mz = 335, +#' col = c("#ff000020", "#00ff0020"), type = "polygon") +highlightChromPeaks <- function(x, rt, mz, peakIds = character(), border = rep("00000040", length(fileNames(x))), - lwd = 1, col = NA, type = c("rect", "point"), + lwd = 1, col = NA, + type = c("rect", "point", "polygon"), whichPeaks = c("any", "within", "apex_within"), ...) { type <- match.arg(type) + msLevel <- 1L whichPeaks <- match.arg(whichPeaks) n_samples <- length(fileNames(x)) + if (!hasChromPeaks(x)) + stop("'x' does not contain detected peaks") + if (length(peakIds)) { + if (!missing(rt) | !missing(mz)) + warning("Ignoring 'rt' and 'mz' because peakIds were provided") + if (!all(peakIds %in% rownames(chromPeaks(x, msLevel = msLevel)))) + stop("'peakIds' do not match rownames of 'chromPeaks(x)'") + rt <- range(chromPeaks(x)[peakIds, c("rtmin", "rtmax")]) + mz <- range(chromPeaks(x)[peakIds, c("mzmin", "mzmax")]) + } if (missing(rt)) rt <- c(-Inf, Inf) if (missing(mz)) mz <- c(-Inf, Inf) if (!is(x, "XCMSnExp")) stop("'x' has to be a XCMSnExp object") - if (!hasChromPeaks(x)) - stop("'x' does not contain any detected peaks") - pks <- chromPeaks(x, rt = rt, mz = mz, ppm = 0, type = whichPeaks) + if (length(peakIds)) + pks <- chromPeaks(x)[peakIds, , drop = FALSE] + else pks <- chromPeaks(x, rt = rt, mz = mz, ppm = 0, type = whichPeaks, + msLevel = msLevel) if (length(col) != n_samples) col <- rep(col[1], n_samples) if (length(border) != n_samples) border <- rep(border[1], n_samples) if (length(pks)) { - if (type == "rect") - rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], - ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], - border = border[pks[, "sample"]], lwd = lwd, - col = col[pks[, "sample"]]) - if (type == "point") { - ## Fix assignment of point types for each sample. - dots <- list(...) - if (any(names(dots) == "bg")) { - bg <- dots$bg - if (length(bg) != n_samples) - bg <- rep_len(bg[1], n_samples) - dots$bg <- bg[pks[, "sample"]] - } - if (any(names(dots) == "pch")) { - pch <- dots$pch - if (length(pch) != n_samples) - pch <- rep_len(pch[1], n_samples) - dots$pch <- pch[pks[, "sample"]] - } - if (any(is.na(col))) - col <- border - ## Draw a point at the position defined by the "rt" column - do.call("points", args = c(list(x = pks[, "rt"], y = pks[, "maxo"], - col = col[pks[, "sample"]]), dots)) - } + switch(type, + rect = rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], + ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], + border = border[pks[, "sample"]], lwd = lwd, + col = col[pks[, "sample"]]), + point = { + ## Fix assignment of point types for each sample. + dots <- list(...) + if (any(names(dots) == "bg")) { + bg <- dots$bg + if (length(bg) != n_samples) + bg <- rep_len(bg[1], n_samples) + dots$bg <- bg[pks[, "sample"]] + } + if (any(names(dots) == "pch")) { + pch <- dots$pch + if (length(pch) != n_samples) + pch <- rep_len(pch[1], n_samples) + dots$pch <- pch[pks[, "sample"]] + } + if (any(is.na(col))) + col <- border + ## Draw a point at the position defined by the "rt" column + do.call("points", + args = c(list(x = pks[, "rt"], + y = pks[, "maxo"], + col = col[pks[, "sample"]]), dots)) + }, + polygon = { + if (nrow(pks)) { + chrs <- chromatogram( + x, rt = range(pks[, c("rtmin", "rtmax")]), mz = mz) + pks <- pks[order(pks[, "maxo"], decreasing = TRUE), , + drop = FALSE] + for (j in seq_len(nrow(pks))) { + i <- pks[j, "sample"] + chr <- filterRt(chrs[1, i], + rt = pks[j, c("rtmin", "rtmax")]) + xs <- rtime(chr) + xs <- c(xs, xs[length(xs)], xs[1]) + ys <- c(intensity(chr), 0, 0) + nona <- !is.na(ys) + polygon(xs[nona], ys[nona], border = border[i], + col = col[i]) + } + } + }) } } - #' @title General visualizations of peak detection results -#' +#' #' @description #' #' \code{plotChromPeaks} plots the identified chromatographic @@ -1218,13 +1092,13 @@ highlightChromPeaks <- function(x, rt, mz, #' chromatographic peaks for the \code{plotChromPeaks} function can be #' specified using the \code{par} function, i.e. with \code{par(lwd = 3)} #' and \code{par(lty = 2)}, respectively. -#' +#' #' @param x \code{\link{XCMSnExp}} object. #' #' @param file For \code{plotChromPeaks}: \code{numeric(1)} specifying the #' index of the file within \code{x} for which the plot should be created. #' Defaults to \code{1}. -#' +#' #' @param xlim \code{numeric(2)} specifying the x-axis limits (retention time #' dimension). Defaults to \code{NULL} in which case the full retention #' time range of the file is used. @@ -1253,35 +1127,28 @@ highlightChromPeaks <- function(x, rt, mz, #' @param ... Additional arguments passed to the \code{plot} (for #' \code{plotChromPeaks}) and \code{image} (for #' \code{plotChromPeakImage}) functions. Ignored if \code{add = TRUE}. -#' +#' #' @author Johannes Rainer #' #' @seealso \code{\link{highlightChromPeaks}} for the function to highlight #' detected chromatographic peaks in extracted ion chromatogram plots. -#' -#' @examples #' -#' ## Perform peak detection on two files from the faahKO package. -#' library(xcms) -#' library(faahKO) -#' faahko_file <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) -#' -#' od <- readMSData(faahko_file, mode = "onDisk") +#' @examples #' -#' ## Peak detection using the 'matchedFilter' method. Note that we are using a -#' ## larger binSize to reduce the runtime of the example. -#' xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") #' #' ## plotChromPeakImage: plot an image for the identified peaks per file -#' plotChromPeakImage(xod) -#' +#' plotChromPeakImage(faahko_sub) +#' #' ## Show all detected chromatographic peaks from the first file -#' plotChromPeaks(xod) +#' plotChromPeaks(faahko_sub) #' #' ## Plot all detected peaks from the second file and restrict the plot to a #' ## mz-rt slice -#' plotChromPeaks(xod, file = 2, xlim = c(3500, 3600), ylim = c(400, 600)) +#' plotChromPeaks(faahko_sub, file = 2, xlim = c(3500, 3600), ylim = c(400, 600)) plotChromPeaks <- function(x, file = 1, xlim = NULL, ylim = NULL, add = FALSE, border = "#00000060", col = NA, xlab = "retention time", ylab = "mz", @@ -1300,7 +1167,7 @@ plotChromPeaks <- function(x, file = 1, xlim = NULL, ylim = NULL, main <- basename(fileNames(x_file)) ## Get the peaks from the file, restricting to the current limits (might ## speed up things). - pks <- chromPeaks(x_file, mz = ylim, rt = xlim) + pks <- chromPeaks(x_file, mz = ylim, rt = xlim, msLevel = 1L) ## Initialize plot if (!add) plot(3, 3, pch = NA, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, @@ -1343,9 +1210,9 @@ plotChromPeakImage <- function(x, binSize = 30, xlim = NULL, log = FALSE, brks <- seq(xlim[1], xlim[2], by = binSize) if (brks[length(brks)] < xlim[2]) brks <- c(brks, brks[length(brks)] + binSize) - pks <- chromPeaks(x, rt = xlim) + pks <- chromPeaks(x, rt = xlim, msLevel = 1L) if (nrow(pks)) { - rts <- split(pks[, "rt"], pks[, "sample"]) + rts <- split(pks[, "rt"], as.factor(as.integer(pks[, "sample"]))) cnts <- lapply(rts, function(z) { hst <- hist(z, breaks = brks, plot = FALSE) hst$counts @@ -1378,7 +1245,7 @@ plotChromPeakImage <- function(x, binSize = 30, xlim = NULL, log = FALSE, #' #' The `isCalibrated` function returns `TRUE` if chromatographic #' peaks of the [XCMSnExp] object `x` were calibrated and `FALSE` otherwise. -#' +#' #' @md isCalibrated <- function(object) { if (length(processHistory(object, type = .PROCSTEP.CALIBRATION))) @@ -1411,37 +1278,36 @@ isCalibrated <- function(object) { #' history is preserved. #' #' @param object An [XCMSnExp] object. -#' +#' #' @md #' #' @return #' #' A `XCMSnExp` with the raw retention times being replaced with the #' adjusted retention time. -#' +#' #' @author Johannes Rainer #' #' @seealso [adjustRtime()] for the function to perform the alignment (retention #' time correction). -#' +#' #' [adjustedRtime()] for the method to extract adjusted retention times from #' an [XCMSnExp] object. -#' +#' #' [dropAdjustedRtime] for the method to delete alignment results and to #' restore the raw retention times. -#' +#' #' @examples -#' ## Load test data -#' files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) #' -#' od <- readMSData(files, mode = "onDisk") +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") #' -#' ## Apply obiwarp retention time adjustment. We have to convert the -#' ## OnDiskMSnExp first to an XCMSnExp -#' xod <- as(od, "XCMSnExp") -#' xod <- adjustRtime(xod, param = ObiwarpParam()) +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' xod <- adjustRtime(faahko_sub, param = ObiwarpParam()) #' #' hasAdjustedRtime(xod) #' @@ -1452,7 +1318,7 @@ isCalibrated <- function(object) { #' hasAdjustedRtime(xod) #' #' ## Raw retention times have been replaced with adjusted retention times -#' plot(split(rtime(od), fromFile(od))[[1]] - +#' plot(split(rtime(faahko_sub), fromFile(faahko_sub))[[1]] - #' split(rtime(xod), fromFile(xod))[[1]], type = "l") #' #' ## And the process history still contains the settings for the alignment @@ -1467,7 +1333,7 @@ applyAdjustedRtime <- function(object) { ## Copy the data newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) - newFd <- dropAdjustedRtime(newFd) + rm(list = "adjustedRtime", envir = newFd) object@msFeatureData <- newFd object } @@ -1516,23 +1382,14 @@ applyAdjustedRtime <- function(object) { rownames(pks) <- NULL new_x@.processHistory <- unlist(procH) chromPeaks(new_x) <- pks - if (validObject(new_x)) - new_x + rm(pks) + cpd <- do.call(rbind, lapply(x, chromPeakData)) + rownames(cpd) <- rownames(chromPeaks(new_x)) + chromPeakData(new_x) <- cpd + validObject(new_x) + new_x } -#' @description -#' -#' \code{filterFeatureDefinitions} allows to subset the feature definitions of -#' an \code{XCMSnExp} object. Which feature definitions should be kept can be -#' specified with the \code{features} argument that can be a \code{logical}, -#' \code{integer} or \code{character} vector. The function returns the -#' \code{XCMSnExp} with the reduced \code{featureDefinitions} data frame. -#' -#' @param features For \code{filterFeatureDefinitions}: either a \code{integer} -#' specifying the indices of the features (rows) to keep, a \code{logical} -#' with a length matching the number of rows of \code{featureDefinitions} -#' or a \code{character} with the feature (row) names. -#' #' @rdname XCMSnExp-filter-methods filterFeatureDefinitions <- function(x, features) { if (!is(x, "XCMSnExp")) @@ -1575,7 +1432,7 @@ filterFeatureDefinitions <- function(x, features) { #' @title Simple feature summaries #' #' @description -#' +#' #' Simple function to calculate feature summaries. These include counts and #' percentages of samples in which a chromatographic peak is present for each #' feature and counts and percentages of samples in which more than one @@ -1595,7 +1452,10 @@ filterFeatureDefinitions <- function(x, features) { #' #' @param method `character` passed to the [featureValues()] function. See #' respective help page for more information. -#' +#' +#' @param skipFilled `logical(1)` whether filled-in peaks should be excluded +#' (default) or included in the summary calculation. +#' #' @return #' #' `matrix` with one row per feature and columns: @@ -1618,7 +1478,7 @@ filterFeatureDefinitions <- function(x, features) { #' #' @author Johannes Rainer featureSummary <- function(x, group, perSampleCounts = FALSE, - method = "maxint") { + method = "maxint", skipFilled = TRUE) { if (!is(x, "XCMSnExp")) stop("'x' is expected to be an 'XCMSnExp' object") if (!hasFeatures(x)) @@ -1629,6 +1489,8 @@ featureSummary <- function(x, group, perSampleCounts = FALSE, stop("length of 'group' does not match the number of ", "samples in 'x'") } + if (skipFilled && .hasFilledPeaks(x)) + x <- dropFilledChromPeaks(x) ## First determine the number of peaks per sample smpls <- seq_along(fileNames(x)) pks_per_sample <- lapply(featureDefinitions(x)$peakidx, function(z) @@ -1679,7 +1541,7 @@ featureSummary <- function(x, group, perSampleCounts = FALSE, #' the m/z - rt space. #' #' @param x `XCMSnExp` with the features. -#' +#' #' @param expandMz `numeric(1)` with the value to expand each feature (on each #' side) in m/z dimension before identifying overlapping features. #' The resulting `"mzmin"` for the feature is thus `mzmin - expandMz` and @@ -1694,32 +1556,33 @@ featureSummary <- function(x, group, perSampleCounts = FALSE, #' value: `mzmin - mzmin * ppm / 2e6`, `mzmax + mzmax * ppm / 2e6`. Each #' feature is thus expanded in m/z dimension by ppm/2 on each side before #' identifying overlapping features. -#' +#' #' @return `list` with indices of features (in [featureDefinitions()]) that #' are overlapping. #' #' @md -#' +#' #' @author Johannes Rainer #' #' @examples -#' ## Load 2 test files. -#' data <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -#' system.file("cdf/KO/ko16.CDF", package = "faahKO")), -#' mode = "onDisk") #' -#' ## Perform peak detection; parameters set to reduce processing speed -#' data <- findChromPeaks(data, CentWaveParam(noise = 10000, snthresh = 40)) +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' #' ## Correspondence analysis -#' data <- groupChromPeaks(data, param = PeakDensityParam(sampleGroups = c(1, 1))) +#' xdata <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = c(1, 1, 1))) #' #' ## Identify overlapping features -#' overlappingFeatures(data) +#' overlappingFeatures(xdata) #' #' ## Identify features that are separated on retention time by less than #' ## 2 minutes -#' overlappingFeatures(data, expandRt = 60) +#' overlappingFeatures(xdata, expandRt = 60) overlappingFeatures <- function(x, expandMz = 0, expandRt = 0, ppm = 0) { if (!is(x, "XCMSnExp")) stop("'x' is expected to be an 'XCMSnExp' object") @@ -1755,7 +1618,7 @@ overlappingFeatures <- function(x, expandMz = 0, expandRt = 0, ppm = 0) { #' @description #' #' Export the feature table for further analysis in the MetaboAnalyst -#' software (or the `MetaboAnalystR` R package. +#' software (or the `MetaboAnalystR` R package). #' #' @param x [XCMSnExp] object with identified chromatographic peaks grouped #' across samples. @@ -1773,20 +1636,27 @@ overlappingFeatures <- function(x, expandMz = 0, expandRt = 0, ppm = 0) { #' @param digits `integer(1)` defining the number of significant digits to be #' used for numeric. The default `NULL` uses `getOption("digits")`. See #' [format()] for more information. -#' +#' +#' @param groupnames `logical(1)` whether row names of the resulting matrix +#' should be the feature IDs (`groupnames = FALSE`; default) or IDs that +#' are composed of the m/z and retention time of the features (in the +#' format `MT` (`groupnames = TRUE`). See help of the [groupnames] +#' function for details. +#' #' @param ... additional parameters to be passed to the [featureValues()] #' function. #' #' @return If `file` is not specified, the function returns the `matrix` in #' the format supported by MetaboAnalyst. -#' +#' #' @export #' #' @author Johannes Rainer #' #' @md exportMetaboAnalyst <- function(x, file = NULL, label, - value = "into", digits = NULL, ...) { + value = "into", digits = NULL, + groupnames = FALSE, ...) { if (!is(x, "XCMSnExp")) stop("'x' is supposed to be an XCMSnExp object") fv <- featureValues(x, value = value, ...) @@ -1796,6 +1666,8 @@ exportMetaboAnalyst <- function(x, file = NULL, label, if (missing(label)) stop("Please provide the group assignment of the samples with the ", "'label' parameter") + if (groupnames) + rownames(fv) <- groupnames(x) if (length(label) == 1) { if (any(colnames(pData(x)) == label)) label <- as.character(pData(x)[, label]) @@ -1817,3 +1689,1747 @@ exportMetaboAnalyst <- function(x, file = NULL, label, else fv } + +#' @description +#' +#' Identifies for all peaks of an `XCMSnExp` object MS2 spectra and returns +#' them. Identification is performed separately (but parallel) for each file. +#' +#' @author Johannes Rainer +#' +#' @noRd +ms2_mspectrum_for_all_peaks <- function(x, expandRt = 0, expandMz = 0, + ppm = 0, method = c("all", + "closest_rt", + "closest_mz", + "signal"), + skipFilled = FALSE, subset = NULL, + BPPARAM = bpparam()) { + ## DEPRECATE THIS IN BIOC3.14 + method <- match.arg(method) + pks <- chromPeaks(x) + if (ppm != 0) + ppm <- pks[, "mz"] * ppm / 1e6 + if (expandMz != 0 || length(ppm) > 1) { + pks[, "mzmin"] <- pks[, "mzmin"] - expandMz - ppm + pks[, "mzmax"] <- pks[, "mzmax"] + expandMz + ppm + } + if (expandRt != 0) { + pks[, "rtmin"] <- pks[, "rtmin"] - expandRt + pks[, "rtmax"] <- pks[, "rtmax"] + expandRt + } + if (length(subset)) { + if (!(min(subset) >= 1 && max(subset) <= nrow(pks))) + stop("If 'subset' is defined it has to be >= 1 and <= ", + nrow(pks), ".") + not_subset <- rep(TRUE, nrow(pks)) + not_subset[subset] <- FALSE + pks[not_subset, "mz"] <- NA + rm(not_subset) + } + if (skipFilled && any(chromPeakData(x)$is_filled)) + pks[chromPeakData(x)$is_filled, ] <- NA + ## Split data per file + file_factor <- factor(pks[, "sample"]) + peak_ids <- rownames(pks) + pks <- split.data.frame(pks, f = file_factor) + x <- .split_by_file2( + x, msLevel. = 2L, subsetFeatureData = FALSE)[as.integer(levels(file_factor))] + res <- bpmapply(ms2_mspectrum_for_peaks_from_file, x, pks, + MoreArgs = list(method = method), SIMPLIFY = FALSE, + USE.NAMES = FALSE, BPPARAM = BPPARAM) + res <- unsplit(res, file_factor) + names(res) <- peak_ids + res +} + +#' @description +#' +#' Identify for all chromatographic peaks of a single file MS2 spectra with +#' an precursor m/z within the peak's m/z and retention time within the peak's +#' retention time width and return them. +#' +#' @note +#' +#' If needed, the m/z and rt width of the peaks should be increased previously. +#' No MS2 spectra are identified for peaks with an `"mz"` of `NA`, thus, to +#' skip identification for some (e.g. filled-in) peaks their value in the +#' `"mz"` column of `pks` should be set to `NA` before passing the parameter +#' to the function. +#' +#' @param x `OnDiskMSnExp` with (only) MS2 spectra of a single file. +#' +#' @param pks `matrix` with chromatographic peaks of a single file. +#' +#' @param method `character` defining the method to optionally select a single +#' MS2 spectrum for the peak. +#' +#' @return `list` with length equal to the number of rows of `pks` with +#' `Spectrum2` objects +#' +#' @author Johannes Rainer +#' +#' @noRd +ms2_mspectrum_for_peaks_from_file <- function(x, pks, method = c("all", + "closest_rt", + "closest_mz", + "signal")) { + ## DEPRECATE THIS IN BIOC3.14 + res <- vector(mode = "list", nrow(pks)) + if (nrow(pks) == 0 || !any(msLevel(x) == 2)) + return(res) + method <- match.arg(method) + fromFile <- as.integer(pks[1, "sample"]) + sps <- spectra(x) + pmz <- precursorMz(x) + rtm <- rtime(x) + for (i in 1:nrow(pks)) { + if (is.na(pks[i, "mz"])) + next + idx <- which(pmz >= pks[i, "mzmin"] & pmz <= pks[i, "mzmax"] & + rtm >= pks[i, "rtmin"] & rtm <= pks[i, "rtmax"]) + if (length(idx)) { + if (length(idx) > 1 & method != "all") { + if (method == "closest_rt") + idx <- idx[order(abs(rtm[idx] - pks[i, "rt"]))][1] + if (method == "closest_mz") + idx <- idx[order(abs(pmz[idx] - pks[i, "mz"]))][1] + if (method == "signal") { + sps_sub <- sps[idx] + ints <- vapply(sps_sub, function(z) sum(intensity(z)), + numeric(1)) + idx <- idx[order(abs(ints - pks[i, "maxo"]))][1] + } + if (method == "largest_tic") { + sps_sub <- sps[idx] + ints <- vapply(sps_sub, function(z) sum(intensity(z)), + numeric(1)) + idx <- idx[order(ints, decreasing = TRUE)][1L] + } + if (method == "largest_bpi") { + sps_sub <- sps[idx] + ints <- vapply(sps_sub, function(z) max(intensity(z)), + numeric(1)) + idx <- idx[order(ints, decreasing = TRUE)][1L] + } + } + res[[i]] <- lapply(sps[idx], function(z) { + z@fromFile = fromFile + z + }) + } + } + names(res) <- rownames(pks) + res +} + +#' given an XCMSnExp this function identifies for each MS1 chromatographic +#' peak all MS2 spectra with a precursor m/z within the peak region and returns +#' a `Spectra` object with all of these spectra. +#' +#' @return a `list`, same length than there are `chromPeaks` with a `Spectra` +#' in each, if found. +#' +#' @noRd +.spectra_for_peaks <- function(x, method = c("all", "closest_rt", + "closest_mz", "signal", + "largest_tic", "largest_bpi"), + msLevel = 2L, expandRt = 0, expandMz = 0, + ppm = 0, skipFilled = FALSE, + peaks = character()) { + if (is(x, "XCMSnExp") && hasAdjustedRtime(x)) + fData(x)$retentionTime <- rtime(x) + ## from_msl <- 1L + method <- match.arg(method) + if (msLevel == 1L && method %in% c("closest_mz", "signal")) { + warning("method = \"closest_mz\" and method = \"signa;\" are not", + " supported for msLevel = 1. Changing to method = \"all\".") + method <- "all" + } + pks <- as.data.frame(chromPeaks(x))[, c("mz", "mzmin", "mzmax", "rt", + "rtmin", "rtmax", "maxo", "sample")] + if (ppm != 0) + expandMz <- expandMz + pks$mz * ppm / 1e6 + if (expandMz[1L] != 0) { + pks$mzmin <- pks$mzmin - expandMz + pks$mzmax <- pks$mzmax + expandMz + } + if (expandRt != 0) { + pks$rtmin <- pks$rtmin - expandRt + pks$rtmax <- pks$rtmax + expandRt + } + if (length(peaks)) { + peaks <- .i2index(peaks, rownames(pks), "peaks") + keep <- rep(FALSE, nrow(pks)) + keep[peaks] <- TRUE + } else { + keep <- rep(TRUE, nrow(pks)) + if (skipFilled && any(chromPeakData(x)$is_filled)) + keep <- !chromPeakData(x)$is_filled + ## if (any(chromPeakData(x)$ms_level)) + ## keep <- keep & chromPeakData(x)$ms_level == from_msl + } + ## maybe subset by peak ID. + fns <- fileNames(x) + res <- vector("list", length(keep)) + be <- new("MsBackendMzR") + sps <- new("Spectra") + for (i in which(keep)) { + sel <- .fdata(x)$msLevel == msLevel & + .fdata(x)$retentionTime >= pks$rtmin[i] & + .fdata(x)$retentionTime <= pks$rtmax[i] & + .fdata(x)$fileIdx == pks$sample[i] + if (msLevel > 1L) + sel <- sel & + .fdata(x)$precursorMZ >= pks$mzmin[i] & + .fdata(x)$precursorMZ <= pks$mzmax[i] + fd <- .fdata(x)[which(sel), ] + if (nrow(fd)) { + fd$peak_index <- i + fd$peak_id <- rownames(pks)[i] + sp <- .fData2MsBackendMzR(fd, fns, be) + sp <- switch( + method, + all = sp, + closest_rt = { + sp[which.min(abs(pks$rt[i] - rtime(sp)))[1L]] + }, + closest_mz = { + sp[which.min(abs(pks$mz[i] - precursorMz(sp)))[1L]] + }, + signal = { + ints <- vapply(intensity(sp), sum, numeric(1)) + sp[which.min(abs(ints - pks$maxo[i]))[1L]] + }, + largest_tic = { + ints <- vapply(intensity(sp), sum, numeric(1)) + sp[which.max(ints)[1L]] + }, + largest_bpi = { + ints <- vapply(intensity(sp), max, numeric(1)) + sp[which.max(ints)[1L]] + }) + slot(sps, "backend", check = FALSE) <- sp + res[[i]] <- sps + } + } + names(res) <- rownames(pks) + if (length(peaks)) + res[peaks] + else res +} + +#' @title Extract spectra associated with chromatographic peaks +#' +#' @description +#' +#' Extract (MS1 or MS2) spectra from an [XCMSnExp] object for each identified +#' chromatographic peak. The function returns by default spectra for +#' chromatographic peaks of **all** MS levels, but parameter `peaks` allows to +#' restrict the result to selected chromatographic peaks. +#' For `msLevel = 1L` (only supported for `return.type = "Spectra"` or +#' `return.type = "List"`) MS1 spectra within the retention time boundaries +#' (in the file in which the peak was detected) are returned. For +#' `msLevel = 2L` MS2 spectra are returned for a chromatographic +#' peak if their precursor m/z is within the retention time and m/z range of +#' the chromatographic peak. Parameter `method` allows to define whether all +#' or a single spectrum should be returned: +#' +#' - `method = "all"`: (default): return all spectra for each peak. +#' - `method = "closest_rt"`: return the spectrum with the retention time +#' closest to the peak's retention time (at apex). +#' - `method = "closest_mz"`: return the spectrum with the precursor m/z +#' closest to the peaks's m/z (at apex); only supported for `msLevel = 2L`. +#' - `method = "signal"`: return the spectrum with the sum of intensities most +#' similar to the peak's apex signal (`"maxo"`); only supported for +#' `msLevel = 2L`. +#' - `method = "largest_tic"`: return the spectrum with the largest total +#' signal (sum of peaks intensities). +#' - `method = "largest_bpi"`: return the spectrum with the largest peak +#' intensity (maximal peak intensity). +#' +#' Parameter `return.type` allows to specify the *type* of the result object. +#' Please use `return.type = "Spectra"` or `return.type = "List"`, +#' `return.type = "list"` or the default `return.type = "MSpectra"` will be +#' deprecated (also, they do not support extracting MS1 spectra). +#' +#' See also the *LC-MS/MS data analysis* vignette for more details and examples. +#' +#' @param x [XCMSnExp] object with identified chromatographic peaks. +#' +#' @param msLevel `integer(1)` defining whether MS1 or MS2 spectra should be +#' returned. `msLevel = 1` is currently only supported for `return.type` +#' being `"Spectra"` or `"List"`. +#' +#' @param expandRt `numeric(1)` to expand the retention time range of each +#' peak by a constant value on each side. +#' +#' @param expandMz `numeric(1)` to expand the m/z range of each peak by a +#' constant value on each side. +#' +#' @param ppm `numeric(1)` to expand the m/z range of each peak (on each side) +#' by a value dependent on the peak's m/z. +#' +#' @param method `character(1)` specifying which spectra to include in the +#' result. Defaults to `method = "all"`. See function description for +#' details. +#' +#' @param peaks `character`, `logical` or `integer` allowing to specify a +#' subset of chromatographic peaks in `chromPeaks` for which spectra should +#' be returned (providing either their ID, a logical vector same length +#' than `nrow(chromPeaks(x))` or their index in `chromPeaks(x)`). This +#' parameter overrides `skipFilled` and is only supported for `return.type` +#' being either `"Spectra"` or `"List"`. +#' +#' @param skipFilled `logical(1)` whether spectra for filled-in peaks should +#' be reported or not. +#' +#' @param return.type `character(1)` defining the result type. Defaults to +#' `return.type = "MSpectra"` but `return.type = "Spectra"` or +#' `return.type = "List"` are preferred. See below for more information. +#' +#' @return +#' +#' parameter `return.type` allow to specify the type of the returned object: +#' +#' - `return.type = "MSpectra"`: a [MSpectra] object with elements being +#' [Spectrum-class] objects. The result objects contains all spectra +#' for all peaks. Metadata column `"peak_id"` provides the ID of the +#' respective peak (i.e. its rowname in [chromPeaks()]). +#' - `return.type = "Spectra"`: a `Spectra` object (defined in the `Spectra` +#' package). The result contains all spectra for all peaks. Metadata column +#' `"peak_id"` provides the ID of the respective peak (i.e. its rowname in +#' [chromPeaks()] and `"peak_index"` its index in the object's `chromPeaks` +#' matrix. +#' - `return.type = "list"`: `list` of `list`s that are either of length +#' 0 or contain [Spectrum2-class] object(s) within the m/z-rt range. The +#' length of the list matches the number of peaks. +#' - `return.type = "List"`: `List` of length equal to the number of +#' chromatographic peaks is returned with elements being either `NULL` (no +#' spectrum found) or a `Spectra` object. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @examples +#' +#' ## Read a file with DDA LC-MS/MS data +#' fl <- system.file("TripleTOF-SWATH/PestMix1_DDA.mzML", package = "msdata") +#' dda <- readMSData(fl, mode = "onDisk") +#' +#' ## Subset the object to reduce runtime of the example +#' dda <- filterRt(dda, c(200, 400)) +#' +#' ## Perform MS1 peak detection +#' dda <- findChromPeaks(dda, CentWaveParam(peakwidth = c(5, 15), prefilter = c(5, 1000))) +#' +#' ## Load the required Spectra package and return all MS2 spectro for each +#' ## chromatographic peaks as a Spectra object +#' ms2_sps <- chromPeakSpectra(dda, return.type = "Spectra") +#' ms2_sps +#' +#' ## columns peak_id or peak_index assign spectra to the chromatographic peaks +#' ms2_sps$peak_id +#' ms2_sps$peak_index +#' chromPeaks(dda) +#' +#' ## Alternatively, return the result as a List of Spectra objects. This list +#' ## is parallel to chromPeaks hence the mapping between chromatographic peaks +#' ## and MS2 spectra is easier. +#' ms2_sps <- chromPeakSpectra(dda, return.type = "List") +#' ms2_sps[[1L]] +#' length(ms2_sps) +#' +#' ## In addition to MS2 spectra we could also return the MS1 spectrum for each +#' ## chromatographic peak which is closest to the peak's apex position. +#' ms1_sps <- chromPeakSpectra(dda, msLevel = 1L, method = "closest_rt", +#' return.type = "Spectra") +#' ms1_sps +#' +#' ## Parameter peaks would allow to extract spectra for specific peaks only +#' chromPeakSpectra(dda, msLevel = 1L, method = "closest_rt", peaks = c(3, 5)) +chromPeakSpectra <- function(x, msLevel = 2L, expandRt = 0, expandMz = 0, + ppm = 0, method = c("all", "closest_rt", + "closest_mz", "signal", + "largest_tic", "largest_bpi"), + skipFilled = FALSE, + return.type = c("MSpectra", "Spectra", + "list", "List"), + peaks = character()) { + method <- match.arg(method) + return.type <- match.arg(return.type) + if (!is(x, "XCMSnExp")) + stop("'x' is supposed to be an 'XCMSnExp' object.") + if (!hasChromPeaks(x)) + stop("No chromatographic peaks present. Please run 'findChromPeaks' ", + "first") + if (return.type %in% c("Spectra", "List")) { + .require_spectra() + if (length(x@spectraProcessingQueue)) + warning("Lazy evaluation queue is not empty. Will ignore any", + " processing steps as 'return.type = \"Spectra\"' and", + " 'return.type = \"List\"' currently don't support a ", + "non-empty processing queue.") + res <- .spectra_for_peaks(x, msLevel = msLevel, method = method, + expandRt = expandRt, expandMz = expandMz, + ppm = ppm, skipFilled = skipFilled, + peaks = peaks) + if (return.type == "Spectra") { + res <- do.call(c, unname(res[lengths(res) > 0])) + if (is(res, "Spectra")) + res@processing <- character() + } else res <- List(res) + } else { + ## DEPRECATE THIS IN BIOC 3.14 + if (length(peaks)) + warning("Ignoring parameter 'peaks' which is only supported for ", + "'return.type = \"Spectra\"' and 'return.type = \"List\"'.") + if (msLevel != 2 || (msLevel == 2 & !any(msLevel(x) == 2))) { + res <- vector(mode = "list", length = nrow(chromPeaks(x))) + names(res) <- rownames(chromPeaks(x)) + if (msLevel != 2) + warning("msLevel = 1 is currently not supported for", + "return.type = \"MSpectra\" or return.type = \"list\".") + if (msLevel == 2 & !any(msLevel(x) == 2)) + warning("No MS2 spectra available in 'x'.") + } else { + res <- ms2_mspectrum_for_all_peaks(x, expandRt = expandRt, + expandMz = expandMz, + ppm = ppm, method = method, + skipFilled = skipFilled) + } + if (return.type == "MSpectra") { + pids <- rep(names(res), lengths(res)) + res <- res[lengths(res) > 0] + if (length(res)) + res <- unlist(res) + res <- MSpectra(res, elementMetadata = DataFrame(peak_id = pids)) + } + } + res +} + +#' For information and details see featureSpectra +#' +#' @noRd +ms2_mspectrum_for_features <- function(x, expandRt = 0, expandMz = 0, ppm = 0, + skipFilled = FALSE, ...) { + idxs <- featureDefinitions(x)$peakidx + sp_pks <- ms2_mspectrum_for_all_peaks(x, expandRt = expandRt, + expandMz = expandMz, ppm = ppm, + skipFilled = skipFilled, + subset = unique(unlist(idxs)), ...) + res <- lapply(idxs, function(z) unlist(sp_pks[z])) + names(res) <- rownames(featureDefinitions(x)) + res +} + +#' @title Extract spectra associated with features +#' +#' @description +#' +#' This function returns spectra associated with the identified features in the +#' input object. By default, spectra are returned for all features (from all +#' MS levels), but parameter `features` allows to specify selected features for +#' which the result should be returned. +#' Parameter `msLevel` allows to define whether MS level 1 or 2 +#' spectra should be returned. For `msLevel = 1L` all MS1 spectra within the +#' retention time range of each chromatographic peak (in that respective data +#' file) associated with a feature are returned. Note that for samples in which +#' no peak was identified (or even filled-in) no spectra are returned. +#' For `msLevel = 2L` all MS2 +#' spectra with a retention time within the retention time range and their +#' precursor m/z within the m/z range of any chromatographic peak of a feature +#' are returned. See also [chromPeakSpectra()] (used internally to extract +#' spectra for each chromatographic peak of a feature) for additional +#' information. +#' +#' In contrast to the [chromPeakSpectra()] function, selecting a `method` +#' different than `"all"` will not return a single spectrum per feature, but +#' one spectrum per **chromatographic peak** assigned to the feature. +#' +#' Note also that `msLevel = 1L` is only supported for `return.type = "List"` +#' or `return.type = "Spectra"`. +#' +#' @param x [XCMSnExp] object with feature defitions available. +#' +#' @inheritParams chromPeakSpectra +#' +#' @param features `character`, `logical` or `integer` allowing to specify a +#' subset of features in `featureDefinitions` for which spectra should +#' be returned (providing either their ID, a logical vector same length +#' than `nrow(featureDefinitions(x))` or their index in +#' `featureDefinitions(x)`). This parameter overrides `skipFilled` and is +#' only supported for `return.type` being either `"Spectra"` or `"List"`. +#' +#' @param ... additional arguments to be passed along to [chromPeakSpectra()], +#' such as `method`. +#' +#' @return +#' +#' parameter `return.type` allow to specify the type of the returned object: +#' +#' - `return.type = "MSpectra"`: a [MSpectra] object with elements being +#' [Spectrum-class] objects. The result objects contains all spectra +#' for all features. Metadata column `"feature_id"` provides the ID of the +#' respective feature (i.e. its rowname in [featureDefinitions()]). +#' - `return.type = "Spectra"`: a `Spectra` object (defined in the `Spectra` +#' package). The result contains all spectra for all features. Metadata column +#' `"feature_id"` provides the ID of the respective feature (i.e. its rowname +#' in [featureDefinitions()]. +#' - `return.type = "list"`: `list` of `list`s that are either of length +#' 0 or contain [Spectrum2-class] object(s) within the m/z-rt range. The +#' length of the list matches the number of features. +#' - `return.type = "List"`: `List` of length equal to the number of +#' features with MS level `msLevel` is returned with elements being either +#' `NULL` (no spectrum found) or a `Spectra` object. +#' +#' @author Johannes Rainer +#' +#' @md +featureSpectra <- function(x, msLevel = 2L, expandRt = 0, expandMz = 0, + ppm = 0, skipFilled = FALSE, + return.type = c("MSpectra", "Spectra", + "list", "List"), + features = character(), ...) { + if (!is(x, "XCMSnExp")) + stop("'x' is supposed to be an 'XCMSnExp' object.") + return.type <- match.arg(return.type) + if (!hasFeatures(x)) + stop("No feature definitions present. Please run 'groupChromPeaks' ", + "first.") + if (return.type %in% c("Spectra", "List")) { + .require_spectra() + if (length(x@spectraProcessingQueue)) + warning("Lazy evaluation queue is not empty. Will ignore any", + " processing steps as 'return.type = \"Spectra\"' and", + " 'return.type = \"List\"' currently don't support a ", + "non-empty processing queue.") + res <- .spectra_for_features(x, msLevel = msLevel, expandRt = expandRt, + expandMz = expandMz, ppm = ppm, + skipFilled = skipFilled, + features = features, ...) + if (return.type == "Spectra") { + res <- do.call(c, unname(res[lengths(res) > 0])) + if (!length(res)) { + warning("No MS level ", msLevel, " spectra found") + if (!is(res, "Spectra")) + res <- Spectra::Spectra() + } + res@processing <- character() + } else res <- List(res) + } else { + ## DEPRECATE IN BIOC3.14 + if (length(features)) + warning("Ignoring parameter 'features': this is only supported", + " for 'return.type = \"Spectra\"' and ", + "'return.type = \"List\"'.") + if (msLevel != 2 || (msLevel == 2 & !any(msLevel(x) == 2))) { + res <- vector(mode = "list", length = nrow(featureDefinitions(x))) + names(res) <- rownames(featureDefinitions(x)) + if (msLevel != 2) + warning("msLevel = 1 is currently only supported for ", + "'return.type = \"Spectra\"' and ", + "'return.type = \"List\"'.") + if (msLevel == 2 & !any(msLevel(x) == 2)) + warning("No MS2 spectra available in 'x'.") + } else { + res <- ms2_mspectrum_for_features( + x, expandRt = expandRt, expandMz = expandMz, + ppm = ppm, skipFilled = skipFilled, ...) + } + if (return.type == "MSpectra") { + fids <- rep(names(res), lengths(res)) + res <- res[lengths(res) > 0] + if (length(res)) { + res <- unlist(res) + pids <- vapply(strsplit(names(res), ".", TRUE), + `[`, character(1), 2) + } else { + pids <- character() + } + res <- MSpectra(res, elementMetadata = DataFrame(feature_id = fids, + peak_id = pids)) + } + } + res +} + +#' get spectra per feature: +#' 1) call chromPeakSpectra to get spectra for all peaks (as List) +#' 2) iterate over features to extract all +#' @noRd +.spectra_for_features <- function(x, msLevel = 2L, expandRt = 0, + expandMz = 0, ppm = 0, skipFilled = TRUE, + features = character(), ...) { + fids <- rownames(featureDefinitions(x)) + idx <- featureDefinitions(x)$peakidx + if (length(features)) { + findex <- .i2index(features, fids, "features") + fids <- fids[findex] + idx <- idx[findex] + } + ## Get spectra for all peaks of these features + pkidx <- sort(unique(unlist(idx, use.names = FALSE))) + peak_sp <- vector("list", nrow(chromPeaks(x))) + peak_sp[pkidx] <- xcms:::.spectra_for_peaks( + x, msLevel = msLevel, expandRt = expandRt, expandMz = expandMz, + ppm = ppm, skipFilled = skipFilled, peaks = pkidx, ...) + res <- lapply(seq_along(fids), function(i) { + z <- peak_sp[idx[[i]]] + if (any(lengths(z))) { + z <- Spectra::concatenateSpectra(z) + z@backend@spectraData <- cbind(z@backend@spectraData, + DataFrame(feature_id = fids[i])) + z@processing <- character() + z + } + }) + names(res) <- fids + res +} + +#' @title Extract ion chromatograms for each feature +#' +#' @description +#' +#' Extract ion chromatograms for features in an [XCMSnExp-class] object. The +#' function returns for each feature its extracted ion chromatogram and all +#' associated peaks with it. The chromatogram is extracted from the m/z - rt +#' region including all chromatographic peaks of that features (i.e. based on +#' the ranges of `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` of all +#' chromatographic peaks of the feature). +#' +#' By default only chromatographic peaks associated with a feature are included +#' for an extracted ion chromatogram (parameter `include = "feature_only"`). By +#' setting `include = "apex_within"` all chromatographic peaks (and eventually +#' the feature which they are part of - if feature definitions are present) +#' that have their apex position within the m/z - rt range from which the +#' chromatogram is extracted are returned too. +#' With `include = "any"` or `include = "all"` all chromatographic peaks (and +#' eventually the feature in which they are present) overlapping the m/z and rt +#' range will be returned. +#' +#' @note +#' +#' When extracting EICs from only the top `n` samples it can happen that one +#' or more of the features specified with `features` are dropped because they +#' have no detected peak in the *top n* samples. The chance for this to happen +#' is smaller if `x` contains also filled-in peaks (with `fillChromPeaks`). +#' +#' @param x `XCMSnExp` object with grouped chromatographic peaks. +#' +#' @param expandMz `numeric(1)` to expand the m/z range for each chromatographic +#' peak by a constant value on each side. Be aware that by extending the +#' m/z range the extracted EIC might **no longer** represent the actual +#' identified chromatographic peak because intensities of potential +#' additional mass peaks within each spectra would be aggregated into the +#' final reported intensity value per spectrum (retention time). +#' +#' @param expandRt `numeric(1)` to expand the retention time range for each +#' chromatographic peak by a constant value on each side. +#' +#' @param aggregationFun `character(1)` specifying the name that should be +#' used to aggregate intensity values across the m/z value range for +#' the same retention time. The default `"sum"` returns a base peak +#' chromatogram. +#' +#' @param features `integer`, `character` or `logical` defining a subset of +#' features for which chromatograms should be returned. Can be the index +#' of the features in `featureDefinitions`, feature IDs (row names of +#' `featureDefinitions`) or a logical vector. +#' +#' @param include `character(1)` defining which chromatographic peaks (and +#' related feature definitions) should be included in the returned +#' [XChromatograms()]. Defaults to `"feature_only"`; See description above +#' for options and details. +#' +#' @param filled `logical(1)` whether filled-in peaks should be included in +#' the result object. The default is `filled = FALSE`, i.e. only detected +#' peaks are reported. +#' +#' @param n `integer(1)` to optionally specify the number of *top n* samples +#' from which the EIC should be extracted. +#' +#' @param value `character(1)` specifying the column to be used to sort the +#' samples. Can be either `"maxo"` (the default) or `"into"` to use the +#' maximal peak intensity or the integrated peak area, respectively. +#' +#' @param ... optional arguments to be passed along to the [chromatogram()] +#' function. +#' +#' @return [XChromatograms()] object. +#' +#' @md +#' +#' @seealso [filterColumnsKeepTop()] to filter the extracted EICs keeping only +#' the *top n* columns (samples) with the highest intensity. +#' +#' @author Johannes Rainer +#' +#' @examples +#' +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Subset the object to a smaller retention time range +#' xdata <- filterRt(faahko_sub, c(2500, 3500)) +#' +#' xdata <- groupChromPeaks(xdata, +#' param = PeakDensityParam(minFraction = 0.8, sampleGroups = rep(1, 3))) +#' +#' ## Get the feature definitions +#' featureDefinitions(xdata) +#' +#' ## Extract ion chromatograms for the first 3 features. Parameter +#' ## `features` can be either the feature IDs or feature indices. +#' chrs <- featureChromatograms(xdata, features = 1:3) +#' +#' ## Plot the XIC for the first feature using different colors for each file +#' plot(chrs[1, ], col = c("red", "green", "blue")) +featureChromatograms <- function(x, expandRt = 0, aggregationFun = "max", + features, + include = c("feature_only", "apex_within", + "any", "all"), + filled = FALSE, + n = length(fileNames(x)), + value = c("maxo", "into"), + expandMz = 0, ...) { + include <- match.arg(include) + value <- match.arg(value) + if (!hasFeatures(x)) + stop("No feature definitions present. Please run first 'groupChromPeaks'") + if (!missing(features)) { + features <- .i2index(features, ids = rownames(featureDefinitions(x)), + "features") + } else features <- seq_len(nrow(featureDefinitions(x))) + if (!length(features)) + return(MChromatograms(ncol = length(fileNames(x)), nrow = 0)) + ## If we want to get chromatograms only in a reduced number of samples + n <- ceiling(n[1]) + if (n != length(fileNames(x))) { + fids <- rownames(featureDefinitions(x))[features] + if (n > length(fileNames(x)) || n < 1) + stop("'n' has to be a positive integer between 1 and ", + length(fileNames(x))) + pks <- chromPeaks(x) + if (!filled) + pks[chromPeakData(x)$is_filled, ] <- NA + vals <- apply(.feature_values( + pks, extractROWS(featureDefinitions(x), features), + method = "maxint", value = value, intensity = "maxo", + colnames = basename(fileNames(x))), + MARGIN = 2, sum, na.rm = TRUE) + sample_idx <- order(vals, decreasing = TRUE)[seq_len(n)] + x <- .filter_file_XCMSnExp(x, sample_idx, keepFeatures = TRUE) + features <- match(fids, rownames(featureDefinitions(x))) + features <- features[!is.na(features)] + if (length(features) < length(fids)) + warning(length(fids) - length(features), " of ", length(fids), + " features not present in the selected samples") + if (!length(features)) + return(MChromatograms(ncol = length(fileNames(x)), nrow = 0)) + } + pks <- chromPeaks(x) + if (length(unique(rownames(pks))) != nrow(pks)) { + rownames(pks) <- .featureIDs(nrow(pks), "CP") + rownames(chromPeaks(x)) <- rownames(pks) + } + pk_idx <- featureDefinitions(x)$peakidx[features] + rt_cols <- c("rtmin", "rtmax") + mz_cols <- c("mzmin", "mzmax") + mat <- do.call(rbind, lapply(pk_idx, function(z) { + pks_current <- pks[z, , drop = FALSE] + c(range(pks_current[, rt_cols]), + range(pks_current[, mz_cols])) + })) + include_peaks <- "apex_within" + if (include %in% c("any", "all")) + include_peaks <- "any" + mat[, 1] <- mat[, 1] - expandRt + mat[, 2] <- mat[, 2] + expandRt + mat[, 3] <- mat[, 3] - expandMz + mat[, 4] <- mat[, 4] + expandMz + colnames(mat) <- c("rtmin", "rtmax", "mzmin", "mzmax") + chrs <- chromatogram(x, rt = mat[, 1:2], mz = mat[, 3:4], + aggregationFun = aggregationFun, filled = filled, + include = include_peaks, ...) + if (include == "feature_only") { + ## Loop over rows/features: + ## subset to peaks of a feature. + fts_all <- featureDefinitions(chrs) + pks_all <- chromPeaks(chrs) + chrs@featureDefinitions <- fts_all[integer(), ] + nr <- nrow(chrs) + nc <- ncol(chrs) + ft_defs <- vector("list", nr) + ft_ids <- rownames(featureDefinitions(x))[features] + ## Keep only a single feature per row + ## Keep only peaks for the features of interest. + for (i in seq_len(nr)) { + is_feature <- which(fts_all$row == i & rownames(fts_all) == ft_ids[i]) + if (!length(is_feature)) + next + ft_def <- extractROWS(fts_all, is_feature) + ft_defs[[i]] <- ft_def + pk_ids <- rownames(pks_all)[ft_def$peakidx[[1]]] + for (j in seq_len(nc)) { + cur_pks <- chrs@.Data[i, j][[1]]@chromPeaks + if (nrow(cur_pks)) { + keep <- which(rownames(cur_pks) %in% pk_ids) + chrs@.Data[i, j][[1]]@chromPeaks <- + cur_pks[keep, , drop = FALSE] + chrs@.Data[i, j][[1]]@chromPeakData <- + extractROWS(chrs@.Data[i, j][[1]]@chromPeakData, keep) + } + } + } + pks_sub <- chromPeaks(chrs) + ## Update the index/mapping between features and peaks (in a loop to + ## support duplicated features). + fts <- lapply(seq_len(nr), function(r) { + .subset_features_on_chrom_peaks( + ft_defs[[r]], pks_all, pks_sub) + }) + chrs@featureDefinitions <- do.call(rbind, fts) + } + if (validObject(chrs)) + chrs +} + +#' @description +#' +#' \code{hasFilledChromPeaks}: whether filled-in peaks are present or not. +#' +#' @rdname XCMSnExp-class +hasFilledChromPeaks <- function(object) { + .hasFilledPeaks(object) +} + +#' Process the results from a peak detection in SWATH pockets. +#' +#' @param x `list` of `XCMSnExp` objects. +#' +#' @param msf `MsFeatureData` of the original object +#' +#' @param fileNames `character` with the file names of the original object. This +#' is required to ensure that column `"sample"` in the chrom peaks matrix +#' contains the correct indices. +#' +#' @return `MsFeatureData` with the `chromPeaks` and `chromPeakData` updated. +#' +#' @author Johannes Rainer +#' +#' @noRd +.swath_collect_chrom_peaks <- function(x, msf, fileNames) { + pks <- do.call(rbind, lapply(x, function(z) { + suppressWarnings(cpks <- chromPeaks(z)) + if (!is.null(cpks) && nrow(cpks)) + cpks[, "sample"] <- match(fileNames(z)[cpks[, "sample"]], fileNames) + cpks + })) + cpd <- do.call(rbind, lapply(x, function(z) { + if (hasChromPeaks(z) && nrow(chromPeakData(z))) { + ret <- chromPeakData(z) + target_mz <- isolationWindowTargetMz(z)[1] + ret$isolationWindow <- .fdata(z)$isolationWindow[1] + ret$isolationWindowTargetMZ <- target_mz + ret$isolationWindowLowerMz <- + target_mz - .fdata(z)$isolationWindowLowerOffset[1] + ret$isolationWindowUpperMz <- + target_mz + .fdata(z)$isolationWindowUpperOffset[1] + ret + } else DataFrame() + })) + if (!nrow(cpd)) + return(msf) + if (hasChromPeaks(msf)) { + idx_start <- max(nrow(chromPeaks(msf)), + as.numeric(sub("CP", "", rownames(chromPeaks(msf))))) + rownames(pks) <- rownames(cpd) <- .featureIDs(nrow(pks), + from = idx_start + 1, + prefix = "CP") + chromPeaks(msf) <- .rbind_fill(chromPeaks(msf), pks) + chromPeakData(msf) <- .rbind_fill(chromPeakData(msf), cpd) + } else { + rownames(pks) <- rownames(cpd) <- .featureIDs(nrow(pks), prefix = "CP") + chromPeaks(msf) <- pks + chromPeakData(msf) <- cpd + } + msf +} + +#' @title Data independent acquisition (DIA): peak detection in isolation windows +#' +#' @description +#' +#' The `findChromPeaksIsolationWindow` function allows to perform a +#' chromatographic peak detection in MS level > 1 spectra of certain isolation +#' windows (e.g. SWATH pockets). The function performs a peak detection, +#' separately for all spectra belonging to the same isolation window and adds +#' them to the [chromPeaks()] matrix of the result object, information about +#' the isolation window they were detected in is added to [chromPeakData()]. +#' Note that peak detection with this method does not remove previously +#' identified chromatographic peaks (e.g. on MS1 level using the +#' [findChromPeaks()] function but adds newly identified peaks to the existing +#' [chromPeaks()] matrix. +#' +#' Isolation windows can be defined with the `isolationWindow` parameter, that +#' by default uses the definition of [isolationWindowTargetMz()], i.e. +#' chromatographic peak detection is performed for all spectra with the same +#' isolation window target m/z (seprarately for each file). The parameter +#' `param` allows to define and configure the peak detection algorithm (see +#' [findChromPeaks()] for more information). +#' +#' @param object `OnDiskMSnExp` or `XCMSnExp` object with the DIA data. +#' +#' @param param Peak detection parameter object, such as a +#' [CentWaveParam-class] object defining and configuring the chromographic +#' peak detection algorithm. +#' See also [findChromPeaks()] for more details. +#' +#' @param msLevel `integer(1)` specifying the MS level in which the peak +#' detection should be performed. By default `msLevel = 2L`. +#' +#' @param isolationWindow `factor` or similar defining the isolation windows in +#' which the peak detection should be performed with length equal to the +#' number of spectra in `object`. +#' +#' @param ... currently not used. +#' +#' @return +#' +#' An `XCMSnExp` object with the chromatographic peaks identified in spectra of +#' each isolation window from each file added to the `chromPeaks` matrix. +#' Isolation window definition for each identified peak are stored as additional +#' columns in [chromPeakData()]. +#' +#' @author Johannes Rainer, Michael Witting +#' +#' @seealso [reconstructChromPeakSpectra()] for the function to reconstruct +#' MS2 spectra for each MS1 chromatographic peak. +#' +#' @md +findChromPeaksIsolationWindow <- + function(object, param, msLevel = 2L, + isolationWindow = isolationWindowTargetMz(object), ...) { + startDate <- date() + if (!is.factor(isolationWindow)) + isolationWindow <- factor(isolationWindow) + if (length(isolationWindow) != length(object)) + stop("length of 'isolationWindow' has to match length of 'object'") + if (all(is.na(isolationWindow))) + stop("all isolation windows in 'isolationWindow' are NA") + if (!inherits(object, "OnDiskMSnExp")) + stop("'object' should be an 'OnDiskMSnExp' or 'XCMSnExp' object") + fData(object)$isolationWindow <- isolationWindow + obj_sub <- selectFeatureData(as(object, "OnDiskMSnExp"), + fcol = c(MSnbase:::.MSnExpReqFvarLabels, + "centroided", + "isolationWindow", + "isolationWindowTargetMZ", + "isolationWindowLowerOffset", + "isolationWindowUpperOffset")) + if (inherits(object, "XCMSnExp")) + fData(obj_sub)$retentionTime <- rtime(object) + res <- lapply(split(obj_sub, f = isolationWindow), + FUN = findChromPeaks, param = param, msLevel = msLevel) + if (!inherits(object, "XCMSnExp")) + object <- as(object, "XCMSnExp") + msf <- new("MsFeatureData") + msf@.xData <- .copy_env(object@msFeatureData) + msf <- .swath_collect_chrom_peaks(res, msf, fileNames(object)) + lockEnvironment(msf, bindings = TRUE) + object@msFeatureData <- msf + xph <- XProcessHistory(param = param, date. = startDate, + type. = .PROCSTEP.PEAK.DETECTION, + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) + object@.processHistory <- c(processHistory(object), list(xph)) + validObject(object) + object + } + +#' @title Data independent acquisition (DIA): reconstruct MS2 spectra +#' +#' @description +#' +#' *Reconstructs* MS2 spectra for each MS1 chromatographic peak (if possible) +#' for data independent acquisition (DIA) data (such as SWATH). See the +#' *LC-MS/MS analysis* vignette for more details and examples. +#' +#' @details +#' +#' In detail, the function performs for each MS1 chromatographic peak: +#' +#' - Identify all MS2 chromatographic peaks from the isolation window +#' containing the m/z of the ion (i.e. the MS1 chromatographic peak) with +#' approximately the same retention time than the MS1 peak (accepted rt shift +#' can be specified with the `diffRt` parameter). +#' - Correlate the peak shapes of the candidate MS2 chromatographic peaks with +#' the peak shape of the MS1 peak retaining only MS2 chromatographic peaks +#' for which the correlation is `> minCor`. +#' - Reconstruct the MS2 spectrum using the m/z of all above selected MS2 +#' chromatographic peaks and their intensity (either `"maxo"` or `"into"`). +#' Each MS2 chromatographic peak selected for an MS1 peak will thus represent +#' one **mass peak** in the reconstructed spectrum. +#' +#' The resulting `Spectra` object provides also the peak IDs of the MS2 +#' chromatographic peaks for each spectrum as well as their correlation value +#' with spectra variables *ms2_peak_id* and *ms2_peak_cor*. +#' +#' @param object `XCMSnExp` with identified chromatographic peaks. +#' +#' @param expandRt `numeric(1)` allowing to expand the retention time range +#' for extracted ion chromatograms by a constant value (for the peak +#' shape correlation). Defaults to `expandRt = 0` hence correlates only +#' the signal included in the identified chromatographic peaks. +#' +#' @param diffRt `numeric(1)` defining the maximal allowed difference between +#' the retention time of the chromatographic peak (apex) and the retention +#' times of MS2 chromatographic peaks (apex) to consider them as +#' representing candidate fragments of the original ion. +#' +#' @param minCor `numeric(1)` defining the minimal required correlation +#' coefficient for MS2 chromatographic peaks to be considered for MS2 +#' spectrum reconstruction. +#' +#' @param intensity `character(1)` defining the column in the `chromPeaks` +#' matrix that should be used for the intensities of the reconstructed +#' spectra's peaks. The same value from the MS1 chromatographic peaks will +#' be used as `precursorIntensity` of the resulting spectra. +#' +#' @param peakId optional `character` vector with peak IDs (i.e. rownames of +#' `chromPeaks`) of MS1 peaks for which MS2 spectra should be reconstructed. +#' By default they are reconstructed for all MS1 chromatographic peaks. +#' +#' @param BPPARAM parallel processing setup. See [bpparam()] for more +#' information. +#' +#' @param return.type `character(1)` defining the type of the returned object. +#' Only `return.type = "Spectra"` is supported, `return.type = "MSpectra"` +#' is deprecated. +#' +#' @return +#' +#' - `Spectra` object (defined in the `Spectra` package) with the +#' reconstructed MS2 spectra for all MS1 peaks in `object`. Contains +#' empty spectra (i.e. without m/z and intensity values) for MS1 peaks for +#' which reconstruction was not possible (either no MS2 signal was recorded +#' or the correlation of the MS2 chromatographic peaks with the MS1 +#' chromatographic peak was below threshold `minCor`. Spectra variables +#' `"ms2_peak_id"` and `"ms2_peak_cor"` (of type [CharacterList()] +#' and [NumericList()] with length equal to the number of peaks per +#' reconstructed MS2 spectrum) providing the IDs and the correlation of the +#' MS2 chromatographic peaks from which the MS2 spectrum was reconstructed. +#' As retention time the median retention times of all MS2 chromatographic +#' peaks used for the spectrum reconstruction is reported. The MS1 +#' chromatographic peak intensity is reported as the reconstructed +#' spectrum's `precursorIntensity` value (see parameter `intensity` above). +#' +#' @author Johannes Rainer, Michael Witting +#' +#' @md +#' +#' @seealso [findChromPeaksIsolationWindow()] for the function to perform MS2 +#' peak detection in DIA isolation windows and for examples. +reconstructChromPeakSpectra <- function(object, expandRt = 0, diffRt = 2, + minCor = 0.8, intensity = "maxo", + peakId = rownames( + chromPeaks(object, msLevel = 1L)), + BPPARAM = bpparam(), + return.type = c("Spectra", "MSpectra")){ + if (!inherits(object, "XCMSnExp") || !hasChromPeaks(object)) + stop("'object' should be an 'XCMSnExp' object with identified ", + "chromatographic peaks") + if (!is.character(peakId)) + stop("'peakId' has to be of type character") + return.type <- match.arg(return.type) + if (return.type != "Spectra") + stop("'return.type = \"MSpectra\"' is deprecated. ", + "Use `return.type = \"Spectra\"' instead.") + n_peak_id <- length(peakId) + peakId <- intersect(peakId, rownames(chromPeaks(object, msLevel = 1L))) + if (!length(peakId)) + stop("None of the provided 'peakId' matches IDs of MS1 ", + "chromatographic peaks") + if (length(peakId) < n_peak_id) + warning("Only ", length(peakId), " of the provided", + " identifiers match IDs of MS1 chromatographic peaks") + object <- selectFeatureData(object, + fcol = c(MSnbase:::.MSnExpReqFvarLabels, + "centroided", + "polarity", + "isolationWindow", + "isolationWindowTargetMZ", + "isolationWindowLowerOffset", + "isolationWindowUpperOffset")) + sps <- bplapply( + .split_by_file2( + object, subsetFeatureData = FALSE, to_class = "XCMSnExp"), + FUN = function(x, files, expandRt, diffRt, minCor, col, pkId, + return.type) { + .reconstruct_dia_ms2(x, expandRt = expandRt, diffRt = diffRt, + minCor = minCor, column = col, peakId = pkId, + fromFile = match(fileNames(x), files)) + }, + files = fileNames(object), expandRt = expandRt, diffRt = diffRt, + minCor = minCor, col = intensity, pkId = peakId, BPPARAM = BPPARAM) + + do.call(c, sps) +} + +#' This function *overwrites* the `MSnbase` .plot_XIC function by adding also +#' a rectangle with the identified chromatographic peak. +#' +#' @param x `XCMSnExp` object with identifie chromatographic peaks. +#' +#' @param peakCol color for the border of the rectangle. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.plot_XIC <- function(x, peakCol = "#00000060", ...) { + peakCol <- peakCol[1] + x <- filterMsLevel(x, 1L) + if (!length(x)) + stop("No MS1 data available") + pks <- chromPeaks(x) + fls <- basename(fileNames(x)) + x <- .split_by_file2(x) + x <- lapply(x, as, "data.frame") + ## Check if we are greedy and plot a too large area + if (any(unlist(lapply(x, nrow)) > 20000)) + warning("The MS area to be plotted seems rather large. It is suggested", + " to restrict the data first using 'filterRt' and 'filterMz'. ", + "See also ?chromatogram and ?Chromatogram for more efficient ", + "functions to plot a total ion chromatogram or base peak ", + "chromatogram.", + immediate = TRUE, call = FALSE) + ## Define the layout. + dots <- list(...) + if (any(names(dots) == "layout")) { + if (!is.null(dots$layout)) + layout(layout) + dots$layout <- NULL + } else + layout(MSnbase:::.vertical_sub_layout(length(x))) + for (i in seq_along(x)) { + do.call(MSnbase:::.plotXIC, + c(list(x = x[[i]], main = fls[i], layout = NULL), dots)) + pks_current <- pks[pks[, "sample"] == i, , drop = FALSE] + if (length(pks_current) && nrow(pks_current)) { + do.call(rect, c(list(xleft = pks_current[, "rtmin"], + ybottom = pks_current[, "mzmin"], + xright = pks_current[, "rtmax"], + ytop = pks_current[, "mzmax"], + border = peakCol), dots)) + } + } +} + +#' @title Group chromatographic peaks based on m/z or retention time +#' +#' @description +#' +#' Group chromatographic peaks if they are overlapping on m/z (independently +#' of their retention time) or *vice versa*. +#' +#' @param x `matrix` with columns `"mzmin"` and `"mzmax"` or `"rtmin"` and +#' `"rtmax"`. +#' +#' @param min_col +#' +#' @param max_col `character(1)` with the name of the column with the upper +#' range (e.g. `"mzmax"` or `"rtmax"`). +#' +#' @param min_col `character(1)` with the name of the column with the lower +#' range (e.g. `"mzmin"` or `"rtmin"`). +#' +#' @param expand `numeric(1)` defining a constant value by which each e.g. m/z +#' range is supposed to be expanded. Note that the mz range will be expanded +#' by `expandMz` in both dimensions (i.e. `"mzmin"` - `expandMz` and +#' `"mzmax"` + `expandMz`. +#' +#' @param ppm `numeric(1)` defining an m/z relative value by which the m/z range +#' should be expanded. +#' +#' @note +#' +#' `x` is supposed to be a `chromPeaks` matrix of a single file, otherwise we're +#' grouping chromatographic peaks across samples. +#' +#' Note also that **each** peak gets expanded by `expandMz`, thus +#' peaks differing by `2 * expand` will be overlapping. As an example: m/z max +#' of one peak is 12.2, m/z min of another one is 12.4, if `expand = 0.1` is +#' used the m/z max of the first peak will be 12.3 and the m/z min of the second +#' one 12.3, thus both are considered *overlapping*. +#' +#' @author Johannes Rainer +#' +#' @return `list` with rownames (chromatographic peak IDs) of peak groups. +#' +#' @noRd +#' +#' @examples +#' +#' mat <- cbind(rtmin = c(10, 13, 16, 18), rtmax = c(12, 15, 17, 20), +#' mzmin = c(2, 3, 4, 7), mzmax = c(2.5, 3.5, 4.2, 7.6)) +#' rownames(mat) <- c("a", "b", "c", "d") +#' .group_overlapping_peaks(mat) +#' +#' .group_overlapping_peaks(mat, expand = 1) +#' +#' .group_overlapping_peaks(mat, expand = 0.25) +.group_overlapping_peaks <- function(x, min_col = "mzmin", max_col = "mzmax", + expand = 0, ppm = 0) { + x[, min_col] <- x[, min_col] - expand - x[, min_col] * ppm / 1e6 + x[, max_col] <- x[, max_col] + expand + x[, max_col] * ppm / 1e6 + reduced_ranges <- .reduce(x[, min_col], x[, max_col]) + res <- vector("list", nrow(reduced_ranges)) + tolerance <- sqrt(.Machine$double.eps) + for (i in seq_along(res)) { + res[[i]] <- rownames(x)[ + x[, min_col] >= reduced_ranges[i, 1] - tolerance & + x[, max_col] <= reduced_ranges[i, 2] + tolerance + ] + } + res +} + +#' @description +#' +#' Identify chromatographic peaks overlapping in m/z dimension and being close +#' on retention time to combine them if they fulfill the additional criteria: +#' intensity at `"rtmax"` for the first chromatographic peak is +#' `> prop * "maxo"` (`"maxo"` being the maximal intensity of the first peak) +#' **and** intensity at `"rtmin"` for the second chromatographic peak is +#' `> prop * "maxo"` of the second peak. +#' +#' @details +#' +#' The function first identifies chromatographic peaks within the same sample +#' that are overlapping on their m/z range. The m/z range can be expanded with +#' parameter `expandMz` or `ppm`. Note that both the upper and lower m/z is +#' expanded by these resulting in m/z ranges that are of size *original m/z +#' range* `+ 2 * expandMz`. +#' +#' All peaks are first ordered by theyr `"mzmin"` and subsequently expanded by +#' `expandMz` and `ppm`. Peaks are grouped if their expanded m/z ranges +#' (`"mzmin" - expandMz - ppm("mzmin")` to `"mzmax + expandMz + ppm("mzmax")`) +#' and rt ranges (`"rtmin" - expandRt` to `"rtmax" + expandRt`) are overlapping. +#' +#' For overlapping peak-candidates a chromatogram is extracted, with the +#' m/z range being the range of the individual chromatographic peak's m/z range +#' expanded by `expandMz` and `ppm` (on both sides). This is to avoid data +#' points in between peaks being `NA`. +#' +#' @param x `XCMSnExp` object with chromatographic peaks of a **single** file or +#' an `OnDiskMSnExp` object, in which case parameters `pks` and `pkd` have +#' to be provided. +#' +#' @param pks `chromPeaks` matrix. +#' +#' @param pkd `chromPeakData` data frame. +#' +#' @param sample_index `integer(1)` representing the index of the sample in the +#' original object. To be used in column `"sample"` of the new peaks. +#' +#' @param expandRt `numeric(1)` defining by how many seconds the retention time +#' window is expanded on both sides to check for overlapping peaks. +#' +#' @param expandMz `numeric(1)` constant value by which the m/z range of each +#' chromatographic peak should be expanded (on both sides!) to check for +#' overlapping peaks. +#' +#' @param ppm `numeric(1)` defining a m/z relative value (in parts per million) +#' by which the m/z range of each chromatographic peak should be expanded +#' to check for overlapping peaks. +#' +#' @param minProp `numeric(1)` between `0` and `1` representing the proporion +#' of intensity to be required for peaks to be joined. See description for +#' more details. The default (`minProp = 0.75`) means that peaks are only +#' joined if the signal half way between then is larger 75% of the smallest +#' of the two peak's `"maxo"` (maximal intensity at peak apex). +#' +#' @return `list` with element `"chromPeaks"`, that contains the peaks `matrix` +#' containing newly merged peaks and original peaks if they could not be +#' merged and `"chromPeakData"` that represents the `DataFrame` with the +#' corresponding metadata information. The merged peaks will have a row +#' name of `NA`. +#' +#' @author Johannes Rainer, Mar Garcia-Aloy +#' +#' @md +#' +#' @noRd +#' +#' @examples +#' +#' xd <- readMSData(system.file('cdf/KO/ko15.CDF', package = "faahKO"), +#' mode = "onDisk") +#' xd <- findChromPeaks(xd, param = CentWaveParam()) +#' +#' xchr <- chromatogram(xd, mz = c(-0.5, 0.5) + 305.1) +#' plot(xchr) +#' +#' res <- xcms:::.merge_neighboring_peaks(xd, expandRt = 4) +#' +#' res_sub <- res[res[, "mz"] >= 305.05 & res[, "mz"] <= 305.15, ] +#' rect(res_sub[, "rtmin"], 0, res_sub[, "rtmax"], res_sub[, "maxo"], +#' border = "red") +#' +#' xchr <- chromatogram(xd, mz = c(-0.5, 0.5) + 496.2) +#' plot(xchr) +#' +#' res <- xcms:::.merge_neighboring_peaks(xd, expandRt = 4) +#' +#' res_sub <- res[res[, "mz"] >= 496.15 & res[, "mz"] <= 496.25, ] +#' rect(res_sub[, "rtmin"], 0, res_sub[, "rtmax"], res_sub[, "maxo"], +#' border = "red") +.merge_neighboring_peaks <- function(x, pks = chromPeaks(x), + pkd = chromPeakData(x), + expandRt = 2, expandMz = 0, ppm = 10, + minProp = 0.75) { + pks <- force(pks) + pkd <- force(pkd) + if (is.null(rownames(pks))) + stop("Chromatographic peak IDs are required.") + ms_level <- unique(pkd$ms_level) + if (length(ms_level) != 1) + stop("Got chromatographic peaks from different MS levels.", call. = FALSE) + mz_groups <- .group_overlapping_peaks(pks, expand = expandMz, ppm = ppm) + mz_groups <- mz_groups[lengths(mz_groups) > 1] + drop_peaks <- rep(FALSE, nrow(pks)) + names(drop_peaks) <- rownames(pks) + message("Evaluating ", length(mz_groups), " peaks in file ", + basename(fileNames(x)), " for merging ... ", appendLF = FALSE) + if (!length(mz_groups)) { + message("OK") + return(list(chromPeaks = pks, chromPeakData = pkd)) + } + ## Defining merge candidates + pk_groups <- list() + chr_def_mat <- list() + current_group <- 1 + for (i in seq_along(mz_groups)) { + rt_groups <- .group_overlapping_peaks( + pks[mz_groups[[i]], , drop = FALSE], + expand = expandRt, min_col = "rtmin", + max_col = "rtmax" + ) + rt_groups <- rt_groups[lengths(rt_groups) > 1] + for (j in seq_along(rt_groups)) { + rt_group <- rt_groups[[j]] + pk_groups[[current_group]] <- rt_group + pks_sub <- pks[rt_group, ] + mzr_sub <- range(pks_sub[, c("mzmin", "mzmax")]) + ## Expand the mz range in a similar fashion than used for checking + ## if chrom peaks are overlapping. This fixes an issue with very + ## low intensities in between two peaks, that tend to have shifted + ## m/z value (because their intensities are so low). + mzr_sub <- mzr_sub + c(-1, 1) * mzr_sub * ppm * 1e-6 + expandMz + chr_def_mat[[current_group]] <- + c(mzr_sub, + range(pks_sub[, c("rtmin", "rtmax")])) + current_group <- current_group + 1 + } + } + if (!length(chr_def_mat)) { + message("OK") + return(list(chromPeaks = pks, chromPeakData = pkd)) + } + chr_def_mat <- do.call(rbind, chr_def_mat) + chrs <- chromatogram(x, mz = chr_def_mat[, c(1, 2)], + rt = chr_def_mat[, c(3, 4)], + msLevel = ms_level) + ## Now proceed to process them. + res_list <- pkd_list <- vector("list", length(pk_groups)) + for (i in seq_along(pk_groups)) { + pk_group <- pk_groups[[i]] + res <- .chrom_merge_neighboring_peaks( + chrs[i, 1], pks = pks[pk_group, , drop = FALSE], + extractROWS(pkd, pk_group), diffRt = 2 * expandRt, + minProp = minProp) + drop_peaks[pk_group[!pk_group %in% rownames(res$chromPeaks)]] <- TRUE + res_list[[i]] <- res$chromPeaks + pkd_list[[i]] <- res$chromPeakData + } + pks_new <- do.call(rbind, res_list) + pks_new[, "sample"] <- pks[1, "sample"] + pkd_new <- do.call(rbind, pkd_list) + keep_peaks <- which(!drop_peaks) + pks <- pks[keep_peaks, , drop = FALSE] + pkd <- extractROWS(pkd, keep_peaks) + idx_new <- is.na(rownames(pks_new)) + message("OK") + list(chromPeaks = rbind(pks, pks_new[idx_new, , drop = FALSE]), + chromPeakData = rbind(pkd, extractROWS(pkd_new, idx_new))) +} + +.filter_file_XCMSnExp <- function(object, file, + keepAdjustedRtime = hasAdjustedRtime(object), + keepFeatures = FALSE) { + if (missing(file)) return(object) + if (is.character(file)) + file <- base::match(file, basename(fileNames(object))) + ## This will not work if we want to get the files in a different + ## order (i.e. c(3, 1, 2, 5)) + file <- base::sort(unique(file)) + ## Error checking - seems that's not performed downstream. + if (!all(file %in% seq_along(fileNames(object)))) + stop("'file' has to be within 1 and the number of files in object.") + has_features <- hasFeatures(object) + has_chrom_peaks <- hasChromPeaks(object) + has_adj_rt <- hasAdjustedRtime(object) + ph <- processHistory(object) + if (has_features && !keepFeatures) { + has_features <- FALSE + ph <- dropProcessHistoriesList(ph, .PROCSTEP.PEAK.GROUPING, num = 1) + } + ## Extracting all the XCMSnExp data from the object. + newFd <- new("MsFeatureData") + ## Subset original data: + nobject <- as(filterFile(as(object, "OnDiskMSnExp"), file = file), + "XCMSnExp") + if (has_adj_rt) + adjustedRtime(newFd) <- adjustedRtime(object, bySample = TRUE)[file] + if (has_chrom_peaks) { + pks <- chromPeaks(object) + idx <- pks[, "sample"] %in% file + pks <- pks[idx, , drop = FALSE] + pks[, "sample"] <- match(pks[, "sample"], file) + if (has_features) { + featureDefinitions(newFd) <- .update_feature_definitions( + featureDefinitions(object), + original_names = rownames(chromPeaks(object)), + subset_names = rownames(pks)) + } + chromPeaks(newFd) <- pks + chromPeakData(newFd) <- extractROWS(chromPeakData(object), idx) + } + if (hasAdjustedRtime(newFd) && !keepAdjustedRtime) + newFd <- dropAdjustedRtime(newFd, rtime(nobject, bySample = TRUE, + adjusted = FALSE)) + ## Remove ProcessHistory not related to any of the files. + ## if (length(ph)) { + ## kp <- unlist(lapply(ph, function(z) { + ## any(fileIndex(z) %in% file) + ## })) + ## ph <- ph[kp] + ## } + ## ## Update file index in process histories. + ## if (length(ph)) { + ## ph <- lapply(ph, function(z) { + ## updateFileIndex(z, old = file, new = 1:length(file)) + ## }) + ## } + lockEnvironment(newFd, bindings = TRUE) + nobject@msFeatureData <- newFd + nobject@.processHistory <- ph + nobject +} + +#' Define the MS region (m/z - rt range) for each feature based on the rtmin, +#' rtmax, mzmin, mzmax of the corresponding detected peaks. +#' +#' @param x `XCMSnExp` object +#' +#' @param mzmin, mzmax, rtmin, rtmax `function` to be applied to the values +#' (rtmin, ...) of the chrom peaks. Defaults to `median` but would also +#' work with `mean` etc. +#' +#' @return `matrix` with columns `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` +#' defining the range of +#' +#' @author Johannes Rainer +#' +#' @noRd +.features_ms_region <- function(x, mzmin = median, mzmax = median, + rtmin = median, rtmax = median, + msLevel = unique(msLevel(x)), + features = character()) { + pk_idx <- featureValues(x, value = "index", method = "maxint", + msLevel = msLevel) + if (length(features)) { + if (!all(features %in% rownames(pk_idx))) + stop(sum(!features %in% rownames(pk_idx)), " IDs defined with ", + "'features' are not available in 'object' for MS level ", + msLevel) + pk_idx <- pk_idx[features, , drop = FALSE] + } + n_ft <- nrow(pk_idx) + rt_min <- rt_max <- mz_min <- mz_max <- numeric(n_ft) + for (i in seq_len(n_ft)) { + idx <- pk_idx[i, ] + tmp_pks <- chromPeaks(x)[idx[!is.na(idx)], , drop = FALSE] + rt_min[i] <- rtmin(tmp_pks[, "rtmin"]) + rt_max[i] <- rtmax(tmp_pks[, "rtmax"]) + mz_min[i] <- mzmin(tmp_pks[, "mzmin"]) + mz_max[i] <- mzmax(tmp_pks[, "mzmax"]) + } + res <- cbind(mzmin = mz_min, mzmax = mz_max, rtmin = rt_min, rtmax = rt_max) + rownames(res) <- rownames(pk_idx) + res +} + +#' @rdname XCMSnExp-class +#' +#' @description +#' +#' \code{featureArea} extracts the m/z - retention time region for each feature. +#' This area is defined by the m/z - retention time regions of all +#' chromatographic peaks associated with a feature. Parameters \code{mzmin}, +#' \code{mzmax}, \code{rtmin} and \code{rtmax} allow to define functions how +#' the corresponding value is calculated from the individual values (such as +#' the \code{"rtmin"}) of all chromatographic peaks of that feature. By default +#' the median \code{"rtmin"}, \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"} +#' is reported. Parameter \code{features} allows to provide feature IDs for +#' which the area should be extracted. By default it is extracted for all +#' features. +#' +#' @param features for \code{featureArea}: IDs of features for which the area +#' should be extracted. +#' +#' @param mzmin for \code{featureArea}: \code{function} to be applied to values +#' in the \code{"mzmin"} column of all chromatographic peaks of a feature +#' to define the lower m/z value of the feature area. +#' Defaults to \code{median}. +#' +#' @param mzmax for \code{featureArea}: \code{function} same as \code{mzmin} +#' but for the \code{"mzmax"} column. +#' +#' @param rtmin for \code{featureArea}: \code{function} same as \code{mzmin} +#' but for the \code{"rtmin"} column. +#' +#' @param rtmax for \code{featureArea}: \code{function} same as \code{mzmin} +#' but for the \code{"rtmax"} column. +featureArea <- function(object, mzmin = median, mzmax = median, rtmin = median, + rtmax = median, msLevel = unique(msLevel(object)), + features = character()) { + if (!hasFeatures(object)) + stop("No correspondence results available. Please run ", + "'groupChromPeaks' first.") + .features_ms_region(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, + rtmax = rtmax, msLevel = force(msLevel), + features = features) +} + +#' @param x `XCMSnExp` object of a single file. +#' +#' @param nValues `integer(1)` defining the number of values that have to be above +#' threshold. +#' +#' @param threshold `numeric(1)` with the threshold value. +#' +#' @param msLevel `integer(1)` with the MS level. +#' +#' @return `integer` with the index of the peaks that should be retained. +#' +#' @author Johannes Rainer +#' +#' @noRd +.chrom_peaks_above_threshold <- function(x, nValues = 1L, threshold = 0, + msLevel = 1L) { + if (length(msLevel) > 1) + stop("Currently only filtering of a single MS level at a ", + "time is supported") + x <- applyAdjustedRtime(x) + chrs <- chromatogram( + filterMsLevel(as(x, "OnDiskMSnExp"), msLevel = msLevel), + rt = chromPeaks(x, msLevel = msLevel)[, c("rtmin", "rtmax")], + mz = chromPeaks(x, msLevel = msLevel)[, c("mzmin", "mzmax")], + msLevel = msLevel) + keep <- vapply(chrs@.Data, function(z) { + sum(z@intensity >= threshold, na.rm = TRUE) >= nValues + }, logical(1)) + keep_all <- rep(TRUE, nrow(chromPeaks(x))) + keep_all[chromPeakData(x)$ms_level == msLevel] <- keep + keep_all +} + +#' @title Manual peak integration and feature definition +#' +#' @description +#' +#' The `manualChromPeaks` function allows to manually define chromatographic +#' peaks which are added to the object's `chromPeaks` matrix. In contrast to +#' [findChromPeaks()], no *peak detection* is performed (e.g. using an +#' algorithm such as *centWave*) but the peak is added as defined by the user. +#' Note that a peak will not be added if no signal (intensity) was found in a +#' sample within the provided boundaries. +#' +#' Because chromatographic peaks are added to eventually previously identified +#' peaks, it is suggested to run [refineChromPeaks()] with the +#' [MergeNeighboringPeaksParam()] approach to merge potentially overlapping +#' peaks. +#' +#' The `manualFeatures` function allows to manually group identified +#' chromatographic peaks into features by providing their index in the +#' object's `chromPeaks` matrix. +#' +#' @param object `XCMSnExp` or `OnDiskMSnExp` object. +#' +#' @param chromPeaks `matrix` defining the boundaries of the chromatographic +#' peaks, one row per chromatographic peak, columns `"mzmin"`, `"mzmax"`, +#' `"rtmin"` and `"rtmax"` defining the m/z and retention time region of +#' each peak. +#' +#' @param peakIdx for `nabbyakFeatyres`: `list` of `integer` vectors with the +#' indices of chromatographic peaks in the object's `chromPeaks` matrix +#' that should be grouped into features. +#' +#' @param samples optional `integer` to select samples in which the peak +#' integration should be performed. By default performed in all samples. +#' +#' @param BPPARAM parallel processing settings (see [bpparam()] for details). +#' +#' @param msLevel `integer(1)` defining the MS level in which peak integration +#' should be performed. +#' +#' @return `XCMSnExp` with the manually added chromatographic peaks or features. +#' +#' @author Johannes Rainer +#' +#' @md +manualChromPeaks <- function(object, chromPeaks = matrix(), + samples = seq_along(fileNames(object)), + BPPARAM = bpparam(), msLevel = 1L) { + if (length(msLevel) > 1L) + stop("Length 'msLevel' is > 1: can only add peaks for one MS level", + " at a time.") + if (!inherits(object, "OnDiskMSnExp")) + stop("'object' has to be either an OnDiskMSnExp or XCMSnExp object") + if (is(object, "XCMSnExp") && hasFeatures(object)) + object <- dropFeatureDefinitions(object) + else object <- as(object, "XCMSnExp") + if (!all(c("mzmin", "mzmax", "rtmin", "rtmax") %in% colnames(chromPeaks))) + stop("'chromPeaks' lacks one or more of the required columns: 'mzmin',", + " 'mzmax', 'rtmin' and 'rtmax'") + if (is.data.frame(chromPeaks)) chromPeaks <- as.matrix(chromPeaks) + if (!all(samples %in% seq_along(fileNames(object)))) + stop("'samples' out of bounds") + if (hasChromPeaks(object)) + cn <- colnames(chromPeaks(object)) + else cn <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", + "intb", "maxo", "sn", "sample") + ## Do integration + res <- bpmapply(xcms:::.split_by_file2(object)[samples], samples, + FUN = function(obj, idx, peakArea, msLevel, cn) { + xcms:::.getChromPeakData(obj, peakArea = peakArea, + sample_idx = idx, + msLevel = msLevel, + cn = cn) + }, MoreArgs = list(peakArea = chromPeaks, + msLevel = msLevel, cn = cn), + BPPARAM = BPPARAM, SIMPLIFY = FALSE, USE.NAMES = FALSE) + res <- do.call(rbind, res) + res <- res[!is.na(res[, "sample"]), , drop = FALSE] + if (nrow(res) == 0) + return(object) + newFd <- new("MsFeatureData") + if (hasChromPeaks(object)) { + newFd@.xData <- .copy_env(object@msFeatureData) + object@msFeatureData <- new("MsFeatureData") + incr <- nrow(chromPeaks(newFd)) + ## Define IDs for the new peaks; include fix for issue #347 + maxId <- max(as.numeric( + sub("M", "", sub("^CP", "", rownames(chromPeaks(newFd)))))) + if (maxId < 1) + stop("chromPeaks matrix lacks rownames; please update ", + "'object' with the 'updateObject' function.") + toId <- maxId + nrow(res) + rownames(res) <- sprintf( + paste0("CP", "%0", ceiling(log10(toId + 1L)), "d"), + (maxId + 1L):toId) + chromPeaks(newFd) <- rbind(chromPeaks(newFd), res) + cpd <- extractROWS(chromPeakData(newFd), rep(1L, nrow(res))) + cpd[,] <- NA + cpd$ms_level <- as.integer(msLevel) + cpd$is_filled <- FALSE + if (!any(colnames(chromPeakData(newFd)) == "is_filled")) + chromPeakData(newFd)$is_filled <- FALSE + chromPeakData(newFd) <- rbind(chromPeakData(newFd), cpd) + rownames(chromPeakData(newFd)) <- rownames(chromPeaks(newFd)) + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + } else { + nr <- nrow(res) + rownames(res) <- sprintf( + paste0("CP", "%0", ceiling(log10(nr + 1L)), "d"), + seq_len(nr)) + chromPeaks(newFd) <- res + cpd <- DataFrame(ms_level = rep(msLevel, nr), is_filled = rep(FALSE, nr)) + rownames(cpd) <- rownames(res) + chromPeakData(newFd) <- cpd + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + } + object +} + +#' @rdname manualChromPeaks +manualFeatures <- function(object, peakIdx = list(), msLevel = 1L) { + if (!length(peakIdx)) + return(object) + if (length(msLevel) > 1L) + stop("Length 'msLevel' is > 1: can only add peaks for one MS level", + " at a time.") + if (!inherits(object, "XCMSnExp")) + stop("'object' has to be an XCMSnExp object") + if (!hasChromPeaks(object)) + stop("No features present. Please run 'findChromPeaks' first.") + peakIdx <- lapply(peakIdx, as.integer) + newFd <- new("MsFeatureData") + newFd@.xData <- xcms:::.copy_env(object@msFeatureData) + if (hasFeatures(newFd)) { + fnew <- as.data.frame(featureDefinitions(newFd))[1L, ] + fnew[] <- NA + rownames(fnew) <- NULL + } else { + cn <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", "npeaks") + fnew <- as.data.frame( + matrix(ncol = 7, nrow = 1, dimnames = list(character(), cn))) + } + res <- lapply(peakIdx, function(z) { + cp <- chromPeaks(newFd)[z, , drop = FALSE] + if (any(is.na(cp[, "sample"]))) + stop("Some of the provided indices are out of bounds. 'peakIdx' ", + "needs to be a list of valid indices in the 'chromPeaks' ", + "matrix.", call. = FALSE) + newf <- fnew + newf$mzmed <- median(cp[, "mz"]) + newf$mzmin <- min(cp[, "mz"]) + newf$mzmax <- max(cp[, "mz"]) + newf$rtmed <- median(cp[, "rt"]) + newf$rtmin <- min(cp[, "rt"]) + newf$rtmax <- max(cp[, "rt"]) + newf$npeaks <- length(z) + newf + }) + res <- DataFrame(do.call(rbind, res)) + res$peakidx <- peakIdx + res$ms_level <- msLevel + ## Define feature IDs + if (hasFeatures(newFd)) + max_id <- max( + as.integer(sub("FT", "", rownames(featureDefinitions(newFd))))) + else max_id <- 0 + rownames(res) <- sprintf( + paste0("FT", "%0", ceiling(log10(max_id + nrow(res) + 1L)), "d"), + (max_id + 1L):(max_id + nrow(res))) + suppressWarnings( + featureDefinitions(newFd) <- rbind(featureDefinitions(newFd), res) + ) + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + object +} diff --git a/R/functions-XChromatogram.R b/R/functions-XChromatogram.R new file mode 100644 index 000000000..bfdd1f47c --- /dev/null +++ b/R/functions-XChromatogram.R @@ -0,0 +1,288 @@ +.validXChromatogram <- function(object) { + txt <- character() + if (nrow(object@chromPeaks)) { + if (!all(.CHROMPEAKS_REQ_NAMES %in% colnames(object@chromPeaks))) + txt <- c(txt, paste0("chromPeaks matrix does not have all required", + " columns: ", paste(.CHROMPEAKS_REQ_NAMES, + collapse = ","))) + if (!is.numeric(object@chromPeaks)) + txt <- c(txt, "chromPeaks should be a numeric matrix") + if (!is.null(colnames(object@chromPeaks)) && + any(object@chromPeaks[, "rtmax"] < object@chromPeaks[, "rtmin"])) + txt <- c(txt, "rtmax has to be larger than rtmin") + if (nrow(object@chromPeaks) != nrow(object@chromPeakData)) + txt <- c(txt, paste0("number of rows of 'chromPeaks' and ", + "'chromPeakData' have to match")) + else if (!is.null(rownames(object@chromPeaks))) { + if (any(rownames(object@chromPeaks) != + rownames(object@chromPeakData))) + txt <- c(txt, paste0("rownames of 'chromPeaks' and ", + "'chromPeakData' have to match")) + } + if (!all(.CHROMPEAKDATA_REQ_NAMES %in% colnames(object@chromPeakData))) + txt <- c(txt, paste0("'chromPeakData' does not have all required", + " columns: ", paste0(.CHROMPEAKDATA_REQ_NAMES, + collapse = ","))) + else { + if (!is.integer(object@chromPeakData$ms_level)) + txt <- c(txt, paste0("column \"ms_level\" should only ", + "contain integer values")) + if (!is.logical(object@chromPeakData$is_filled)) + txt <- c(txt, paste0("column \"is_filled\" should only ", + "contain logical values")) + } + } + if (length(txt)) txt + else TRUE +} + +#' @title Containers for chromatographic and peak detection data +#' +#' @aliases XChromatogram-class XChromatograms-class coerce,MChromatograms,XChromatograms-method +#' +#' @description +#' +#' The `XChromatogram` object allows to store chromatographic data (e.g. +#' an extracted ion chromatogram) along with identified chromatographic peaks +#' within that data. The object inherits all functions from the [Chromatogram()] +#' object in the `MSnbase` package. +#' +#' Multiple `XChromatogram` objects can be stored in a `XChromatograms` object. +#' This class extends [MChromatograms()] from the `MSnbase` package and allows +#' thus to arrange chromatograms in a matrix-like structure, columns +#' representing samples and rows m/z-retention time ranges. +#' +#' All functions are described (grouped into topic-related sections) after the +#' **Arguments** section. +#' +#' @section Creation of objects: +#' +#' Objects can be created with the contructor function `XChromatogram` and +#' `XChromatograms`, respectively. Also, they can be coerced from +#' [Chromatogram] or [MChromatograms()] objects using +#' `as(object, "XChromatogram")` or `as(object, "XChromatograms")`. +#' +#' @section Filtering and subsetting: +#' +#' Besides classical subsetting with `[` specific filter operations on +#' [MChromatograms()] and `XChromatograms` objects are available. See +#' [filterColumnsIntensityAbove()] for more details. +#' +#' @param rtime For `XChromatogram`: `numeric` with the retention times +#' (length has to be equal to the length of `intensity`). +#' +#' @param intensity For `XChromatogram`: `numeric` with the intensity values +#' (length has to be equal to the length of `rtime`). +#' +#' For `featureValues`: `character(1)` specifying the name +#' of the column in `chromPeaks(object)` containing the intensity value +#' of the peak that should be used for the `method = "maxint"` conflict +#' resolution if. +#' +#' @param mz For `XChromatogram`: `numeric(2)` representing the m/z value +#' range (min, max) on which the chromatogram was created. This is +#' supposed to contain the *real* range of m/z values in contrast +#' to the `filterMz` below. +#' For `chromPeaks` and `featureDefinitions`: `numeric(2)` defining the +#' m/z range for which chromatographic peaks or features should be returned. +#' For `filterMz`: `numeric(2)` defining the m/z range for which +#' chromatographic peaks should be retained.#' +#' +#' @param filterMz For `XChromatogram`: `numeric(2)` representing the m/z +#' value range (min, max) that was used to filter the original object +#' on m/z dimension. If not applicable use `filterMz = c(0, 0)`. +#' +#' @param precursorMz For `XChromatogram`: `numeric(2)` for SRM/MRM transitions. +#' Represents the mz of the precursor ion. See details for more information. +#' +#' @param productMz For `XChromatogram`: `numeric(2)` for SRM/MRM transitions. +#' Represents the mz of the product. See details for more information. +#' +#' @param fromFile For `XChromatogram`: `integer(1)` the index of the file +#' within the `OnDiskMSnExp` or `MSnExp` object from which the chromatogram +#' was extracted. +#' +#' @param aggregationFun For `XChromatogram`: `character(1)` specifying the +#' function that was used to aggregate intensity values for the same +#' retention time across the m/z range. +#' +#' @param msLevel For `XChromatogram`: `integer` with the MS level from which +#' the chromatogram was extracted. +#' For `chromPeaks` and `chromPeakData`: extract chromatographic peaks of a +#' certain MS level. +#' +#' @param chromPeaks For `XChromatogram`: `matrix` with required columns +#' `"rt"`, `"rtmin"`, `"rtmax"`, `"into"`, `"maxo"` and `"sn"`. +#' For `XChromatograms`: `list`, same length than `data`, with the +#' chromatographic peaks for each chromatogram. Each element has to be +#' a `matrix`, the ordering has to match the order of the chromatograms +#' in `data`. +#' +#' @param chromPeakData For `XChromatogram`: `DataFrame` with optional +#' additional annotations for each chromatographic peak. The number of rows +#' has to match the number of chromatographic peaks. +#' +#' @param object An `XChromatogram` or `XChromatograms` object. +#' +#' @param ... For `plot`: additional parameters to passed to the `plot` +#' function. +#' For `XChromatograms`: additional parameters to be passed to the +#' [matrix] constructor, such as `nrow`, `ncol` and `byrow`. +#' +#' @return +#' +#' See help of the individual functions. +#' +#' @md +#' +#' @author Johannes Rainer +#' +#' @rdname XChromatogram +#' +#' @examples +#' +#' ## Create a XChromatogram object +#' pks <- matrix(nrow = 1, ncol = 6) +#' colnames(pks) <- c("rt", "rtmin", "rtmax", "into", "maxo", "sn") +#' pks[, "rtmin"] <- 2 +#' pks[, "rtmax"] <- 9 +#' pks[, "rt"] <- 4 +#' pks[, "maxo"] <- 19 +#' pks[, "into"] <- 93 +#' +#' xchr <- XChromatogram(rtime = 1:10, +#' intensity = c(4, 8, 14, 19, 18, 12, 9, 8, 5, 2), +#' chromPeaks = pks) +#' xchr +#' +#' ## Add arbitrary peak annotations +#' df <- DataFrame(peak_id = c("a")) +#' xchr <- XChromatogram(rtime = 1:10, +#' intensity = c(4, 8, 14, 19, 18, 12, 9, 8, 5, 2), +#' chromPeaks = pks, chromPeakData = df) +#' xchr +#' chromPeakData(xchr) +XChromatogram <- function(rtime = numeric(), intensity = numeric(), + mz = c(NA_real_, NA_real_), + filterMz = c(NA_real_, NA_real_), + precursorMz = c(NA_real_, NA_real_), + productMz = c(NA_real_, NA_real_), + fromFile = integer(), aggregationFun = character(), + msLevel = 1L, chromPeaks, chromPeakData) { + if (missing(chromPeaks)) + chromPeaks <- matrix(ncol = length(.CHROMPEAKS_REQ_NAMES), nrow = 0, + dimnames = list(character(), + .CHROMPEAKS_REQ_NAMES)) + else if (!is.matrix(chromPeaks)) + stop("'x' has to be a 'matrix'") + if (missing(chromPeakData)) + chromPeakData <- DataFrame(ms_level = rep(1L, nrow(chromPeaks)), + is_filled = rep(FALSE, nrow(chromPeaks)), + row.names = rownames(chromPeaks)) + else { + if (!any(colnames(chromPeakData) == "ms_level")) + chromPeakData$ms_level <- 1L + if (!any(colnames(chromPeakData) == "is_filled")) + chromPeakData$is_filled <- FALSE + } + x <- as(Chromatogram(rtime = rtime, intensity = intensity, mz = mz, + filterMz = filterMz, precursorMz = precursorMz, + productMz = productMz, fromFile = fromFile, + msLevel = msLevel), "XChromatogram") + x@chromPeaks <- chromPeaks + x@chromPeakData <- chromPeakData + validObject(x) + x +} + +#' Internal function to plot/draw identified chromatographic peaks in a +#' plot. +#' +#' @param x `XChromatogram` or an `XChromatograms` object. +#' +#' @param pks chromatographic peaks as returned by `chromPeaks(x)`. +#' +#' @noRd +.add_chromatogram_peaks <- function(x, pks, col, bg, type, pch, + yoffset = 0, transform = identity, ...) { + switch(type, + point = { + points(pks[, "rt"], transform(pks[, "maxo"]) + yoffset, + pch = pch, col = col, bg = bg, ...) + }, + rectangle = { + rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], + ybottom = rep(yoffset, nrow(pks)), + ytop = transform(pks[, "maxo"]) + yoffset, + col = bg, border = col, ...) + }, + polygon = { + ordr <- order(transform(pks[, "maxo"]), decreasing = TRUE) + pks <- pks[ordr, , drop = FALSE] + col <- col[ordr] + bg <- bg[ordr] + xs_all <- numeric() + ys_all <- numeric() + for (i in seq_len(nrow(pks))) { + if (inherits(x, "MChromatograms")) { + chr <- filterRt(x[pks[i, "row"], pks[i, "column"]], + rt = pks[i, c("rtmin", "rtmax")]) + } else + chr <- filterRt(x, rt = pks[i, c("rtmin", "rtmax")]) + xs <- rtime(chr) + if (!length(xs)) { + next + col <- col[-i] + bg <- bg[-i] + } + xs <- c(xs[1], xs, xs[length(xs)]) + ints <- transform(intensity(chr)) + ints[is.infinite(ints)] <- 0 + ys <- c(yoffset, ints + yoffset, yoffset) + nona <- !is.na(ys) + if (length(xs_all)) { + xs_all <- c(xs_all, NA) + ys_all <- c(ys_all, NA) + } + xs_all <- c(xs_all, xs[nona]) + ys_all <- c(ys_all, ys[nona]) + ## polygon(xs[nona], ys[nona], border = col[i], col = bg[i], + ## ...) + } + polygon(xs_all, ys_all, border = col, col = bg, ...) + }) +} + +.xchrom_merge_neighboring_peaks <- function(x, minProp = 0.75, diffRt = 0) { + if (nrow(x@chromPeaks)) { + res <- .chrom_merge_neighboring_peaks( + x, x@chromPeaks, x@chromPeakData, + minProp = minProp, diffRt = diffRt) + x@chromPeaks <- res$chromPeaks + x@chromPeakData <- res$chromPeakData + if (is.null(rownames(x@chromPeaks))) + are_new <- rep(TRUE, nrow(x@chromPeaks)) + else + are_new <- is.na(rownames(x@chromPeaks)) + if (any(are_new)) { + rownames(x@chromPeaks)[are_new] <- .featureIDs(sum(are_new), + prefix = "CPM") + rownames(x@chromPeakData) <- rownames(x@chromPeaks) + } + x@chromPeakData$merged <- are_new + } + x +} + +.filter_chrom_peaks_keep_top <- function(x, order = c("maxo", "into"), + n = 1L, decreasing = TRUE, ...) { + order <- match.arg(order) + ncp <- nrow(x@chromPeaks) + if (ncp && ncp > n) { + sn <- seq_len(n) + idx <- sort(order(chromPeaks(x)[, order], decreasing = decreasing)[sn]) + x@chromPeaks <- x@chromPeaks[idx, , drop = FALSE] + x@chromPeakData <- x@chromPeakData[idx, ] + } + x +} diff --git a/R/functions-XChromatograms.R b/R/functions-XChromatograms.R new file mode 100644 index 000000000..bb2ef6304 --- /dev/null +++ b/R/functions-XChromatograms.R @@ -0,0 +1,329 @@ +.validXChromatograms <- function(object) { + txt <- character() + if (length(object@.processHistory)) + if (!all(vapply(object@.processHistory, + function(z) inherits(z, "ProcessHistory"), logical(1)))) + txt <- c(txt, paste0("Only 'ProcessHistory' objects are allowed ", + "in slot .processHistory")) + if (!all(vapply(object, function(z) + inherits(z, "XChromatogram"), logical(1)))) + txt <- c(txt, paste0("'object' should only contain 'XChromatogram' ", + "objects")) + else lapply(object, validObject) + if (nrow(object@featureDefinitions)) { + if (!all(object@featureDefinitions$row %in% seq_len(nrow(object)))) + txt <- c(txt, paste0("Elements in column 'row' are outside of the", + " number of rows of 'object'")) + if (!all(unlist(object@featureDefinitions$peakidx) %in% + seq_len(nrow(chromPeaks(object))))) + txt <- c(txt, paste0("peakidx in feature data does not match ", + "the number of present chromatographic peaks")) + } + if (length(txt)) txt + else TRUE +} + +#' @rdname XChromatogram +#' +#' @param data For `XChromatograms`: `list` of `Chromatogram` or +#' `XChromatogram` objects. +#' +#' @param phenoData For `XChromatograms`: either a `data.frame`, +#' `AnnotatedDataFrame` or `NAnnotatedDataFrame` describing the +#' phenotypical information of the samples. +#' +#' @param featureData For `XChromatograms`: either a `data.frame` or +#' `AnnotatedDataFrame` with additional information for each row of +#' chromatograms. +#' +#' @md +#' +#' @examples +#' +#' ## ---- Creation of XChromatograms ---- +#' ## +#' ## Create a XChromatograms from Chromatogram objects +#' dta <- list(Chromatogram(rtime = 1:7, c(3, 4, 6, 12, 8, 3, 2)), +#' Chromatogram(1:10, c(4, 6, 3, 4, 7, 13, 43, 34, 23, 9))) +#' +#' ## Create an XChromatograms without peak data +#' xchrs <- XChromatograms(dta) +#' +#' ## Create an XChromatograms with peaks data +#' pks <- list(matrix(c(4, 2, 5, 30, 12, NA), nrow = 1, +#' dimnames = list(NULL, c("rt", "rtmin", "rtmax", "into", "maxo", "sn"))), +#' NULL) +#' xchrs <- XChromatograms(dta, chromPeaks = pks) +#' +#' ## Create an XChromatograms from XChromatogram objects +#' dta <- lapply(dta, as, "XChromatogram") +#' chromPeaks(dta[[1]]) <- pks[[1]] +#' +#' xchrs <- XChromatograms(dta, nrow = 1) +#' +#' hasChromPeaks(xchrs) +#' +#' ## Loading a test data set with identified chromatographic peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Subset the dataset to the first and third file. +#' xod_sub <- filterFile(faahko_sub, file = c(1, 3)) +#' +#' od <- as(xod_sub, "OnDiskMSnExp") +#' +#' ## Extract chromatograms for a m/z - retention time slice +#' chrs <- chromatogram(od, mz = 344, rt = c(2500, 3500)) +#' chrs +#' +#' ## --------------------------------------------------- ## +#' ## Chromatographic peak detection ## +#' ## --------------------------------------------------- ## +#' ## Perform peak detection using CentWave +#' xchrs <- findChromPeaks(chrs, param = CentWaveParam()) +#' xchrs +#' +#' ## Do we have chromatographic peaks? +#' hasChromPeaks(xchrs) +#' +#' ## Process history +#' processHistory(xchrs) +#' +#' ## The chromatographic peaks, columns "row" and "column" provide information +#' ## in which sample the peak was identified. +#' chromPeaks(xchrs) +#' +#' ## Spectifically extract chromatographic peaks for one sample/chromatogram +#' chromPeaks(xchrs[1, 2]) +#' +#' ## Plot the results +#' plot(xchrs) +#' +#' ## Plot the results using a different color for each sample +#' sample_colors <- c("#ff000040", "#00ff0040", "#0000ff40") +#' cols <- sample_colors[chromPeaks(xchrs)[, "column"]] +#' plot(xchrs, col = sample_colors, peakBg = cols) +#' +#' ## Indicate the peaks with a rectangle +#' plot(xchrs, col = sample_colors, peakCol = cols, peakType = "rectangle", +#' peakBg = NA) +#' +#' ## --------------------------------------------------- ## +#' ## Correspondence analysis ## +#' ## --------------------------------------------------- ## +#' ## Group chromatographic peaks across samples +#' prm <- PeakDensityParam(sampleGroup = rep(1, 2)) +#' res <- groupChromPeaks(xchrs, param = prm) +#' +#' hasFeatures(res) +#' featureDefinitions(res) +#' +#' ## Plot the correspondence results. Use simulate = FALSE to show the +#' ## actual results. Grouped chromatographic peaks are indicated with +#' ## grey shaded rectangles. +#' plotChromPeakDensity(res, simulate = FALSE) +#' +#' ## Simulate a correspondence analysis based on different settings. Larger +#' ## bw will increase the smoothing of the density estimate hence grouping +#' ## chromatographic peaks that are more apart on the retention time axis. +#' prm <- PeakDensityParam(sampleGroup = rep(1, 3), bw = 60) +#' plotChromPeakDensity(res, param = prm) +#' +#' ## Delete the identified feature definitions +#' res <- dropFeatureDefinitions(res) +#' hasFeatures(res) +XChromatograms <- function(data, phenoData, featureData, chromPeaks, + chromPeakData, ...) { + if (missing(data)) + return(new("XChromatograms")) + if (!missing(chromPeaks)) { + if (!is.list(chromPeaks) || length(chromPeaks) != length(data)) + stop("If provided, 'chromPeaks' has to be a list same length than", + " 'data'.") + data <- mapply(data, chromPeaks, FUN = function(z, pks) { + if (is(z, "Chromatogram")) + z <- as(z, "XChromatogram") + if (is.matrix(pks) && length(pks)) + chromPeaks(z) <- pks + z + }) + } + if (!missing(chromPeakData)) { + if (missing(chromPeaks)) + stop("If 'chromPeakData' is provided, also 'chromPeaks' is required") + if (!is.list(chromPeakData) || length(chromPeakData) != length(data)) + stop("If provided, 'chromPeakData' has to be a list same length ", + "than 'data'.") + data <- mapply(data, chromPeakData, FUN = function(z, pkd) { + if (length(pkd)) + chromPeakData(z) <- pkd + }) + } + object <- MChromatograms(data = data, phenoData = phenoData, + featureData = featureData, ...) + object <- as(object, "XChromatograms") + if (validObject(object)) object +} + +#' Subset the featureDefinitions `DataFrame` fts based on `pks` and `pks_sub` +#' being the `chromPeaks` before and after filtering. +#' +#' @author Johannes Rainer +#' +#' @noRd +.subset_features_on_chrom_peaks <- function(fts, pks, pks_sub) { + if (nrow(fts)) { + if (!is.null(rownames(pks)) && !is.null(rownames(pks_sub))) { + ids_orig <- rownames(pks) + ids_sub <- rownames(pks_sub) + } else { + cns <- intersect(colnames(pks), colnames(pks_sub)) + cns <- cns[!(cns %in% c("row", "column"))] + ids_orig <- apply(pks[, cns, drop = FALSE], 1, paste, + collapse = "-") + ## if (length(ids_orig) != length(unique(ids_orig))) + ## stop("Can not uniquely identify chromatographic peaks.") + ids_sub <- apply(pks_sub[, cns, drop = FALSE], 1, paste, + collapse = "-") + } + for (i in seq_len(nrow(fts))) { + fts$peakidx[[i]] <- unname( + which(ids_sub %in% ids_orig[fts$peakidx[[i]]] & + pks_sub[, "row"] == fts$row[i])) + } + fts <- extractROWS(fts, which(lengths(fts$peakidx) > 0)) + } + fts +} + +#' Subset the chromPeaks matrix from an `XChromatograms` object. The +#' `chromPeaks` matrix is generated dynamically from the `chromPeaks` matrices +#' of each internal `XChromatogram` object, so there is not really a need to +#' subset the `chromPeaks` from an `XChromatograms` - only that we need this +#' to update the `"peakidx"` column of the `featureDefinitions`. +#' +#' Note: the chromPeaks matrix is extracted ordered by row. +#' +#' @author Johannes Rainer +#' +#' @noRd +.subset_chrom_peaks_xchromatograms <- function(x, i, j) { + if (missing(i) & missing(j)) + return(x) + if (missing(i)) i <- seq_len(nrow(x)) + if (missing(j)) j <- seq_len(ncol(x)) + x <- x[x[, "row"] %in% i & x[, "column"] %in% j, , drop = FALSE] + if (nrow(x)) { + x[, "row"] <- match(x[, "row"], i) + x[, "column"] <- match(x[, "column"], j) + x[order(x[, "row"], x[, "column"]), , drop = FALSE] + } else x +} + +#' Convenience function to plot a peak density given provided chromPeaks and +#' a PeakDensityParam object. +#' +#' @param pks `matrix` with chromatographic peaks. +#' +#' @param param `PeakDensityParam` +#' +#' @param xlim optional definition of the x-axis limits. +#' +#' @param main optional title. +#' +#' @param xlab, ylab x- and y-axis labels. +#' +#' @param peakCol foreground color definition for peaks. Either 1 or length +#' equal to `ncol(pks`). +#' +#' @param peakBg background color definition for peaks. +#' +#' @param peakPch point character. +#' +#' @author Johannes Rainer +#' +#' @noRd +.plot_chrom_peak_density <- function(pks, fts, param, xlim = range(pks[, "rt"]), + main = NA, xlab = "retention time", + ylab = "sample", peakCol = "#00000060", + peakBg = "#00000020", peakPch = 1, + simulate = TRUE, col = "black", + ylim = range(pks[, "column"]), ...) { + pks_count <- nrow(pks) + if (pks_count) { + smpl_col <- which(colnames(pks) == "sample") + if (!length(smpl_col)) + smpl_col <- which(colnames(pks) == "column") + if (length(peakCol) == 1) + peakCol <- rep(peakCol, pks_count) + if (length(peakBg) == 1) + peakBg <- rep(peakBg, pks_count) + if (length(peakPch) == 1) + peakPch <- rep(peakPch, pks_count) + if (length(peakCol) != pks_count) { + warning("Length of 'peakCol' does not match the number of peaks. ", + "Using peakCol[1] for all.") + peakCol <- rep(peakCol[1], pks_count) + } + if (length(peakBg) != pks_count) { + warning("Length of 'peakBg' does not match the number of peaks. ", + "Using peakBg[1] for all.") + peakBg <- rep(peakBg[1], pks_count) + } + if (length(peakPch) != pks_count) { + warning("Length of 'peakPch' does not match the number of peaks. ", + "Using peakPch[1] for all.") + peakPch <- rep(peakPch[1], pks_count) + } + bw <- bw(param) + full_rt_range <- range(pks[, "rt"]) + dens_from <- full_rt_range[1] - 3 * bw + dens_to <- full_rt_range[2] + 3 * bw + densN <- max(512, 2 * 2^(ceiling(log2(diff(full_rt_range) / (bw / 2))))) + sample_groups <- sampleGroups(param) + dens <- density(pks[, "rt"], bw = bw, from = dens_from, to = dens_to, + n = densN) + yl <- c(0, max(dens$y)) + min_max_smple <- ylim + ypos <- seq(from = yl[1], to = yl[2], + length.out = diff(min_max_smple) + 1) + plot(pks[, "rt"], ypos[pks[, smpl_col]], xlim = xlim, ylim = yl, + main = main, yaxt = "n", ylab = ylab, xlab = xlab, + col = peakCol, bg = peakBg, pch = peakPch, ...) + points(x = dens$x, y = dens$y, type = "l", ...) + axis(side = 2, at = ypos, labels = seq(from = min_max_smple[1], + to = min_max_smple[2])) + sample_groups <- sampleGroups(param) + sample_groups_table <- table(sample_groups) + dens_max <- max(dens$y) + if (simulate) { + snum <- 0 + while(dens$y[max_y <- which.max(dens$y)] > dens_max / 20 && + snum < maxFeatures(param)) { + feat_range <- descendMin(dens$y, max_y) + dens$y[feat_range[1]:feat_range[2]] <- 0 + feat_idx <- which(pks[, "rt"] >= dens$x[feat_range[1]] & + pks[, "rt"] <= dens$x[feat_range[2]]) + tt <- table(sample_groups[pks[feat_idx, smpl_col]]) + if (!any(tt / sample_groups_table[names(tt)] >= + minFraction(param) & tt >= minSamples(param))) + next + rect(xleft = min(pks[feat_idx, "rt"]), ybottom = yl[1], + xright = max(pks[feat_idx, "rt"]), ytop = yl[2], + border = "#00000040", col = "#00000020") + } + } else { + if (!missing(fts) && nrow(fts)) { + rect(xleft = fts$rtmin, xright = fts$rtmax, + ybottom = rep(yl[1], nrow(fts)), + ytop = rep(yl[2], nrow(fts)), + border = "#00000040", col = "#00000020") + abline(v = fts$rtmed, col = "#00000040", lty = 2) + } else warning("No feature definitions present. Either use ", + "'groupChromPeaks' first or set 'simulate = TRUE'") + } + } else { + plot(3, 3, pch = NA, xlim = xlim, main = main, xlab = xlab, ylab = ylab) + } +} diff --git a/R/functions-imputation.R b/R/functions-imputation.R index b57388026..9ae458329 100644 --- a/R/functions-imputation.R +++ b/R/functions-imputation.R @@ -52,29 +52,51 @@ imputeRowMin <- function(x, min_fraction = 1/2) { #' #' @description #' -#' Replace missing values with random numbers from a normal distribution based +#' Replace missing values with random numbers. +#' When using the `method = "mean_sd"`, random numbers will be generated +#' from a normal distribution based #' on (a fraction of) the row min and a standard deviation estimated from the #' linear relationship between row standard deviation and mean of the full data #' set. Parameter `sd_fraction` allows to further reduce the estimated #' standard deviation. +#' When using the method `method = "from_to"`, random numbers between 2 specific values +#' will be generated. #' #' @details #' -#' Imputed values are taken from a normal distribution with mean being a +#' For method **mean_sd**, imputed +#' values are taken from a normal distribution with mean being a #' user defined fraction of the row minimum and the standard deviation #' estimated for that mean based on the linear relationship between row #' standard deviations and row means in the full matrix `x`. -#' +#' #' To largely avoid imputed values being negative or larger than the *real* #' values, the standard deviation for the random number generation is estimated #' ignoring the intercept of the linear model estimating the relationship #' between standard deviation and mean. If `abs = TRUE` `NA` values are #' replaced with the absolute value of the random values. #' +#' For method **from_to**, imputed values are taken between 2 user defined +#' fractions of the row minimum. +#' #' @inheritParams imputeRowMin #' +#' @param method method `character(1)` defining the imputation method. +#' See description for details. Defaults to `method = "mean_sd"`. +#' +#' @param min_fraction `numeric(1)` with the fraction of the row minimum that +#' should be used to replace `NA` values in that row in case that `mean_sd` +#' method is specified. When using `from_to` method, this value will be the +#' one used to calculate the maximum value for replace `NA` values in that row. +#' +#' @param min_fraction_from `numeric(1)` with the fraction of the row minimum +#' that should be used to calculate the minimum value for replace `NA` values +#' in that row. This parameter is used only in case that `from_to` method is +#' specified. +#' #' @param sd_fraction `numeric(1)` factor to reduce the estimated standard -#' deviation. +#' deviation. This parameter is used only in case that `mean_sd` method is +#' specified. #' #' @param abs `logical(1)` to force imputed values to be strictly positive. #' @@ -84,10 +106,10 @@ imputeRowMin <- function(x, min_fraction = 1/2) { #' #' @md #' -#' @author Johannes Rainer -#' +#' @author Johannes Rainer, Mar Garcia-Aloy +#' #' @examples -#' +#' #' library(faahKO) #' data("faahko") #' @@ -101,27 +123,41 @@ imputeRowMin <- function(x, min_fraction = 1/2) { #' plot(mns, sds) #' abline(lm(sds ~ mns)) #' -#' mat_imp <- imputeRowMinRand(mat) +#' mat_imp_meansd <- imputeRowMinRand(mat, method = "mean_sd") +#' mat_imp_fromto <- imputeRowMinRand(mat, method = "from_to") #' #' head(mat) -#' head(mat_imp) -imputeRowMinRand <- function(x, min_fraction = 1/2, +#' head(mat_imp_meansd) +#' head(mat_imp_fromto) +imputeRowMinRand <- function(x, method = c("mean_sd", "from_to"), + min_fraction = 1/2, min_fraction_from = 1/1000, sd_fraction = 1, abs = TRUE) { - row_means <- rowMeans(x, na.rm = TRUE) - row_sds <- apply(x, MARGIN = 1, sd, na.rm = TRUE) - sd_mean_lm <- lm(row_sds ~ row_means) - for (i in 1:nrow(x)) { - nas <- is.na(x[i, ]) - if (all(nas)) - next - if (any(nas)) { - minx <- min(x[i, !nas]) * min_fraction - rndm <- rnorm(n = sum(nas), mean = minx, - sd = (minx * abs(sd_mean_lm$coefficients[2])) * - sd_fraction) - if (abs) - x[i, nas] <- abs(rndm) - else x[i, nas] <- rndm + method <- match.arg(method) + if (method == "mean_sd") { + row_means <- rowMeans(x, na.rm = TRUE) + row_sds <- apply(x, MARGIN = 1, sd, na.rm = TRUE) + sd_mean_lm <- lm(row_sds ~ row_means) + for (i in 1:nrow(x)) { + nas <- is.na(x[i, ]) + if (all(nas)) + next + if (any(nas)) { + minx <- min(x[i, !nas]) * min_fraction + rndm <- rnorm(n = sum(nas), mean = minx, + sd = (minx * abs(sd_mean_lm$coefficients[2])) * + sd_fraction) + if (abs) + x[i, nas] <- abs(rndm) + else x[i, nas] <- rndm + } + } + } + if (method == "from_to") { + for (i in 1:nrow(x)) { + x[i, is.na(x[i, ])] <- + runif(min = min(x[i, ], na.rm = TRUE) * min_fraction_from, + max = min(x[i, ], na.rm = TRUE) * min_fraction, + n = sum(is.na(x[i, ]))) } } x diff --git a/R/functions-utils.R b/R/functions-utils.R index a66e575c2..8f869b288 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -5,17 +5,17 @@ ## valueCount2ScanIndex ## #' @title Create index vector for internal C calls -#' +#' #' @description Simple helper function that converts the number of values #' per scan/spectrum to an integer vector that can be passed to the base #' xcms functions/downstream C functions. #' #' @param valCount Numeric vector representing the number of values per #' spectrum. -#' +#' #' @return An integer vector with the index (0-based) in the mz or intensity #' vectors indicating the start of a spectrum. -#' +#' #' @author Johannes Rainer #' #' @noRd @@ -51,28 +51,28 @@ valueCount2ScanIndex <- function(valCount){ #' home directory; to ensure that the option is correctly read add a new line #' to the file too). See also [Startup] from the base R documentation on how to #' specify system-wide options for R. -#' +#' #' Usage of old code is strongly dicouraged. This function is thought #' to be used mainly in the transition phase from xcms to xcms version 3. -#' +#' #' @details #' #' The functions/methods that are affected by this option are: -#' +#' #' - [do_findChromPeaks_matchedFilter]: use the original #' code that iteratively creates a subset of the binned (profile) #' matrix. This is helpful for computers with limited memory or #' matchedFilter settings with a very small bin size. #' - [getPeaks] -#' +#' #' @param x `logical(1)` to specify whether or not original #' old code should be used in corresponding functions. If not provided the #' function simply returns the value of the global option. -#' +#' #' @return `logical(1)` indicating whether old code is being used. #' #' @md -#' +#' #' @author Johannes Rainer useOriginalCode <- function(x) { if (missing(x)) { @@ -95,17 +95,17 @@ useOriginalCode <- function(x) { ## ) #' @title Copy the content from an environment to another one -#' +#' #' @description This function copies the content of an environment into another #' one. -#' +#' #' @param env environment from which to copy. -#' +#' #' @param inheritLocks logical(1) whether the locking status should be copied #' too. -#' +#' #' @return an env. -#' +#' #' @noRd .copy_env <- function(env, inheritLocks = FALSE) { ## new_e <- new.env(parent = emptyenv()) @@ -186,7 +186,7 @@ useOriginalCode <- function(x) { method <- match.arg(method, profMeths) impute <- names(profMeths)[profMeths == method] brks <- NULL - + if (length(mzrange.) != 2) { mrange <- range(mz, na.rm = TRUE) mzrange. <- c(floor(mrange[1] / step) * step, @@ -269,48 +269,15 @@ useOriginalCode <- function(x) { #' @description This function creates arbitrary IDs for features. #' #' @param prefix character(1) with the prefix to be added to the ID. -#' +#' #' @param x integer(1) with the number of IDs that should be generated. #' #' @noRd -.featureIDs <- function(x, prefix = "FT") { - sprintf(paste0(prefix, "%0", ceiling(log10(x + 1L)), "d"), 1:x) +.featureIDs <- function(x, prefix = "FT", from = 1L) { + sprintf(paste0(prefix, "%0", ceiling(log10(x + 1L)), "d"), + seq(from = from, length.out = x)) } -## #' @description Expands stretches of TRUE values in \code{x} by one on both -## #' sides. -## #' -## #' @note The return value for a \code{NA} is always \code{FALSE}. -## #' -## #' @param x \code{logical} vector. -## #' -## #' @author Johannes Rainer -## #' -## #' @noRd -## .grow_trues <- function(x) { -## previous <- NA -## x_new <- rep_len(FALSE, length(x)) -## for (i in 1:length(x)) { -## if (is.na(x[i])) { -## previous <- NA -## next -## } -## ## If current element is TRUE -## if (x[i]) { -## x_new[i] <- TRUE -## ## if last element was FALSE, set last element to TRUE -## if (!is.na(previous) && !previous) -## x_new[i - 1] <- TRUE -## } else { -## ## if previous element was TRUE, set current to TRUE. -## if (!is.na(previous) && previous) -## x_new[i] <- TRUE -## } -## previous <- x[i] -## } -## x_new -## } - #' @title Weighted mean around maximum #' #' @description Calculate a weighted mean of the values around the value with @@ -351,19 +318,17 @@ weightedMeanAroundApex <- function(x, w = rep(1, length(x)), i = 1) { weighted.mean(x[seq_idx], w[seq_idx]) } - - #' @title DEPRECATED: Create a plot that combines a XIC and a mz/rt 2D plot for one sample #' #' @description #' #' **UPDATE**: please use `plot(x, type = "XIC")` from the `MSnbase` package #' instead. See examples below. -#' +#' #' The `plotMsData` creates a plot that combines an (base peak ) #' extracted ion chromatogram on top (rt against intensity) and a plot of #' rt against m/z values at the bottom. -#' +#' #' @param x `data.frame` such as returned by the [extractMsData()] function. #' Only a single `data.frame` is supported. #' @@ -371,7 +336,7 @@ weightedMeanAroundApex <- function(x, w = rep(1, length(x)), i = 1) { #' #' @param cex `numeric(1)` defining the size of points. Passed directly to the #' `plot` function. -#' +#' #' @param mfrow `numeric(2)` defining the plot layout. This will be passed #' directly to `par(mfrow = mfrow)`. See `par` for more information. Setting #' `mfrow = NULL` avoids calling `par(mfrow = mfrow)` hence allowing to @@ -383,9 +348,9 @@ weightedMeanAroundApex <- function(x, w = rep(1, length(x)), i = 1) { #' @param colramp a *color ramp palette* to be used to color the data points #' based on their intensity. See argument `col.regions` in #' [lattice::level.colors] documentation. -#' +#' #' @author Johannes Rainer -#' +#' #' @md #' #' @examples @@ -430,13 +395,15 @@ plotMsData <- function(x, main = "", cex = 1, mfrow = c(2, 1), } #' @title Calculate relative log abundances -#' +#' +#' @description +#' #' `rla` calculates the relative log abundances (RLA, see reference) on a #' `numeric` vector. #' #' @details The RLA is defines as the (log) abundance of an analyte relative #' to the median across all abundances of the same group. -#' +#' #' @param x `numeric` (for `rla`) or `matrix` (for `rowRla`) with the #' abundances (in natural scale) on which the RLA should be calculated. #' @@ -446,7 +413,7 @@ plotMsData <- function(x, main = "", cex = 1, mfrow = c(2, 1), #' #' @param log.transform `logical(1)` whether `x` should be log2 transformed. #' Set to `log.transform = FALSE` if `x` is already in log scale. -#' +#' #' @return `numeric` of the same length than `x` (for `rla`) or `matrix` with #' the same dimensions than `x` (for `rowRla`). #' @@ -461,7 +428,7 @@ plotMsData <- function(x, main = "", cex = 1, mfrow = c(2, 1), #' De Livera AM, Dias DA, De Souza D, Rupasinghe T, Pyke J, Tull D, Roessner U, #' McConville M, Speed TP. Normalizing and integrating metabolomics data. #' *Anal Chem* 2012 Dec 18;84(24):10768-76. -#' +#' #' @examples #' #' x <- c(3, 4, 5, 1, 2, 3, 7, 8, 9) @@ -510,7 +477,7 @@ rowRla <- function(x, group, log.transform = TRUE) { if (length(unique(c(length(xleft), length(xright), length(ybottom), length(ytop)))) != 1) stop("'xleft', 'xright', 'ybottom' and 'ytop' have to have the same", - " length") + " length") .overlap <- function(x1, x2, xs1, xs2) { x1 <= xs2 & x2 >= xs1 } @@ -556,29 +523,339 @@ rowRla <- function(x, group, log.transform = TRUE) { ovlap_merged } -## #' @examples -## #' x1_high <- c(0.000012323) -## #' x1_low <- c(0.0000034302) -## #' x2_high <- c(0.000012322) -## #' x2_low <- c(0.0000034301) -## .overlap <- function(x1_low, x1_high, x2_low, x2_high, res = 1e12) { -## library(IRanges) -## x1 <- IRanges(round(x1_low * res), round(x1_high * res)) -## x2 <- IRanges(round(x2_low * res), round(x2_high * res)) -## } +#' Calculate a range of values adding a part per million to it. The minimum +#' will be the minimum - ppm/2, the maximum the maximum + ppm/2 +#' +#' @param x `numeric` +#' +#' @param ppm `numeric(1)` +#' +#' @return `numeric(2)` with the range +/- ppm +#' +#' @noRd +#' +#' @author Johannes Rainer +.ppm_range <- function(x, ppm = 0) { + x <- range(x) + x[1] <- x[1] - x[1] * ppm / 2e6 + x[2] <- x[2] + x[2] * ppm / 2e6 + x +} + +#' Simple helper to insert column(s) in a matrix. +#' +#' @param x `matrix` +#' +#' @param pos `integer()` with positions (columns) where a column should be +#' inserted in `x`. +#' +#' @param val `vector` or `list` with the elements to insert. +#' +#' @return `matrix` +#' +#' @author Johannes Rainer +#' +#' @noRd +#' +#' @examples +#' +#' mat <- matrix(1:100, ncol = 5) +#' +#' ## Insert a column at position 3, containing a single value. +#' .insertColumn(mat, pos = 3, 5) +#' +#' ## Insert columns at positions 2 and 4 containing the same sequence of +#' ## values +#' .insertColumn(mat, c(2, 4), list(101:120)) +.insertColumn <- function(x, pos = integer(), val = NULL) { + if (length(pos)) { + if (length(val) == 1) + val <- rep(val, length(pos)) + if (length(val) != length(pos)) + stop("length of 'pos' and 'val' have to match") + } + for (i in seq_along(pos)) { + if (pos[i] == 1) { + x <- cbind(val[[i]], x) + } else { + if (pos[i] == ncol(x)) + x <- cbind(x, val[[i]]) + else + x <- cbind(x[, 1:(pos[i]-1)], val[[i]], x[, pos[i]:ncol(x)]) + } + } + x +} +#' helper to subset featureDefinitions based on provided chrom peak names and +#' update the peakidx. +#' +#' @param x `DataFrame` with feature definitions (such as returned by +#' `featureDefinitions(object)`. +#' +#' @param original_names `character` with the original rownames (peak IDs) of +#' the `chromPeaks` matrix **before** subsetting. +#' +#' @param subset_names `character` with the rownames (peak IDs) of the +#' `chromPeaks` matrix **after** subsetting. +#' +#' @return updated feature definitions `DataFrame`. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.update_feature_definitions <- function(x, original_names, subset_names) { + ## Skip if they are the same. + if (length(original_names) == length(subset_names) && + all.equal(original_names, subset_names)) + return(x) + f <- as.factor(rep(seq_len(length(x$peakidx)), lengths(x$peakidx))) + x$peakidx <- unname(lapply( + split(match(original_names[unlist(x$peakidx, + use.names = FALSE)], + subset_names), f), function(z) z[!is.na(z)])) + extractROWS(x, lengths(x$peakidx) > 0) +} -## Use IRanges for this... -## 1) find overlaps in one dimension. -## 2) find overlaps in second dimension. -## Given: -## MS1 peaks with m/z range, rt range. -## MS2 peaks with rt. -## finding the MS2 spectra related to a single mass peak in a MS1 spectrum: -## one MS2 is associated to a single MS1: -## MS2: precursor is the MS1 spectrum ID, target m/z (lower and upper bound), -## selected m/z, peak intensity. -## So, for a peak: -## - select spectra for the rt range of the peak. -## - get all MS2 spectra for these spectra. -## - select those MS2 that have an selected ion m/z within m/z range. +#' @description +#' +#' Combine `matrix` or `data.frame`s adding eventually missing columns filling +#' them with `NA`s. +#' +#' @param x `matrix` or `data.frame`. +#' +#' @param y `matrix` or `data.frame`. +#' +#' @md +#' +#' @author Johannes Rainer +#' +#' @noRd +.rbind_fill <- function(x, y) { + cnx <- colnames(x) + cny <- colnames(y) + cn <- union(cnx, cny) + mis_col <- setdiff(cn, colnames(x)) + for (mc in mis_col) { + if (is.factor(y[, mc])) + x <- cbind(x, tmp = as.factor(NA)) + else + x <- cbind(x, tmp = as(NA, class(y[, mc]))) + } + colnames(x) <- c(cnx, mis_col) + mis_col <- setdiff(cn, colnames(y)) + for (mc in mis_col) { + if (is.factor(x[, mc])) + y <- cbind(y, tmp = as.factor(NA)) + else + y <- cbind(y, tmp = as(NA, class(x[, mc]))) + } + colnames(y) <- c(cny, mis_col) + rbind(x, y[, colnames(x)]) +} + +#' @description +#' +#' Similar to the `IRanges::reduce` method, this function *joins* overlapping +#' ranges (e.g. m/z ranges or retention time ranges) to create unique and +#' disjoined (i.e. not overlapping) ranges. +#' +#' @param start `numeric` with start positions. +#' +#' @param end `numeric` with end positions. +#' +#' @return `matrix` with two columns containing the start and end values for +#' the disjoined ranges. Note that the ranges are increasingly ordered. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +#' +#' @examples +#' +#' mzmin <- c(2, 3, 4, 7) +#' mzmax <- c(2.5, 3.5, 4.2, 7.6) +#' .reduce(mzmin, mzmax) +#' .reduce(mzmin - 0.1, mzmax + 0.1) +#' .reduce(mzmin - 0.5, mzmax + 0.5) +.reduce <- function(start, end) { + if (!length(start)) + return(matrix(ncol = 2, nrow = 0, + dimnames = list(NULL, c("start", "end")))) + if (length(start) == 1) { + return(cbind(start, end)) + } + idx <- order(start, end) + start <- start[idx] + end <- end[idx] + new_start <- new_end <- numeric(length(start)) + current_slice <- 1 + new_start[current_slice] <- start[1] + new_end[current_slice] <- end[1] + for (i in 2:length(start)) { + if (start[i] <= new_end[current_slice]) { + if (end[i] > new_end[current_slice]) + new_end[current_slice] <- end[i] + } else { + current_slice <- current_slice + 1 + new_start[current_slice] <- start[i] + new_end[current_slice] <- end[i] + } + } + idx <- 1:current_slice + cbind(start = new_start[idx], end = new_end[idx]) +} + +#' @title Group overlapping ranges +#' +#' @description +#' +#' `groupOverlaps` identifies overlapping ranges in the input data and groups +#' them by returning their indices in `xmin` `xmax`. +#' +#' @param xmin `numeric` (same length than `xmax`) with the lower boundary of +#' the range. +#' +#' @param xmax `numeric` (same length than `xmin`) with the upper boundary of +#' the range. +#' +#' @return `list` with the indices of grouped elements. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @examples +#' +#' x <- c(2, 12, 34.2, 12.4) +#' y <- c(3, 16, 35, 36) +#' +#' groupOverlaps(x, y) +groupOverlaps <- function(xmin, xmax) { + tolerance <- sqrt(.Machine$double.eps) + reduced_ranges <- .reduce(xmin, xmax) + res <- vector("list", nrow(reduced_ranges)) + for (i in seq_along(res)) { + res[[i]] <- which(xmin >= reduced_ranges[i, 1] - tolerance & + xmax <= reduced_ranges[i, 2] + tolerance) + } + res +} + +.require_spectra <- function() { + if (!requireNamespace("Spectra", quietly = TRUE)) + stop("Returning data as a 'Spectra' object requires the 'Spectra' ", + "package to be installed. Please ", + "install with 'BiocInstaller::install(\"Spectra\")'") + else invisible(TRUE) +} + +#' very efficient extractor for the featureData of an OnDiskMSnExp +#' +#' @param x `OnDiskMSnExp`. +#' +#' @author Johannes Rainer +#' +#' @noRd +.fdata <- function(x) { + x@featureData@data +} + +.i2index <- function(x, ids = character(), name = character()) { + if (is.character(x)) + x <- match(x, ids) + if (is.logical(x)) { + if (length(ids) && length(ids) != length(x)) + stop("Length of '", name, "' has to be equal to ", length(ids), ".") + x <- which(x) + } + if (is.numeric(x)) + x <- as.integer(x) + if (length(ids) && any(is.na(x)) || (any(x < 1) || any(x > length(ids)))) + stop("'", name, "' out of bounds") + x +} + +#' @description +#' +#' Function to extract EICs. In contrast to the other versions, this one +#' allows to extract `Chromatogram` for all MS levels at the same time. +#' The EIC is defined by a chrom peak matrix `pks` (column `"rtmin"`, +#' `"rtmax"`, `"mzmin"` and `"mzmax"`). Additional required parameters are +#' the MS level of spectra and EICs. Further selection/mapping of spectra +#' with m/z-rt regions can be defined with the parameter `tmz` and `pks_tmz` +#' which can e.g. be the *isolation window target m/z* for spectra and EICs. +#' The latter is important for MS2 data, since that could be generated using +#' different scanning windows (SWATH): only MS2 spectra from the matching +#' isolation window will be used for chromatogram generation. +#' +#' See also `.old_chromatogram_sample` in *MsExperiment-functions.R* for an +#' alternative implementation. +#' +#' @param pd `list` of peaks matrices (e.g. returned by `Spectra::peaksData`). +#' +#' @param rt `numeric` with retention times of spectra. +#' +#' @param msl `integer` with the MS levels for the spectra. +#' +#' @param tmz `numeric` with the isolation window target m/z for each spectrum +#' (for DIA MS2 data). +#' +#' @param pks `matrix` with columns `"rtmin"`, `"rtmax"`, `"mzmin"`, `"mzmax"` +#' for which the +#' +#' @param pks_msl `integer` with the MS levels for the regions from which the +#' chromatograms should be extracted. +#' +#' @param pks_tmz `numeric` with the isolation window target m/z in which +#' the (MS2) chromatographic peak was detected. +#' +#' @param file_idx `integer(1)` allowing to optionally set the index of the +#' file the EIC is from (parameter `fromFile`). +#' +#' @return `list` of `MSnbase::Chromatogram` objects. +#' +#' @author Johannes Rainer, Nir Shachaf +#' +#' @noRd +.chromatograms_for_peaks <- function(pd, rt, msl, file_idx = 1L, + tmz = rep(1, length(pd)), pks, pks_msl, + pks_tmz = rep(1, nrow(pks)), + aggregationFun = "sum") { + nr <- nrow(pks) + if (aggregationFun == "sum") + FUN <- getFunction("sumi") + else FUN <- getFunction(aggregationFun) + empty_chrom <- MSnbase::Chromatogram( + fromFile = file_idx, + aggregationFun = aggregationFun, + intensity = numeric(), + rtime = numeric()) + res <- list(empty_chrom)[rep(1L, nr)] + rtc <- c("rtmin", "rtmax") + mzc <- c("mzmin", "mzmax") + for (i in seq_len(nr)) { + res[[i]]@filterMz <- pks[i, mzc] + res[[i]]@mz <- pks[i, mzc] + res[[i]]@msLevel <- pks_msl[i] + ## if pks_msl > 1: precursor m/z has to match! + keep <- between(rt, pks[i, rtc]) & msl == pks_msl[i] + if (pks_msl[i] > 1L) { + ## for DIA MS2: spectra have to match the isolation window. + keep <- keep & tmz == pks_tmz[i] + } + keep <- which(keep) # the get rid of `NA`. + if (length(keep)) { + ## Aggregate intensities. + res[[i]]@intensity <- vapply(pd[keep], function(z) { + FUN(z[between(z[, "mz"], pks[i, mzc]), "intensity"]) + }, numeric(1L)) + res[[i]]@rtime <- rt[keep] + } + } + res +} diff --git a/R/functions-xcmsRaw.R b/R/functions-xcmsRaw.R index fcc364aeb..2d188fb37 100644 --- a/R/functions-xcmsRaw.R +++ b/R/functions-xcmsRaw.R @@ -50,7 +50,7 @@ xcmsRaw <- function(filename, profstep = 1, profmethod = "bin", message("Provided scanrange was adjusted to ", scanrange[1]," - ", scanrange[2]) } if (!is.null(rawdata$acquisitionNum)) { - ## defined only for mzData and mzXML + ## defined only for mzXML object@acquisitionNum <- rawdata$acquisitionNum } if (!is.null(rawdata$polarity)) { @@ -571,7 +571,7 @@ remakeTIC<-function(object){ baselevel <- pi$baselevel basespace <- pi$basespace vps <- diff(c(object@scanindex, length(object@env$mz))) - + cat("method: ", method, "\n") cat("step: ", step, "\n") ## Create the profile matrix: @@ -737,7 +737,7 @@ msn2xcmsRaw <- function(xmsn) { x <- deepCopy(xmsn) x@tic <- x@msnAcquisitionNum - + x@scantime <- x@msnRt # Fake time in secs x@acquisitionNum <- x@msnAcquisitionNum x@scanindex <- x@msnScanindex @@ -746,4 +746,3 @@ msn2xcmsRaw <- function(xmsn) { x@env$intensity <- x@env$msnIntensity invisible(x) } - diff --git a/R/functions-xcmsSet.R b/R/functions-xcmsSet.R index d6c315c04..8eea47700 100644 --- a/R/functions-xcmsSet.R +++ b/R/functions-xcmsSet.R @@ -56,8 +56,8 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, exists <- file.exists(files_abs) files[exists] <- files_abs[exists] if (length(files) == 0 | all(is.na(files))) - stop("No NetCDF/mzXML/mzData/mzML files were found.\n") - + stop("No NetCDF/mzXML/mzML files were found.\n") + if(lockMassFreq==TRUE){ ## remove the 02 files if there here lockMass.files<-grep("02.CDF", files) @@ -374,7 +374,7 @@ split.xcmsSet <- function(x, f, drop = TRUE, ...) { #' @note This function is used by the *old* `xcmsSet` function to guess #' the experimental design (i.e. group assignment of the files) from the #' folders in which the files of the experiment can be found. -#' +#' #' @param paths `character` representing the file names (including the full #' path) of the experiment's files. #' @@ -407,7 +407,8 @@ phenoDataFromPaths <- function(paths) { i <- min(i, tail(c(0, which(scomp[1:i,1] == .Platform$file.sep)), n = 1) + 1) if (i > 1 && i <= nrow(scomp)) sclass <- substr(sclass, i, max(nchar(sclass))) - pdata <- data.frame(class = sclass) + pdata <- data.frame(factor(sclass)) + colnames(pdata) <- "class" } rownames(pdata) <- gsub("\\.[^.]*$", "", basename(paths)) pdata @@ -436,7 +437,7 @@ patternVsRowScore <- function(currPeak, parameters, mplenv) for (mplRow in 1:length(nnDist$nn.idx)) { mplistMZ <- mplenv$mplistmean[nnDist$nn.idx[mplRow], "mz"] mplistRT <- mplenv$mplistmean[nnDist$nn.idx[mplRow], "rt"] - + ## Calculate differences between M/Z and RT values of current peak and ## median of the row diffMZ = abs(mplistMZ - mplenv$peakmat[[currPeak, "mz"]]) @@ -593,7 +594,7 @@ plotSpecWindow <- function(xs, gidxs, borderwidth=1){ ############################################################ ## xcmsBoxPlot xcmsBoxPlot<-function(values, className, dirpath, pic, width=640, height=480){ - + if (pic == "png"){ png(filename = file.path(dirpath, "%003d.png"), width = width, height = height, units = "px") diff --git a/R/functions-xcmsSwath.R b/R/functions-xcmsSwath.R new file mode 100644 index 000000000..3505c399b --- /dev/null +++ b/R/functions-xcmsSwath.R @@ -0,0 +1,149 @@ +## functions for SWATH/DIA analysis. + +#' Get all MS2 peaks from the isolation window containing the MS1 peaks' +#' m/z. +#' +#' @noRd +.which_mz_in_range <- function(mz, lowerMz, upperMz) { + if (length(mz) > 1) + return(lapply(mz, .which_mz_in_range, lowerMz, upperMz)) + which(mz >= lowerMz & mz <= upperMz) +} + +#' @description +#' +#' Which rows (peaks) in `pks` have overlapping retention time ranges with +#' the peak `x`? +#' +#' @param x `numeric` representing one row from the `chromPeaks` matrix. +#' +#' @param pks `matrix` representing a `chromPeaks` matrix. +#' +#' @return `integer` with the index of rows (peaks) in `pks` overlapping the +#' retention time range of `x`. +#' +#' @author Johannes Rainer +#' +#' @noRd +.which_chrom_peak_overlap_rt <- function(x, pks) { + if (is.matrix(x)) + x <- x[1, ] + which(pks[, "rtmin"] <= x["rtmax"] & pks[, "rtmax"] >= x["rtmin"]) +} + +#' @description +#' +#' Which rows (peaks) in `pks` have a retention time (of the apex) that is +#' close to the apex of the specified peak `x`. Peaks are considerd close +#' if the difference between their apex retention time and the retention time +#' of the input peak is smaller than `diffRt`. +#' +#' @param x `numeric` representing one row from the `chromPeaks` matrix. +#' +#' @param pks `matrix` representing a `chromPeaks` matrix. +#' +#' @return `integer` with the index of the rows (peaks) in `pks` that are close +#' to the specified peak `x`. +#' +#' @author Johannes Rainer +#' +#' @noRd +.which_chrom_peak_diff_rt <- function(x, pks, diffRt = 2) { + if (is.matrix(x)) + x <- x[1, ] + which(abs(pks[, "rt"] - x["rt"]) <= diffRt) +} + +#' *Reconstruct* MS2 spectra for DIA data: +#' For each MS1 chromatographic peak: +#' +#' - find all MS2 chrom peaks from the same isolation window +#' - reduce to MS2 chrom peaks with an rt similar to the one from the MS1 +#' chrom peak +#' - remove EICs with 2 or less data points +#' - create an MS2 spectrum from all MS2 chrom peaks with peak shape +#' correlation > `minCor`. +#' +#' @param object `XCMSnExp` with data from a **single** file. +#' +#' @note +#' +#' this function first extracts EICs for all chromatographic peaks, thus it +#' will not be efficient for predicting MS2 spectra for selected MS1 peaks. +#' +#' Be aware that this function does only support returning a `Spectra`! +#' +#' @author Johannes Rainer, Michael Witting +#' +#' @noRd +.reconstruct_dia_ms2 <- + function(object, expandRt = 2, diffRt = 5, minCor = 0.8, fromFile = 1L, + column = "maxo", + peakId = rownames(chromPeaks(object, msLevel = 1L))) { + if (hasAdjustedRtime(object)) + fData(object)$retentionTime <- rtime(object) + message("Reconstructing MS2 spectra for ", length(peakId), + " chrom peaks ...", appendLF = FALSE) + pks <- chromPeaks(object)[, c("mz", "mzmin", "mzmax", "rt", "rtmin", + "rtmax", column)] + pks[, "rtmin"] <- pks[, "rtmin"] - expandRt + pks[, "rtmax"] <- pks[, "rtmax"] + expandRt + ord <- order(pks[, "mz"]) # m/z need to be ordered in a Spectra + pks <- pks[ord, ] + ilmz <- chromPeakData(object)$isolationWindowLowerMz[ord] + iumz <- chromPeakData(object)$isolationWindowUpperMz[ord] + ## Get EICs for all chrom peaks (all MS levels) + object <- filterRt(object, rt = range(pks[, c("rtmin", "rtmax")])) + chrs <- .chromatograms_for_peaks( + lapply(spectra(object), + function(z) cbind(mz = z@mz, intensity = z@intensity)), + rt = rtime(object), msl = msLevel(object), file_idx = fromFile, + tmz = isolationWindowTargetMz(object), pks = pks, + pks_msl = chromPeakData(object)$ms_level[ord], + pks_tmz = chromPeakData(object)$isolationWindowTargetMZ[ord]) + idx <- match(peakId, rownames(pks)) # MS1 peaks to loop over + res <- data.frame( + peak_id = peakId, precursorMz = pks[idx, "mz"], + rtime = pks[idx, "rt"], msLevel = 2L, + polarity = polarity(object)[1L], + precursorIntensity = pks[idx, column], + fromFile = fromFile) + ms2_peak_id <- lapply(idx, function(z) character()) + mzs <- ints <- ms2_peak_cor <- + lapply(seq_len(nrow(res)), function(z) numeric()) + for (i in seq_along(idx)) { + ii <- idx[i] + imz <- .which_mz_in_range(pks[ii, "mz"], ilmz, iumz) + irt <- .which_chrom_peak_diff_rt(pks[ii, c("rt", "rtmax")], + pks, diffRt = diffRt) + ix <- intersect(imz, irt) + if (!length(ix)) + next + ## Filter empty or sparse chromatograms + ix <- ix[vapply( + chrs@.Data[ix], function(z) sum(!is.na(z@intensity)), + integer(1)) > 2] + if (!length(ix)) + next + ## Correlate + cors <- vapply( + chrs@.Data[ix], compareChromatograms, numeric(1), + y = chrs[[ii]], ALIGNFUNARGS = list(method = "approx")) + keep <- which(cors >= minCor) + if (length(keep)) { + ix <- ix[keep] + res$rtime[i] <- median(pks[ix, "rt"]) + ms2_peak_id[[i]] <- rownames(pks)[ix] + ms2_peak_cor[[i]] <- unname(cors[keep]) + mzs[[i]] <- unname(pks[ix, "mz"]) + ints[[i]] <- unname(pks[ix, column]) + } + } + res$mz <- mzs + res$intensity <- ints + res$ms2_peak_id <- ms2_peak_id + res$ms2_peak_cor <- ms2_peak_cor + message(" OK") + .require_spectra() + Spectra::Spectra(res) + } diff --git a/R/methods-Chromatogram.R b/R/methods-Chromatogram.R index a08ad7b14..40a97f690 100644 --- a/R/methods-Chromatogram.R +++ b/R/methods-Chromatogram.R @@ -4,7 +4,7 @@ #' #' @description #' -#' `findChromPeaks` on a [Chromatogram] or [Chromatograms] object with a +#' `findChromPeaks` on a [Chromatogram] or [MChromatograms] object with a #' [CentWaveParam] parameter object performs centWave-based peak detection #' on purely chromatographic data. See [centWave] for details on the method #' and [CentWaveParam] for details on the parameter class. @@ -12,68 +12,74 @@ #' See [peaksWithCentWave()] for the arguments used for peak detection #' on purely chromatographic data. #' -#' @param object a [Chromatogram] or [Chromatograms] object. +#' After chromatographic peak detection, identified peaks can also be *refined* +#' with the [refineChromPeaks()] method, which can help to reduce peak +#' detection artifacts. +#' +#' @param object a [Chromatogram] or [MChromatograms] object. #' #' @param param a [CentWaveParam] object specifying the settings for the #' peak detection. See [peaksWithCentWave()] for the description of #' arguments used for peak detection. #' +#' @param BPPARAM a parameter class specifying if and how parallel processing +#' should be performed (only for `XChromatograms` objects). It defaults to +#' `bpparam()`. See [bpparam()] for more information. +#' #' @param ... currently ignored. -#' +#' #' @return #' -#' If called on a `Chromatogram` object, the method returns a `matrix` with -#' the identified peaks. See [peaksWithCentWave()] for details on the matrix -#' content. -#' +#' If called on a `Chromatogram` object, the method returns an [XChromatogram] +#' object with the identified peaks. See [peaksWithCentWave()] for details on +#' the peak matrix content. +#' #' @seealso [peaksWithCentWave()] for the downstream function and [centWave] #' for details on the method. #' #' @author Johannes Rainer #' #' @rdname findChromPeaks-Chromatogram-CentWaveParam -#' +#' #' @md #' #' @examples -#' -#' od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -#' mode = "onDisk") +#' +#' ## Loading a test data set with identified chromatographic peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' faahko_sub <- filterRt(faahko_sub, c(2500, 3700)) +#' +#' ## +#' od <- as(filterFile(faahko_sub, 1L), "OnDiskMSnExp") #' #' ## Extract chromatographic data for a small m/z range #' chr <- chromatogram(od, mz = c(272.1, 272.3))[1, 1] #' #' ## Identify peaks with default settings -#' pks <- findChromPeaks(chr, CentWaveParam()) -#' pks +#' xchr <- findChromPeaks(chr, CentWaveParam()) +#' xchr #' -#' ## Plot the identified peaks -#' plot(chr, type = "h") -#' rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], -#' ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#ff000020") -#' -#' ## Modify the settings -#' cwp <- CentWaveParam(snthresh = 5, peakwidth = c(10, 60)) -#' pks <- findChromPeaks(chr, cwp) -#' pks -#' -#' plot(chr, type = "h") -#' rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], -#' ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#00ff0020") +#' ## Plot data and identified peaks. +#' plot(xchr) setMethod("findChromPeaks", signature(object = "Chromatogram", param = "CentWaveParam"), function(object, param, ...) { - do.call("peaksWithCentWave", - args = c(list(int = intensity(object), - rt = rtime(object)), - as(param, "list"))) + res <- do.call("peaksWithCentWave", + args = c(list(int = intensity(object), + rt = rtime(object)), + as(param, "list"))) + object <- as(object, "XChromatogram") + chromPeaks(object) <- res + object }) #' @title matchedFilter-based peak detection in purely chromatographic data #' #' @description #' -#' `findChromPeaks` on a [Chromatogram] or [Chromatograms] object with a +#' `findChromPeaks` on a [Chromatogram] or [MChromatograms] object with a #' [MatchedFilterParam] parameter object performs matchedFilter-based peak #' detection on purely chromatographic data. See [matchedFilter] for details #' on the method and [MatchedFilterParam] for details on the parameter class. @@ -81,59 +87,195 @@ setMethod("findChromPeaks", signature(object = "Chromatogram", #' See [peaksWithMatchedFilter()] for the arguments used for peak detection #' on purely chromatographic data. #' -#' @param object a [Chromatogram] or [Chromatograms] object. +#' @param object a [Chromatogram] or [MChromatograms] object. #' #' @param param a [MatchedFilterParam] object specifying the settings for the #' peak detection. See [peaksWithMatchedFilter()] for the description of #' arguments used for peak detection. #' #' @param ... currently ignored. -#' +#' #' @return #' #' If called on a `Chromatogram` object, the method returns a `matrix` with #' the identified peaks. See [peaksWithMatchedFilter()] for details on the #' matrix content. -#' +#' #' @seealso [peaksWithMatchedFilter()] for the downstream function and #' [matchedFilter] for details on the method. #' #' @author Johannes Rainer #' #' @rdname findChromPeaks-Chromatogram-MatchedFilter -#' +#' #' @md #' #' @examples -#' -#' od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -#' mode = "onDisk") +#' +#' ## Loading a test data set with identified chromatographic peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' faahko_sub <- filterRt(faahko_sub, c(2500, 3700)) +#' +#' ## +#' od <- as(filterFile(faahko_sub, 1L), "OnDiskMSnExp") #' #' ## Extract chromatographic data for a small m/z range #' chr <- chromatogram(od, mz = c(272.1, 272.3))[1, 1] #' #' ## Identify peaks with default settings -#' pks <- findChromPeaks(chr, MatchedFilterParam()) -#' pks +#' xchr <- findChromPeaks(chr, MatchedFilterParam()) #' #' ## Plot the identified peaks -#' plot(chr, type = "h") -#' rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], -#' ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#ff000020") -#' -#' ## Modify the settings -#' mfp <- MatchedFilterParam(fwhm = 60) -#' pks <- findChromPeaks(chr, mfp) -#' pks -#' -#' plot(chr, type = "h") -#' rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], -#' ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#00ff0020") +#' plot(xchr) setMethod("findChromPeaks", signature(object = "Chromatogram", param = "MatchedFilterParam"), function(object, param, ...) { - do.call("peaksWithMatchedFilter", - args = c(list(int = intensity(object), - rt = rtime(object)), - as(param, "list"))) + res <- do.call("peaksWithMatchedFilter", + args = c(list(int = intensity(object), + rt = rtime(object)), + as(param, "list"))) + object <- as(object, "XChromatogram") + chromPeaks(object) <- res + object + }) + +#' @title Correlate chromatograms +#' +#' @aliases correlate +#' +#' @rdname correlate-Chromatogram +#' +#' @description +#' +#' **For `xcms` >= 3.15.3 please use [compareChromatograms()] instead of +#' `correlate`** +#' +#' Correlate intensities of two chromatograms with each other. If the two +#' `Chromatogram` objects have different retention times they are first +#' *aligned* to match data points in the first to data points in the second +#' chromatogram. See help on `alignRt` in [MSnbase::Chromatogram()] for more +#' details. +#' +#' If `correlate` is called on a single [MChromatograms()] object a pairwise +#' correlation of each chromatogram with each other is performed and a `matrix` +#' with the correlation coefficients is returned. +#' +#' Note that the correlation of two chromatograms depends also on their order, +#' e.g. `correlate(chr1, chr2)` might not be identical to +#' `correlate(chr2, chr1)`. The lower and upper triangular part of the +#' correlation matrix might thus be different. +#' +#' @param x [Chromatogram()] or [MChromatograms()] object. +#' +#' @param y [Chromatogram()] or [MChromatograms()] object. +#' +#' @param use `character(1)` passed to the `cor` function. See [cor()] for +#' details. +#' +#' @param method `character(1)` passed to the `cor` function. See [cor()] for +#' details. +#' +#' @param align `character(1)` defining the alignment method to be used. See +#' help on `alignRt` in [MSnbase::Chromatogram()] for details. The value of +#' this parameter is passed to the `method` parameter of `alignRt`. +#' +#' @param ... optional parameters passed along to the `alignRt` method such as +#' `tolerance` that, if set to `0` requires the retention times to be +#' identical. +#' +#' @return `numeric(1)` or `matrix` (if called on `MChromatograms` objects) +#' with the correlation coefficient. If a `matrix` is returned, the rows +#' represent the chromatograms in `x` and the columns the chromatograms in +#' `y`. +#' +#' @author Michael Witting, Johannes Rainer +#' +#' @md +#' +#' @examples +#' +#' chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), +#' intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) +#' chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), +#' intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) +#' chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), +#' intensity = c(53, 80, 130, 15, 5, 3, 2)) +#' +#' chrs <- MChromatograms(list(chr1, chr2, chr3)) +#' +#' ## Using `compareChromatograms` instead of `correlate`. +#' compareChromatograms(chr1, chr2) +#' compareChromatograms(chr2, chr1) +#' +#' compareChromatograms(chrs, chrs) +setMethod("correlate", signature = c(x = "Chromatogram", y = "Chromatogram"), + function(x, y, use = "pairwise.complete.obs", + method = c("pearson", "kendall", "spearman"), + align = c("closest", "approx"), ...) { + .Deprecated(new = "compareChromatograms") + lst <- list(...) + compareChromatograms( + x, y, ALIGNFUN = alignRt, FUN = cor, + ALIGNFUNARGS = c(list(method = align), lst), + FUNARGS = c(list(method = method, use = use), lst)) + }) + +#' @title Remove intensities from chromatographic data +#' +#' @aliases removeIntensity +#' +#' @rdname removeIntensity-Chromatogram +#' +#' @description +#' +#' `removeIntensities` allows to remove intensities from chromatographic data +#' matching certain conditions (depending on parameter `which`). The +#' intensities are actually not *removed* but replaced with `NA_real_`. To +#' actually **remove** the intensities (and the associated retention times) +#' use [clean()] afterwards. +#' +#' Parameter `which` allows to specify which intensities should be replaced by +#' `NA_real_`. By default (`which = "below_threshod"` intensities below +#' `threshold` are removed. If `x` is a `XChromatogram` or `XChromatograms` +#' object (and hence provides also chromatographic peak definitions within the +#' object) `which = "outside_chromPeak"` can be selected which removes any +#' intensity which is outside the boundaries of identified chromatographic +#' peak(s) in the chromatographic data. +#' +#' Note that [filterIntensity()] might be a better approach to subset/filter +#' chromatographic data. +#' +#' @param object an object representing chromatographic data. Can be a +#' [Chromatogram()], [MChromatograms()], [XChromatogram()] or +#' [XChromatograms()] object. +#' +#' @param which `character(1)` defining the condition to remove intensities. +#' See description for details and options. +#' +#' @param threshold `numeric(1)` defining the threshold below which intensities +#' are removed (if `which = "below_threshold"`). +#' +#' @return the input object with matching intensities being replaced by `NA`. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @examples +#' +#' chr <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), +#' intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) +#' +#' ## Remove all intensities below 20 +#' res <- removeIntensity(chr, threshold = 20) +#' intensity(res) +setMethod("removeIntensity", "Chromatogram", + function(object, which = "below_threshold", threshold = 0) { + which <- match.arg(which) + if (which == "below_threshold") + object@intensity[which(object@intensity < threshold)] <- + NA_real_ + object }) diff --git a/R/methods-MChromatograms.R b/R/methods-MChromatograms.R new file mode 100644 index 000000000..5e7617b42 --- /dev/null +++ b/R/methods-MChromatograms.R @@ -0,0 +1,599 @@ +#' @rdname findChromPeaks-Chromatogram-CentWaveParam +#' +#' @aliases findChromPeaks-Chromatogram-CentWaveParam +#' +#' @examples +#' +#' ## Perform peak detection on an MChromatograms object +#' od3 <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), +#' system.file("cdf/KO/ko16.CDF", package = "faahKO"), +#' system.file("cdf/KO/ko18.CDF", package = "faahKO")), +#' mode = "onDisk") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Extract chromatograms for a m/z - retention time slice +#' chrs <- chromatogram(od3, mz = 344, rt = c(2500, 3500)) +#' +#' ## Perform peak detection using CentWave +#' xchrs <- findChromPeaks(chrs, param = CentWaveParam()) +#' xchrs +#' +#' ## Extract the identified chromatographic peaks +#' chromPeaks(xchrs) +#' +#' ## plot the result +#' plot(xchrs) +setMethod("findChromPeaks", signature(object = "MChromatograms", + param = "CentWaveParam"), + function(object, param, BPPARAM = bpparam(), ...) { + .findChromPeaks_XChromatograms(object = object, param = param, + BPPARAM = BPPARAM, ...) + }) + +#' @rdname findChromPeaks-Chromatogram-CentWaveParam +setMethod("findChromPeaks", signature(object = "MChromatograms", + param = "MatchedFilterParam"), + function(object, param, BPPARAM = BPPARAM, ...) { + .findChromPeaks_XChromatograms(object = object, + param = param, + BPPARAM = BPPARAM, ...) + }) + +.findChromPeaks_XChromatograms <- function(object, param, BPPARAM, ...) { + startDate <- date() + if (missing(BPPARAM)) + BPPARAM <- bpparam() + object <- as(object, "XChromatograms") + object@.Data <- matrix(bplapply(c(object@.Data), FUN = findChromPeaks, + param = param, BPPARAM = BPPARAM), + ncol = ncol(object), + dimnames = dimnames(object@.Data)) + ph_len <- length(object@.processHistory) + if (ph_len && processType(object@.processHistory[[ph_len]]) == + .PROCSTEP.PEAK.DETECTION) + object@.processHistory <- + object@.processHistory[seq_len(ph_len - 1)] + object <- addProcessHistory( + object, XProcessHistory(param = param, date. = startDate, + type. = .PROCSTEP.PEAK.DETECTION, + fileIndex = seq_len(ncol(object)))) + if (validObject(object)) object +} + +#' @rdname correlate-Chromatogram +setMethod("correlate", + signature = c(x = "MChromatograms", y = "missing"), + function(x, y = NULL, use = "pairwise.complete.obs", + method = c("pearson", "kendall", "spearman"), + align = c("closest", "approx"), + ...) { + .Deprecated(new = "compareChromatograms") + dots <- list(...) + compareChromatograms( + x, x, ALIGNFUN = alignRt, FUN = cor, + ALIGNFUNARGS = c(list(method = align), dots), + FUNARGS = c(list(method = method, use = use), dots)) + }) + +#' @rdname correlate-Chromatogram +setMethod("correlate", + signature = c(x = "MChromatograms", y = "MChromatograms"), + function(x, y = NULL, use = "pairwise.complete.obs", + method = c("pearson", "kendall", "spearman"), + align = c("closest", "approx"), ...) { + .Deprecated(new = "compareChromatograms") + dots <- list(...) + compareChromatograms( + x, y, ALIGNFUN = alignRt, FUN = cor, + ALIGNFUNARGS = c(list(method = align), dots), + FUNARGS = c(list(method = method, use = use), dots)) + }) + +#' @rdname removeIntensity-Chromatogram +setMethod("removeIntensity", "MChromatograms", + function(object, which = "below_threshold", threshold = 0) { + object@.Data <- matrix(lapply(c(object@.Data), + FUN = removeIntensity, + which = which, + threshold = threshold), + ncol = ncol(object), + dimnames = dimnames(object@.Data)) + object + }) + +#' @title Filtering sets of chromatographic data +#' +#' @aliases filterColumnsIntensityAbove filterColumnsKeepTop +#' +#' @rdname filter-MChromatograms +#' +#' @description +#' +#' These functions allow to filter (subset) [MChromatograms()] or +#' [XChromatograms()] objects, i.e. sets of chromatographic data, without +#' changing the data (intensity and retention times) within the individual +#' chromatograms ([Chromatogram()] objects). +#' +#' - `filterColumnsIntensityAbove`: subsets a `MChromatograms` objects keeping +#' only columns (samples) for which `value` is larger than the provided +#' `threshold` in `which` rows (i.e. if `which = "any"` a +#' column is kept if **any** of the chromatograms in that column have a +#' `value` larger than `threshold` or with `which = "all"` **all** +#' chromatograms in that column fulfill this criteria). Parameter `value` +#' allows to define on which value the comparison should be performed, with +#' `value = "bpi"` the maximum intensity of each chromatogram is compared to +#' `threshold`, with `value = "tic" the total sum of intensities of each +#' chromatogram is compared to `threshold`. For `XChromatograms` object, +#' `value = "maxo"` and `value = "into"` are supported which compares the +#' largest intensity of all identified chromatographic peaks in the +#' chromatogram with `threshold`, or the integrated peak area, respectively. +#' +#' - `filterColumnsKeepTop`: subsets a `MChromatograms` object keeping the top +#' `n` columns sorted by the value specified with `sortBy`. In detail, for +#' each column the value defined by `sortBy` is extracted from each +#' chromatogram and aggregated using the `aggregationFun`. Thus, by default, +#' for each chromatogram the maximum intensity is determined +#' (`sortBy = "bpi"`) and these values are summed up for chromatograms in the +#' same column (`aggregationFun = sum`). The columns are then sorted by these +#' values and the top `n` columns are retained in the returned +#' `MChromatograms`. Similar to the `filterColumnsIntensityAbove` function, +#' this function allows to use for `XChromatograms` objects to sort the +#' columns by column `sortBy = "maxo"` or `sortBy = "into"` of the +#' `chromPeaks` matrix. +#' +#' @param aggregationFun for `filterColumnsKeepTop`: function to be used to +#' aggregate (combine) the values from all chromatograms in each column. +#' Defaults to `aggregationFun = sum` in which case the sum of the values +#' is used to rank the columns. Alternatively the `mean`, `median` or +#' similar function can be used. +#' +#' @param n for `filterColumnsKeepTop`: `integer(1)` specifying the number of +#' columns that should be returned. `n` will be rounded to the closest +#' (larger) integer value. +#' +#' @param object [MChromatograms()] or [XChromatograms()] object. +#' +#' @param sortBy for `filterColumnsKeepTop`: the value by which columns should +#' be ordered to determine the top n columns. Can be either `sortBy = "bpi"` +#' (the default), in which case the maximum intensity of each column's +#' chromatograms is used, or `sortBy = "tic"` to use the total intensity +#' sum of all chromatograms. For [XChromatograms()] objects also +#' `value = "maxo"` and `value = "into"` is supported to use the maximum +#' intensity or the integrated area of identified chromatographic peaks +#' in each chromatogram. +#' +#' @param threshold for `filterColumnsIntensityAbove`: `numeric(1)` with the +#' threshold value to compare against. +#' +#' @param value `character(1)` defining which value should be used in the +#' comparison or sorting. Can be `value = "bpi"` (default) to use the +#' maximum intensity per chromatogram or `value = "tic"` to use the sum +#' of intensities per chromatogram. For [XChromatograms()] objects also +#' `value = "maxo"` and `value = "into"` is supported to use the maximum +#' intensity or the integrated area of identified chromatographic peaks +#' in each chromatogram. +#' +#' @param which for `filterColumnsIntensityAbove`: `character(1)` defining +#' whether **any** (`which = "any"`, default) or **all** (`which = "all"`) +#' chromatograms in a column have to fulfill the criteria for the column +#' to be kept. +#' +#' @return a filtered `MChromatograms` (or `XChromatograms`) object with the +#' same number of rows (EICs) but eventually a lower number of columns +#' (samples). +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @examples +#' +#' chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), +#' intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) +#' chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), +#' intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) +#' chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), +#' intensity = c(53, 80, 130, 15, 5, 3, 2)) +#' +#' chrs <- MChromatograms(list(chr1, chr2, chr1, chr3, chr2, chr3), +#' ncol = 3, byrow = FALSE) +#' chrs +#' +#' #### filterColumnsIntensityAbove +#' ## +#' ## Keep all columns with for which the maximum intensity of any of its +#' ## chromatograms is larger 90 +#' filterColumnsIntensityAbove(chrs, threshold = 90) +#' +#' ## Require that ALL chromatograms in a column have a value larger 90 +#' filterColumnsIntensityAbove(chrs, threshold = 90, which = "all") +#' +#' ## If none of the columns fulfills the criteria no columns are returned +#' filterColumnsIntensityAbove(chrs, threshold = 900) +#' +#' ## Filtering XChromatograms allow in addition to filter on the columns +#' ## "maxo" or "into" of the identified chromatographic peaks within each +#' ## chromatogram. +#' +#' #### filterColumnsKeepTop +#' ## +#' ## Keep the 2 columns with the highest sum of maximal intensities in their +#' ## chromatograms +#' filterColumnsKeepTop(chrs, n = 1) +#' +#' ## Keep the 50 percent of columns with the highest total sum of signal. Note +#' ## that n will be rounded to the next larger integer value +#' filterColumnsKeepTop(chrs, n = 0.5 * ncol(chrs), sortBy = "tic") +setMethod("filterColumnsIntensityAbove", "MChromatograms", + function(object, threshold = 0, value = c("bpi", "tic"), + which = c("any", "all")) { + value <- match.arg(value) + which <- match.arg(which) + if (length(threshold) > 1 || !is.numeric(threshold)) + stop("'threshold' should be a 'numeric' of length 1") + nc <- ncol(object) + if (value == "bpi") + FUN <- max + else FUN <- sum + which_fun <- getFunction(which) + keep <- rep(FALSE, nc) + for (i in seq_len(nc)) { + vals <- vapply(object[, i], function(z) { + FUN(z@intensity, na.rm = TRUE) + }, FUN.VALUE = NA_real_, USE.NAMES = FALSE) + keep[i] <- which_fun(vals > threshold) + } + object[, keep] + }) + +#' @rdname filter-MChromatograms +setMethod("filterColumnsKeepTop", "MChromatograms", + function(object, n = 1L, sortBy = c("bpi", "tic"), + aggregationFun = sum) { + sortBy <- match.arg(sortBy) + if (length(n) > 1 || !is.numeric(n)) + stop("'n' should be an 'integer' of length 1") + n <- ceiling(n) + nc <- ncol(object) + nr <- nrow(object) + if (n > nc) + stop("'n' should be smaller or equal than the number of ", + "columns (", nc, ")") + if (sortBy == "bpi") + FUN <- max + else FUN <- sum + colval <- numeric(nc) + for (i in seq_len(nc)) { + if (nr == 1) + vals <- FUN(object[1, i]@intensity, na.rm = TRUE) + else + vals <- vapply(object[, i], function(z) { + FUN(z@intensity, na.rm = TRUE) + }, FUN.VALUE = NA_real_, USE.NAMES = FALSE) + colval[i] <- aggregationFun(vals, na.rm = TRUE) + } + idx <- order(colval, decreasing = TRUE)[seq_len(n)] + object[, sort(idx)] + }) + +#' @title Plot multiple chromatograms into the same plot +#' +#' @aliases plotChromatogramsOverlay plotChromatogramsOverlay,MChromatograms-method +#' +#' @description +#' +#' `plotOverlay` draws chromatographic peak data from multiple (different) +#' extracted ion chromatograms (EICs) into the same plot. This allows to +#' directly compare the peak shape of these EICs in the same sample. In +#' contrast to the `plot` function for [MChromatograms()] object, which draws +#' the data from the same EIC across multiple samples in the same plot, this +#' function draws the different EICs from the same sample into the same plot. +#' +#' If `plotChromatogramsOverlay` is called on a `XChromatograms` object any +#' present chromatographic peaks will also be highlighted/drawn depending on the +#' parameters `peakType`, `peakCol`, `peakBg` and `peakPch` (see also help on +#' the `plot` function for `XChromatogram()` object for details). +#' +#' @param col definition of the color in which the chromatograms should be +#' drawn. Can be of length 1 or equal to `nrow(object)` to plot each +#' overlayed chromatogram in a different color. +#' +#' @param main optional title of the plot. If not defined, the range of m/z +#' values is used. +#' +#' @param object [MChromatograms()] or [XChromatograms()] object. +#' +#' @param peakBg if `object` is a `XChromatograms` object: definition of +#' background color(s) for each chromatographic peak. Has to be either of +#' length 1 or equal to the number of peaks in `object`. If not specified, +#' the peak will be drawn in the color defined by `col`. +#' +#' @param peakCol if `object` is a `XChromatograms` object: definition of +#' color(s) for each chromatographic peak. Has to be either of length 1 or +#' equal to the number of peaks in `object`. If not specified, the peak will +#' be drawn in the color defined by `col`. +#' +#' @param peakPch if `object` is a `XChromatograms` object: *point character* to +#' be used to label the apex position of the chromatographic peak if +#' `peakType = "point"`. +#' +#' @param peakType if `object` is a `XChromatograms` object: how chromatographic +#' peaks should be drawn: `peakType = "polygon"` (the default): label the +#' full chromatographic peak area, `peakType = "rectangle"`: indicate the +#' chromatographic peak by a rectangle and `peakType = "point"`: label the +#' chromatographic peaks' apex position with a point. +#' +#' @param stacked `numeric(1)` defining the part (proportion) of the y-axis to +#' use to *stack* EICs depending on their m/z values. If `stacked = 0` (the +#' default) no stacking is performed. With `stacked = 1` half of the y-axis +#' is used for stacking and half for the intensity y-axis (i.e. the ratio +#' between stacking and intensity y-axis is 1:1). Note that if `stacking` +#' is different from 0 no y-axis and label are drawn. +#' +#' @param transform `function` to transform the intensity values before +#' plotting. Defaults to `transform = identity` which plots the data as it +#' is. With `transform = log10` intensity values would be log10 transformed +#' before plotting. +#' +#' @param type `character(1)` defing the type of the plot. By default +#' (`type = "l"`) each chromatogram is drawn as a line. +#' +#' @param xlab `character(1)` defining the x-axis label. +#' +#' @param xlim optional `numeric(2)` defining the x-axis limits. +#' +#' @param ylab `character(1)` defining the y-axis label. +#' +#' @param ylim optional `numeric(2)` defining the y-axis limits. +#' +#' @param ... optional arguments to be passed to the plotting functions (see +#' help on the base R `plot` function. +#' +#' @return silently returns a `list` (length equal to `ncol(object)` of +#' `numeric` (length equal to `nrow(object)`) with the y position of +#' each EIC. +#' +#' @md +#' +#' @author Johannes Rainer +#' +#' @name plotChromatogramsOverlay +#' +#' @examples +#' +#' ## Load preprocessed data and extract EICs for some features. +#' library(xcms) +#' data(xdata) +#' ## Update the path to the files for the local system +#' dirname(xdata) <- c(rep(system.file("cdf", "KO", package = "faahKO"), 4), +#' rep(system.file("cdf", "WT", package = "faahKO"), 4)) +#' ## Subset to the first 3 files. +#' xdata <- filterFile(xdata, 1:3, keepFeatures = TRUE) +#' +#' ## Define features for which to extract EICs +#' fts <- c("FT097", "FT163", "FT165") +#' chrs <- featureChromatograms(xdata, features = fts) +#' +#' plotChromatogramsOverlay(chrs) +#' +#' ## plot the overlay of EICs in the first sample +#' plotChromatogramsOverlay(chrs[, 1]) +#' +#' ## Define a different color for each feature (row in chrs). By default, also +#' ## all chromatographic peaks of a feature is labeled in the same color. +#' plotChromatogramsOverlay(chrs[, 1], +#' col = c("#ff000040", "#00ff0040", "#0000ff40")) +#' +#' ## Alternatively, we can define a color for each individual chromatographic +#' ## peak and provide this with the `peakBg` and `peakCol` parameters. +#' chromPeaks(chrs[, 1]) +#' +#' ## Use a color for each of the two identified peaks in that sample +#' plotChromatogramsOverlay(chrs[, 1], +#' col = c("#ff000040", "#00ff0040", "#0000ff40"), +#' peakBg = c("#ffff0020", "#00ffff20")) +#' +#' ## Plotting the data in all samples. +#' plotChromatogramsOverlay(chrs, +#' col = c("#ff000040", "#00ff0040", "#0000ff40")) +#' +#' ## Creating a "stacked" EIC plot: the EICs are placed along the y-axis +#' ## relative to their m/z value. With `stacked = 1` the y-axis is split in +#' ## half, the lower half being used for the stacking of the EICs, the upper +#' ## half being used for the *original* intensity axis. +#' res <- plotChromatogramsOverlay(chrs[, 1], stacked = 1, +#' col = c("#ff000040", "#00ff0040", "#0000ff40")) +#' ## add horizontal lines for the m/z values of each EIC +#' abline(h = res[[1]], col = "grey", lty = 2) +#' +#' ## Note that this type of visualization is different than the conventional +#' ## plot function for chromatographic data, which will draw the EICs for +#' ## multiple samples into the same plot +#' plot(chrs) +#' +#' ## Converting the object to a MChromatograms without detected peaks +#' chrs <- as(chrs, "MChromatograms") +#' +#' plotChromatogramsOverlay(chrs, +#' col = c("#ff000040", "#00ff0040", "#0000ff40")) +setMethod("plotChromatogramsOverlay", "MChromatograms", + function(object, col = "#00000060", type = "l", main = NULL, + xlab = "rtime", ylab = "intensity", xlim = numeric(), + ylim = numeric(), stacked = 0, transform = identity, ...) { + nsam <- ncol(object) + transform <- match.fun(transform) + if (nsam > 1) + par(mfrow = n2mfrow(nsam, 1)) + res <- vector("list", nsam) + for (i in seq_len(nsam)) { + res[[i]] <- .plot_xchromatograms_overlay( + object[, i, drop = FALSE], main = main, + xlab = xlab, ylab = ylab, xlim = xlim, + ylim = ylim, col = col, type = type, + stacked = stacked, transform = transform, ...) + } + invisible(res) + }) + +#' @rdname plotChromatogramsOverlay +setMethod("plotChromatogramsOverlay", "XChromatograms", + function(object, col = "#00000060", type = "l", main = NULL, + xlab = "rtime", ylab = "intensity", xlim = numeric(), + ylim = numeric(), peakType = c("polygon", "point", + "rectangle", "none"), + peakBg = NULL, peakCol = NULL, peakPch = 1, + stacked = 0, transform = identity, ...) { + transform <- match.fun(transform) + nsam <- ncol(object) + peakType <- match.arg(peakType) + if (nsam > 1) + par(mfrow = n2mfrow(nsam, 1)) + res <- vector("list", nsam) + for (i in seq_len(nsam)) { + res[[i]] <- .plot_xchromatograms_overlay( + object[, i, drop = FALSE], main = main, xlab = xlab, + ylab = ylab, xlim = xlim, ylim = ylim, col = col, + type = type, peakType = peakType, peakCol = peakCol, + peakBg = peakBg, peakPch = peakPch, + stacked = stacked, transform = transform, ...) + } + invisible(res) + }) + +.plot_single_xchromatograms <- function(x, type = "l", col = "#00000060", + peakType = c("polygon", "point", + "rectangle", "none"), + peakCol = NULL, peakBg = NULL, + peakPch = 1, yoffset = 0, + fill = NA, transform = identity, ...) { + peakType <- match.arg(peakType) + .plot_single_chromatograms(x, type = type, col = col, + yoffset = yoffset, fill = fill, + transform = transform, ...) + pks <- chromPeaks(x) + if (nrow(pks) && peakType != "none") { + .add_chromatogram_peaks(as(x, "Chromatogram"), pks, col = peakCol, + bg = peakBg, type = peakType, + pch = peakPch, yoffset = yoffset, + transform = transform, ...) + } +} + +.plot_single_chromatograms <- function(x, type = "l", col = "#00000060", + fill = NA, yoffset = 0, + transform = identity, ...) { + ints <- transform(intensity(x)) + yoffset + ints[is.infinite(ints)] <- 0 + if (!is.na(fill)) { + nnas <- !is.na(ints) + rts <- rtime(x)[nnas] + if (any(nnas)) + polygon(c(rts[1], rts, rts[length(rts)]), + c(yoffset, ints[nnas], yoffset), border = NA, col = fill) + } + plot.xy(xy.coords(rtime(x), ints), + type = type, col = col, ...) +} + +.plot_xchromatograms_overlay <- function(x, xlab = "rtime", ylab = "intensity", + type = "l", col = "#00000060", + xlim = numeric(), ylim = numeric(), + main = NULL, axes = TRUE, + frame.plot = axes, + peakType = c("polygon", + "point", + "rectangle", + "none"), + peakCol = NULL, + peakBg = NULL, + peakPch = 1, + fill = NA, + yoffset = 0, + stacked = 0, + transform = identity, ...) { + if (ncol(x) > 1) + stop(".plot_chromatograms_overlay supports only single column", + " XChromatograms") + peakType <- match.arg(peakType) + stacked <- stacked[1L] + nchr <- nrow(x) + if (length(col) != nchr) + col <- rep(col[1], nchr) + if (length(fill) != nchr) + fill <- rep(fill[1], nchr) + if(!length(xlim)) + xlim <- suppressWarnings(range(lapply(x, rtime), na.rm = TRUE)) + if(!length(ylim)) { + ylim <- transform(range(c(lapply(x, intensity), 0), na.rm = TRUE)) + ylim[is.infinite(ylim)] <- 0 + } + if (any(is.infinite(xlim))) + xlim <- c(0, 0) + if (any(is.infinite(ylim))) + ylim <- c(0, 0) + ## yposition and stacking + if (stacked != 0) { + mzs <- mz(x) + if (any(is.na(mzs))) + mzs <- seq_len(nchr) + else mzs <- rowMeans(mzs) + ymz_range <- stacked * ylim + mzr <- range(mzs) + ypos <- ymz_range[2] + (mzs - mzr[2]) * (ymz_range[2] - ymz_range[1]) / + (mzr[2] - mzr[1]) + ylim[2] <- ylim[2] + ymz_range[2] + ylab <- "" + } else + ypos <- rep(yoffset, nchr) + dev.hold() + on.exit(dev.flush()) + plot.new() + plot.window(xlim = xlim, ylim = ylim) + if (axes) { + axis(side = 1, ...) + if (stacked == 0) + axis(side = 2, ...) + } + if (frame.plot) + box(...) + if (!length(main)) { + mzs <- mz(x) + mzs <- c(suppressWarnings(min(mzs[, 1], na.rm = TRUE)), + suppressWarnings(max(mzs[, 2], na.rm = TRUE))) + main <- paste0("m/z: ", format(mzs[1], digits = 6), " - ", + format(mzs[2], digits = 6)) + } + title(main = main, xlab = xlab, ylab = ylab, ...) + ## Define colors for peaks - but only if it IS a XChromatograms. + if (is(x, "XChromatograms") && peakType != "none") { + pk_row <- chromPeaks(x)[, "row"] + np <- length(pk_row) + f <- factor(pk_row, levels = seq_len(nchr)) + if (!length(peakCol)) + peakCol <- col[f] + if (!length(peakBg)) + peakBg <- col[f] + if (length(peakCol) != np) + peakCol <- rep(peakCol[1L], np) + if (length(peakBg) != np) + peakBg <- rep(peakBg[1L], np) + for (i in order(ypos, decreasing = TRUE)) { + .plot_single_xchromatograms(x[i, 1], col = col[i], type = type, + peakType = peakType, + peakCol = peakCol[pk_row == i], + peakBg = peakBg[pk_row == i], + peakPch = peakPch, fill = fill[i], + yoffset = ypos[i], + transform = transform, ...) + } + } else { + for (i in order(ypos, decreasing = TRUE)) { + .plot_single_chromatograms(x[i, 1], col = col[i], type = type, + fill = fill[i], yoffset = ypos[i], + transform = transform, ...) + } + } + ypos +} diff --git a/R/methods-MsFeatureData.R b/R/methods-MsFeatureData.R index feede1d0b..eed77b978 100644 --- a/R/methods-MsFeatureData.R +++ b/R/methods-MsFeatureData.R @@ -1,17 +1,12 @@ ## Methods for the MsFeatureData class. #' @include functions-MsFeatureData.R do_adjustRtime-functions.R -setMethod("initialize", "MsFeatureData", function(.Object, ...) { - classVersion(.Object)["MsFeatureData"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - setValidity("MsFeatureData", function(object) { - return(validateMsFeatureData(object)) + validateMsFeatureData(object) }) #' @noRd -#' +#' #' @rdname XCMSnExp-class setMethod("show", "MsFeatureData", function(object) { cat("Object of class: ", class(object), "\n") @@ -28,28 +23,36 @@ setMethod("show", "MsFeatureData", function(object) { ## adjustedRtime: getter and setter for the adjustedRtime list. #' @noRd -#' +#' #' @rdname XCMSnExp-class setMethod("hasAdjustedRtime", "MsFeatureData", function(object) { !is.null(object$adjustedRtime) }) #' @noRd -#' +#' #' @rdname XCMSnExp-class -setMethod("hasFeatures", "MsFeatureData", function(object) { - !is.null(object$featureDefinitions) +setMethod("hasFeatures", "MsFeatureData", function(object, + msLevel = integer()) { + if (length(msLevel) && !is.null(object$featureDefinitions) && + any(colnames(object$featureDefinitions) == "ms_level")) + any(msLevel %in% object$featureDefinitions$ms_level) + else !is.null(object$featureDefinitions) }) #' @noRd -#' +#' #' @rdname XCMSnExp-class -setMethod("hasChromPeaks", "MsFeatureData", function(object) { - !is.null(object$chromPeaks) +setMethod("hasChromPeaks", "MsFeatureData", function(object, + msLevel = integer()) { + if (length(msLevel) && !is.null(object$chromPeaks) && + any(colnames(object$chromPeakData) == "ms_level")) + any(msLevel %in% object$chromPeakData$ms_level) + else !is.null(object$chromPeaks) }) #' @noRd -#' +#' #' @rdname XCMSnExp-class setMethod("adjustedRtime", "MsFeatureData", function(object) { if (hasAdjustedRtime(object)) @@ -58,71 +61,105 @@ setMethod("adjustedRtime", "MsFeatureData", function(object) { return(NULL) }) #' @noRd -#' +#' #' @rdname XCMSnExp-class setReplaceMethod("adjustedRtime", "MsFeatureData", function(object, value) { object$adjustedRtime <- value - if (validObject(object)) - return(object) + object }) #' @noRd -#' +#' #' @rdname XCMSnExp-class -setMethod("dropAdjustedRtime", "MsFeatureData", function(object) { +setMethod("dropAdjustedRtime", "MsFeatureData", function(object, rtraw) { if (hasAdjustedRtime(object)) { + if (hasChromPeaks(object)) { + message("Reverting retention times of identified peaks to ", + "original values ... ", appendLF = FALSE) + fts <- .applyRtAdjToChromPeaks( + chromPeaks(object), rtraw = adjustedRtime(object), + rtadj = rtraw) + chromPeaks(object) <- fts + message("OK") + } rm(list = "adjustedRtime", envir = object) } return(object) }) #' @noRd -#' +#' #' @rdname XCMSnExp-class -setMethod("featureDefinitions", "MsFeatureData", function(object) { - if (hasFeatures(object)) - return(object$featureDefinitions) - warning("No aligned feature information available.") - return(NULL) +setMethod("featureDefinitions", "MsFeatureData", function(object, + msLevel = integer()) { + if (length(object$featureDefinitions)) { + if (any(colnames(object$featureDefinitions) == "ms_level") && + length(msLevel)) + object$featureDefinitions[object$featureDefinitions$ms_level %in% + msLevel, ] + else object$featureDefinitions + } else { + warning("No aligned feature information available.", call. = FALSE) + DataFrame() + } }) #' @noRd -#' +#' #' @rdname XCMSnExp-class setReplaceMethod("featureDefinitions", "MsFeatureData", function(object, value) { object$featureDefinitions <- value - if (validObject(object)) - return(object) + object }) #' @noRd -#' +#' #' @rdname XCMSnExp-class -setMethod("dropFeatureDefinitions", "MsFeatureData", function(object) { - if (hasFeatures(object)) - rm(list = "featureDefinitions", envir = object) - return(object) +setMethod("dropFeatureDefinitions", "MsFeatureData", + function(object, dropAdjustedRtime = FALSE) { + if (hasFeatures(object)) { + if (.hasFilledPeaks(object)) { + ## Remove filled in peaks + chromPeaks(object) <- chromPeaks( + object)[!chromPeakData(object)$is_filled, , drop = FALSE] + chromPeakData(object) <- extractROWS( + chromPeakData(object), which(!chromPeakData(object)$is_filled)) + } + if (dropAdjustedRtime) { + ## This will ensure that the retention times of the peaks + ## are restored. + object <- dropAdjustedRtime(object) + } + rm(list = "featureDefinitions", envir = object) + } + object }) #' @noRd -#' +#' #' @rdname XCMSnExp-class setMethod("chromPeaks", "MsFeatureData", function(object) { - if (hasChromPeaks(object)) - return(object$chromPeaks) - warning("No chromatographic peaks available.") - return(NULL) + object$chromPeaks }) #' @noRd -#' +#' #' @rdname XCMSnExp-class setReplaceMethod("chromPeaks", "MsFeatureData", function(object, value) { object$chromPeaks <- value - if (validObject(object)) - return(object) + object }) #' @noRd -#' +#' #' @rdname XCMSnExp-class setMethod("dropChromPeaks", "MsFeatureData", function(object) { if (hasChromPeaks(object)) rm(list = "chromPeaks", envir = object) - return(object) + if (.has_chrom_peak_data(object)) + rm(list = "chromPeakData", envir = object) + object +}) + +setMethod("chromPeakData", "MsFeatureData", function(object) { + .chrom_peak_data(object) +}) +setReplaceMethod("chromPeakData", "MsFeatureData", function(object, value) { + object$chromPeakData <- value + object }) diff --git a/R/methods-OnDiskMSnExp.R b/R/methods-OnDiskMSnExp.R index b71b04cbd..9715a40c6 100644 --- a/R/methods-OnDiskMSnExp.R +++ b/R/methods-OnDiskMSnExp.R @@ -43,7 +43,9 @@ #' #' @param msLevel \code{integer(1)} defining the MS level on which the peak #' detection should be performed. Defaults to \code{msLevel = 1}. -#' +#' +#' @param ... ignored. +#' #' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an #' \code{\link{XCMSnExp}} object with the results of the peak detection. #' If \code{return.type = "list"} a list of length equal to the number of @@ -58,57 +60,38 @@ setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "CentWaveParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", - msLevel = 1L) { + msLevel = 1L, ...) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() ## Restrict to MS X data. if (length(msLevel) > 1) stop("Currently only peak detection in a single MS level is ", - "supported") - ## Restrict to MS level for peak detection, but keep the orignal - ## object. - object_mslevel <- filterMsLevel(object, msLevel. = msLevel) - if (length(object_mslevel) == 0) - stop("No MS level ", msLevel, " spectra present to perform ", - "peak detection") + "supported", call. = FALSE) ## Check if the data is centroided - suppressWarnings( - centroided <- isCentroided(object_mslevel[[1]]) - ) - ## issue #181: if there are too few mass peaks the function - ## returns NA. + centroided <- all(centroided(object)[msLevel(object) %in% msLevel]) if (is.na(centroided)) { - ## check all spectra in the file - takes longer. - centroided <- isCentroided(object_mslevel) - if (length(which(centroided)) > 0 & - length(which(!centroided)) == 0) - centroided <- TRUE - else centroided <- FALSE + idx <- which(msLevel(object) %in% msLevel) + idx <- idx[ceiling(length(idx) / 3)] + suppressWarnings( + centroided <- isCentroided(object[[idx]]) + ) } - if (!centroided) + if (is.na(centroided) || !centroided) warning("Your data appears to be not centroided! CentWave", " works best on data in centroid mode.") - ## (1) split the object per file. Ensure we keep adjusted - ## retention times (issue #213). - args <- list(X = 1:length(fileNames(object_mslevel)), - FUN = filterFile, object = object_mslevel) - if (hasAdjustedRtime(object_mslevel)) - args$keepAdjustedRtime <- TRUE - ## (2) use bplapply to do the peak detection. - resList <- bplapply(do.call("lapply", args), + resList <- bplapply(.split_by_file2(object, msLevel. = msLevel), FUN = findChromPeaks_OnDiskMSnExp, method = "centWave", param = param, BPPARAM = BPPARAM) ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", - fnames = fileNames(object_mslevel)) + fnames = fileNames(object)) if (return.type == "list") return(res$peaks) - object <- .peaks_to_result(res, object, startDate, param, msLevel, - object_mslevel) + object <- .peaks_to_result(res, object, startDate, param, msLevel) if (return.type == "xcmsSet") as(object, "xcmsSet") else object @@ -130,7 +113,7 @@ setMethod("findChromPeaks", #' defining the parallel processing mode using the #' \code{\link{register}} method from the \code{BiocParallel} #' package. -#' +#' #' @param object For \code{findChromPeaks}: an #' \code{\link{OnDiskMSnExp}} object containing the MS- and all #' other experiment-relevant data. @@ -148,7 +131,7 @@ setMethod("findChromPeaks", #' samples with matrices specifying the identified peaks. #' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object #' with the results of the peak detection. -#' +#' #' @seealso \code{\link{XCMSnExp}} for the object containing the results of #' the chromatographic peak detection. #' @@ -156,7 +139,7 @@ setMethod("findChromPeaks", setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MatchedFilterParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", - msLevel = 1L) { + msLevel = 1L, ...) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() @@ -164,18 +147,7 @@ setMethod("findChromPeaks", if (length(msLevel) > 1) stop("Currently only peak detection in a single MS level is ", "supported") - object_mslevel <- filterMsLevel(object, msLevel. = msLevel) - if (length(object_mslevel) == 0) - stop("No MS level ", msLevel, " spectra present to perform ", - "peak detection") - ## (1) split the object per file. Ensure we keep adjusted - ## retention times (issue #213). - args <- list(X = 1:length(fileNames(object_mslevel)), - FUN = filterFile, object = object_mslevel) - if (hasAdjustedRtime(object_mslevel)) - args$keepAdjustedRtime <- TRUE - ## (2) use bplapply to do the peak detection. - resList <- bplapply(do.call("lapply", args), + resList <- bplapply(.split_by_file2(object, msLevel. = msLevel), FUN = findChromPeaks_OnDiskMSnExp, method = "matchedFilter", param = param, @@ -183,11 +155,10 @@ setMethod("findChromPeaks", ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", - fnames = fileNames(object_mslevel)) + fnames = fileNames(object)) if (return.type == "list") return(res$peaks) - object <- .peaks_to_result(res, object, startDate, param, msLevel, - object_mslevel) + object <- .peaks_to_result(res, object, startDate, param, msLevel) if (return.type == "xcmsSet") as(object, "xcmsSet") else object @@ -197,21 +168,24 @@ setMethod("findChromPeaks", #' result object. #' #' @noRd -.peaks_to_result <- function(res, object, startDate, param, msLevel, - object_mslevel) { +.peaks_to_result <- function(res, object, startDate, param, msLevel) { xph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.PEAK.DETECTION, - fileIndex = 1:length(fileNames(object_mslevel)), + fileIndex = 1:length(fileNames(object)), msLevel = msLevel) object <- as(object, "XCMSnExp") - object@.processHistory <- c(processHistory(object), list(xph)) + phist <- object@.processHistory ## if (hasAdjustedRtime(object) | hasFeatures(object)) ## object@msFeatureData <- new("MsFeatureData") pks <- do.call(rbind, res$peaks) - if (length(pks) > 0) - chromPeaks(object) <- cbind(pks, is_filled = 0) - if (validObject(object)) - object + if (length(pks) > 0) { + chromPeaks(object) <- pks + chromPeakData(object)$ms_level <- as.integer(msLevel) + chromPeakData(object)$is_filled <- FALSE + } + object@.processHistory <- c(phist, list(xph)) + validObject(object) + object } ## massifquant @@ -258,7 +232,7 @@ setMethod("findChromPeaks", setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MassifquantParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", - msLevel = 1L) { + msLevel = 1L, ...) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() @@ -266,29 +240,17 @@ setMethod("findChromPeaks", if (length(msLevel) > 1) stop("Currently only peak detection in a single MS level is ", "supported") - object_mslevel <- filterMsLevel(object, msLevel. = msLevel) - if (length(object_mslevel) == 0) - stop("No MS level ", msLevel, " spectra present to perform ", - "peak detection") - ## (1) split the object per file. Ensure we keep adjusted - ## retention times (issue #213). - args <- list(X = 1:length(fileNames(object_mslevel)), - FUN = filterFile, object = object_mslevel) - if (hasAdjustedRtime(object_mslevel)) - args$keepAdjustedRtime <- TRUE - ## (2) use bplapply to do the peaks detection. - resList <- bplapply(do.call("lapply", args), + resList <- bplapply(.split_by_file2(object, msLevel. = msLevel), FUN = findChromPeaks_OnDiskMSnExp, method = "massifquant", param = param, BPPARAM = BPPARAM) ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", - fnames = fileNames(object_mslevel)) + fnames = fileNames(object)) if (return.type == "list") return(res$peaks) - object <- .peaks_to_result(res, object, startDate, param, msLevel, - object_mslevel) + object <- .peaks_to_result(res, object, startDate, param, msLevel) if (return.type == "xcmsSet") as(object, "xcmsSet") else object @@ -339,7 +301,7 @@ setMethod("findChromPeaks", setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MSWParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", - msLevel = 1L) { + msLevel = 1L, ...) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() @@ -352,19 +314,13 @@ setMethod("findChromPeaks", stop("No MS level ", msLevel, " spectra present to perform ", "peak detection") - rts <- split(rtime(object_mslevel), f = fromFile(object_mslevel)) + rts <- split(rtime(object_mslevel), + f = as.factor(fromFile(object_mslevel))) if (any(lengths(rts) > 1)) stop("The MSW method can only be applied to single spectrum,", " non-chromatographic, files (i.e. with a single ", "retention time).") - ## (1) split the object per file. Ensure we keep adjusted - ## retention times (issue #213). - args <- list(X = 1:length(fileNames(object_mslevel)), - FUN = filterFile, object = object_mslevel) - if (hasAdjustedRtime(object_mslevel)) - args$keepAdjustedRtime <- TRUE - ## (2) use bplapply to do the peak detection. - resList <- bplapply(do.call("lapply", args), + resList <- bplapply(.split_by_file2(object_mslevel), FUN = findPeaks_MSW_OnDiskMSnExp, method = "MSW", param = param, BPPARAM = BPPARAM) @@ -374,8 +330,7 @@ setMethod("findChromPeaks", fnames = fileNames(object_mslevel)) if (return.type == "list") return(res$peaks) - object <- .peaks_to_result(res, object, startDate, param, msLevel, - object_mslevel) + object <- .peaks_to_result(res, object, startDate, param, msLevel) if (return.type == "xcmsSet") as(object, "xcmsSet") else object @@ -400,9 +355,9 @@ setMethod("findChromPeaks", #' #' @param param An \code{CentWavePredIsoParam} object with the settings for the #' chromatographic peak detection algorithm. -#' +#' #' @inheritParams findChromPeaks-centWave -#' +#' #' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an #' \code{\link{XCMSnExp}} object with the results of the peak detection. #' If \code{return.type = "list"} a list of length equal to the number of @@ -417,7 +372,7 @@ setMethod("findChromPeaks", setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "CentWavePredIsoParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", - msLevel = 1L) { + msLevel = 1L, ...) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() @@ -425,47 +380,30 @@ setMethod("findChromPeaks", if (length(msLevel) > 1) stop("Currently only peak detection in a single MS level is ", "supported") - object_mslevel <- filterMsLevel(object, msLevel. = msLevel) - if (length(object_mslevel) == 0) - stop("No MS level ", msLevel, " spectra present to perform ", - "peak detection") ## Check if the data is centroided - suppressWarnings( - centroided <- isCentroided(object_mslevel[[1]]) - ) - ## issue #181: if there are too few mass peaks the function - ## returns NA. + centroided <- all(centroided(object)[msLevel(object) %in% msLevel]) if (is.na(centroided)) { - ## check all spectra in the file - takes longer. - centroided <- isCentroided(object_mslevel) - if (length(which(centroided)) > 0 & - length(which(!centroided)) == 0) - centroided <- TRUE - else centroided <- FALSE + idx <- which(msLevel(object) %in% msLevel) + idx <- idx[ceiling(length(idx) / 3)] + suppressWarnings( + centroided <- isCentroided(object[[idx]]) + ) } - if (!centroided) + if (is.na(centroided) || !centroided) warning("Your data appears to be not centroided! CentWave", " works best on data in centroid mode.") - ## (1) split the object per file. Ensure we keep adjusted - ## retention times (issue #213). - args <- list(X = 1:length(fileNames(object_mslevel)), - FUN = filterFile, object = object_mslevel) - if (hasAdjustedRtime(object_mslevel)) - args$keepAdjustedRtime <- TRUE - ## (2) use bplapply to do the peak detection. - resList <- bplapply(do.call("lapply", args), + resList <- bplapply(.split_by_file2(object, msLevel. = msLevel), FUN = findChromPeaks_OnDiskMSnExp, method = "centWaveWithPredIsoROIs", param = param, BPPARAM = BPPARAM) ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", - fnames = fileNames(object_mslevel)) + fnames = fileNames(object)) if (return.type == "list") return(res$peaks) - object <- .peaks_to_result(res, object, startDate, param, msLevel, - object_mslevel) + object <- .peaks_to_result(res, object, startDate, param, msLevel) if (return.type == "xcmsSet") as(object, "xcmsSet") else object @@ -480,7 +418,7 @@ setMethod("findChromPeaks", #' the various binning methods. #' #' @param ... Additional parameters. -#' +#' #' @return For \code{profMat}: a \code{list} with a the profile matrix #' \code{matrix} (or matrices if \code{fileIndex} was not specified or if #' \code{length(fileIndex) > 1}). See \code{\link{profile-matrix}} for @@ -522,28 +460,38 @@ setMethod("profMat", signature(object = "OnDiskMSnExp"), function(object, sps <- spectra(z, BPPARAM = SerialParam()) mzs <- lapply(sps, mz) ## Fix for issue #301: got spectra with m/z being NA. - if (any(is.na(unlist(mzs)))) { + if (any(is.na(unlist(mzs, use.names = FALSE)))) { sps <- lapply(sps, clean, all = TRUE) mzs <- lapply(sps, mz) } - ## Fix for issue #312: remove empty spectra + ## Fix for issue #312: remove empty spectra, that we are however adding + ## later so that the ncol(profMat) == length(rtime(object)) pk_count <- lengths(mzs) - mzs <- mzs[pk_count > 0] - sps <- sps[pk_count > 0] + empty_spectra <- which(pk_count == 0) + if (length(empty_spectra)) { + mzs <- mzs[-empty_spectra] + sps <- sps[-empty_spectra] + } vps <- lengths(mzs, use.names = FALSE) - .createProfileMatrix(mz = unlist(mzs, use.names = FALSE), - int = unlist(lapply(sps, intensity), - use.names = FALSE), - valsPerSpect = vps, - method = bmethod, - step = bstep, - baselevel = bbaselevel, - basespace = bbasespace, - mzrange. = bmzrange., - returnBreaks = breturnBreaks) + res <- .createProfileMatrix(mz = unlist(mzs, use.names = FALSE), + int = unlist(lapply(sps, intensity), + use.names = FALSE), + valsPerSpect = vps, + method = bmethod, + step = bstep, + baselevel = bbaselevel, + basespace = bbasespace, + mzrange. = bmzrange., + returnBreaks = breturnBreaks) + if (length(empty_spectra)) + if (returnBreaks) + res$profMat <- .insertColumn(res$profMat, empty_spectra, 0) + else + res <- .insertColumn(res, empty_spectra, 0) + res }, bmethod = method, bstep = step, bbaselevel = baselevel, bbasespace = basespace, bmzrange. = mzrange., breturnBreaks = returnBreaks) - return(res) + res }) #' @rdname adjustRtime-obiwarp @@ -595,7 +543,8 @@ setMethod("adjustRtime", res <- rtime_all } res <- unlist(res, use.names = FALSE) - sNames <- unlist(split(featureNames(object), fromFile(object)), + sNames <- unlist(split(featureNames(object), + as.factor(fromFile(object))), use.names = FALSE) names(res) <- sNames res <- res[featureNames(object)] @@ -614,3 +563,26 @@ setMethod("hasAdjustedRtime", signature(object = "OnDiskMSnExp"), function(object) FALSE ) + +#' @title Extract isolation window target m/z definition +#' +#' @aliases isolationWindowTargetMz +#' +#' @description +#' +#' `isolationWindowTargetMz` extracts the isolation window target m/z definition +#' for each spectrum in `object`. +#' +#' @param object [OnDiskMSnExp-class] object. +#' +#' @return a `numeric` of length equal to the number of spectra in `object` with +#' the isolation window target m/z or `NA` if not specified/available. +#' +#' @author Johannes Rainer +#' +#' @md +setMethod("isolationWindowTargetMz", "OnDiskMSnExp", function(object) { + if ("isolationWindowTargetMZ" %in% colnames(.fdata(object))) + return(.fdata(object)$isolationWindowTargetMZ) + rep(NA_real_, length(object)) +}) diff --git a/R/methods-Params.R b/R/methods-Params.R index f84179049..d13d20f52 100644 --- a/R/methods-Params.R +++ b/R/methods-Params.R @@ -1,82 +1,17 @@ ## Methods for the Param class and sub-classes #' @include functions-Params.R -############################################################ -## Param -### -setMethod("as.list", signature(x = "Param"), function(x, ...) { - return(.param2list(x)) -}) -## The 'setAs' method. -setAs("Param" ,"list", function(from){ - return(.param2list(from)) -}) -setMethod("initialize", "Param", function(.Object, ...) { - classVersion(.Object)["Param"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - -############################################################ -## GenericParam -### -setMethod("initialize", "GenericParam", function(.Object, ...) { - classVersion(.Object)["GenericParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) -#' @param object \code{GenericParam} object. -#' -#' @rdname GenericParam -setMethod("show", "GenericParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat(" fun:", object@fun, "\n") - cat(" arguments:\n") - if (length(object@args) > 0) { - for (i in 1:length(object@args)) { - if (!is.null(names(object@args))) - cat(" ", names(object@args)[i], "= ") - cat(object@args[[i]], "\n") - } - } -}) - -############################################################ -## CentWaveParam -### -setMethod("initialize", "CentWaveParam", function(.Object, ...) { - classVersion(.Object)["CentWaveParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - -#' @rdname findChromPeaks-centWave -setMethod("show", "CentWaveParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" ppm:", ppm(object), "\n") - cat(" peakwidth:", paste(peakwidth(object), collapse = ", "), "\n") - cat(" snthresh:", snthresh(object), "\n") - cat(" prefilter:", paste(prefilter(object), collapse = ", "), "\n") - cat(" mzCenterFun:", mzCenterFun(object), "\n") - cat(" integrate:", integrate(object), "\n") - cat(" mzdiff:", mzdiff(object), "\n") - cat(" fitgauss:", fitgauss(object), "\n") - cat(" noise:", noise(object), "\n") - cat(" verboseColumns:", verboseColumns(object), "\n") - cat(" roiList length:", length(roiList(object)), "\n") - cat(" firstBaselineCheck", firstBaselineCheck(object), "\n") - cat(" roiScales length:", length(roiScales(object)), "\n") -}) - #' @aliases ppm -#' +#' #' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} #' slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("ppm", "CentWaveParam", function(object){ return(object@ppm)}) #' @aliases ppm<- -#' +#' #' @param value The value for the slot. -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("ppm", "CentWaveParam", function(object, value) { object@ppm <- value @@ -85,15 +20,15 @@ setReplaceMethod("ppm", "CentWaveParam", function(object, value) { }) #' @aliases peakwidth -#' +#' #' @description \code{peakwidth},\code{peakwidth<-}: getter and setter for the #' \code{peakwidth} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("peakwidth", "CentWaveParam", function(object) return(object@peakwidth)) #' @aliases peakwidth<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("peakwidth", "CentWaveParam", function(object, value) { object@peakwidth <- value @@ -102,15 +37,15 @@ setReplaceMethod("peakwidth", "CentWaveParam", function(object, value) { }) #' @aliases snthresh -#' +#' #' @description \code{snthresh},\code{snthresh<-}: getter and setter for the #' \code{snthresh} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("snthresh", "CentWaveParam", function(object) return(object@snthresh)) #' @aliases snthresh<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("snthresh", "CentWaveParam", function(object, value) { object@snthresh <- value @@ -119,15 +54,15 @@ setReplaceMethod("snthresh", "CentWaveParam", function(object, value) { }) #' @aliases prefilter -#' +#' #' @description \code{prefilter},\code{prefilter<-}: getter and setter for the #' \code{prefilter} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("prefilter", "CentWaveParam", function(object) return(object@prefilter)) #' @aliases prefilter<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("prefilter", "CentWaveParam", function(object, value) { object@prefilter <- value @@ -136,15 +71,15 @@ setReplaceMethod("prefilter", "CentWaveParam", function(object, value) { }) #' @aliases mzCenterFun -#' +#' #' @description \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the #' \code{mzCenterFun} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("mzCenterFun", "CentWaveParam", function(object) return(object@mzCenterFun)) #' @aliases mzCenterFun<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("mzCenterFun", "CentWaveParam", function(object, value) { object@mzCenterFun <- value @@ -154,14 +89,14 @@ setReplaceMethod("mzCenterFun", "CentWaveParam", function(object, value) { #' @description \code{integrate},\code{integrate<-}: getter and setter for the #' \code{integrate} slot of the object. -#' +#' #' @param f For \code{integrate}: a \code{CentWaveParam} object. #' #' @rdname findChromPeaks-centWave setMethod("integrate", signature(f = "CentWaveParam"), function(f) return(f@integrate)) #' @aliases integrate<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("integrate", "CentWaveParam", function(object, value) { object@integrate <- as.integer(value) @@ -170,15 +105,15 @@ setReplaceMethod("integrate", "CentWaveParam", function(object, value) { }) #' @aliases mzdiff -#' +#' #' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the #' \code{mzdiff} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("mzdiff", "CentWaveParam", function(object) return(object@mzdiff)) #' @aliases mzdiff<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("mzdiff", "CentWaveParam", function(object, value) { object@mzdiff <- value @@ -187,15 +122,15 @@ setReplaceMethod("mzdiff", "CentWaveParam", function(object, value) { }) #' @aliases fitgauss -#' +#' #' @description \code{fitgauss},\code{fitgauss<-}: getter and setter for the #' \code{fitgauss} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("fitgauss", "CentWaveParam", function(object) return(object@fitgauss)) #' @aliases fitgauss<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("fitgauss", "CentWaveParam", function(object, value) { object@fitgauss <- value @@ -204,15 +139,15 @@ setReplaceMethod("fitgauss", "CentWaveParam", function(object, value) { }) #' @aliases noise -#' +#' #' @description \code{noise},\code{noise<-}: getter and setter for the #' \code{noise} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("noise", "CentWaveParam", function(object) return(object@noise)) #' @aliases noise<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("noise", "CentWaveParam", function(object, value) { object@noise <- value @@ -221,15 +156,15 @@ setReplaceMethod("noise", "CentWaveParam", function(object, value) { }) #' @aliases verboseColumns -#' +#' #' @description \code{verboseColumns},\code{verboseColumns<-}: getter and #' setter for the \code{verboseColumns} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("verboseColumns", "CentWaveParam", function(object) return(object@verboseColumns)) #' @aliases verboseColumns<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("verboseColumns", "CentWaveParam", function(object, value) { object@verboseColumns <- value @@ -238,15 +173,15 @@ setReplaceMethod("verboseColumns", "CentWaveParam", function(object, value) { }) #' @aliases roiList -#' +#' #' @description \code{roiList},\code{roiList<-}: getter and setter for the #' \code{roiList} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("roiList", "CentWaveParam", function(object) return(object@roiList)) #' @aliases roiList<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("roiList", "CentWaveParam", function(object, value) { object@roiList <- value @@ -255,15 +190,15 @@ setReplaceMethod("roiList", "CentWaveParam", function(object, value) { }) #' @aliases firstBaselineCheck -#' +#' #' @description \code{fistBaselineCheck},\code{firstBaselineCheck<-}: getter #' and setter for the \code{firstBaselineCheck} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("firstBaselineCheck", "CentWaveParam", function(object) return(object@firstBaselineCheck)) #' @aliases firstBaselineCheck<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("firstBaselineCheck", "CentWaveParam", function(object, value) { object@firstBaselineCheck <- value @@ -272,15 +207,15 @@ setReplaceMethod("firstBaselineCheck", "CentWaveParam", function(object, value) }) #' @aliases roiScales -#' +#' #' @description \code{roiScales},\code{roiScales<-}: getter and setter for the #' \code{roiScales} slot of the object. -#' +#' #' @rdname findChromPeaks-centWave setMethod("roiScales", "CentWaveParam", function(object) return(object@roiScales)) #' @aliases roiScales<- -#' +#' #' @rdname findChromPeaks-centWave setReplaceMethod("roiScales", "CentWaveParam", function(object, value) { object@roiScales <- value @@ -291,37 +226,17 @@ setReplaceMethod("roiScales", "CentWaveParam", function(object, value) { ############################################################ ## MatchedFilterParam -setMethod("initialize", "MatchedFilterParam", function(.Object, ...) { - classVersion(.Object)["MatchedFilterParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) -#' @rdname findChromPeaks-matchedFilter -setMethod("show", "MatchedFilterParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" binSize:", binSize(object), "\n") - cat(" impute:", impute(object), "\n") - cat(" baseValue:", baseValue(object), "\n") - cat(" distance:", distance(object), "\n") - cat(" fwhm:", fwhm(object), "\n") - cat(" sigma:", sigma(object), "\n") - cat(" max:", max(object), "\n") - cat(" snthresh:", snthresh(object), "\n") - cat(" steps:", steps(object), "\n") - cat(" mzdiff:", mzdiff(object), "\n") - cat(" index:", index(object), "\n") -}) #' @aliases binSize -#' +#' #' @description \code{binSize},\code{binSize<-}: getter and setter for the #' \code{binSize} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("binSize", "MatchedFilterParam", function(object) return(object@binSize)) #' @aliases binSize<- -#' +#' #' @param value The value for the slot. #' #' @rdname findChromPeaks-matchedFilter @@ -333,12 +248,12 @@ setReplaceMethod("binSize", "MatchedFilterParam", function(object, value) { #' @description \code{impute},\code{impute<-}: getter and setter for the #' \code{impute} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("impute", "MatchedFilterParam", function(object) return(object@impute)) #' @aliases impute<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("impute", "MatchedFilterParam", function(object, value) { object@impute <- value @@ -350,12 +265,12 @@ setReplaceMethod("impute", "MatchedFilterParam", function(object, value) { #' #' @description \code{baseValue},\code{baseValue<-}: getter and setter for the #' \code{baseValue} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("baseValue", "MatchedFilterParam", function(object) return(object@baseValue)) #' @aliases baseValue<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("baseValue", "MatchedFilterParam", function(object, value) { object@baseValue <- value @@ -364,15 +279,15 @@ setReplaceMethod("baseValue", "MatchedFilterParam", function(object, value) { }) #' @aliases distance -#' +#' #' @description \code{distance},\code{distance<-}: getter and setter for the #' \code{distance} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("distance", "MatchedFilterParam", function(object) return(object@distance)) #' @aliases distance<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("distance", "MatchedFilterParam", function(object, value) { object@distance <- value @@ -381,15 +296,15 @@ setReplaceMethod("distance", "MatchedFilterParam", function(object, value) { }) #' @aliases fwhm -#' +#' #' @description \code{fwhm},\code{fwhm<-}: getter and setter for the #' \code{fwhm} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("fwhm", "MatchedFilterParam", function(object) return(object@fwhm)) #' @aliases fwhm<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("fwhm", "MatchedFilterParam", function(object, value) { object@fwhm <- value @@ -398,15 +313,15 @@ setReplaceMethod("fwhm", "MatchedFilterParam", function(object, value) { }) #' @aliases sigma -#' +#' #' @description \code{sigma},\code{sigma<-}: getter and setter for the #' \code{sigma} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("sigma", "MatchedFilterParam", function(object) return(object@sigma)) #' @aliases sigma<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("sigma", "MatchedFilterParam", function(object, value) { object@sigma <- value @@ -416,14 +331,14 @@ setReplaceMethod("sigma", "MatchedFilterParam", function(object, value) { #' @description \code{max},\code{max<-}: getter and setter for the #' \code{max} slot of the object. -#' +#' #' @param x For \code{max}: a \code{MatchedFilterParam} object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("max", signature(x="MatchedFilterParam"), function(x) return(x@max)) #' @aliases max<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("max", "MatchedFilterParam", function(object, value) { object@max <- value @@ -433,7 +348,7 @@ setReplaceMethod("max", "MatchedFilterParam", function(object, value) { #' @description \code{snthresh},\code{snthresh<-}: getter and setter for the #' \code{snthresh} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("snthresh", "MatchedFilterParam", function(object) return(object@snthresh)) @@ -445,15 +360,15 @@ setReplaceMethod("snthresh", "MatchedFilterParam", function(object, value) { }) #' @aliases steps -#' +#' #' @description \code{steps},\code{steps<-}: getter and setter for the #' \code{steps} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("steps", "MatchedFilterParam", function(object) return(object@steps)) #' @aliases steps<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("steps", "MatchedFilterParam", function(object, value) { object@steps <- value @@ -463,7 +378,7 @@ setReplaceMethod("steps", "MatchedFilterParam", function(object, value) { #' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the #' \code{mzdiff} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("mzdiff", "MatchedFilterParam", function(object) return(object@mzdiff)) @@ -475,15 +390,15 @@ setReplaceMethod("mzdiff", "MatchedFilterParam", function(object, value) { }) #' @aliases index -#' +#' #' @description \code{index},\code{index<-}: getter and setter for the #' \code{index} slot of the object. -#' +#' #' @rdname findChromPeaks-matchedFilter setMethod("index", "MatchedFilterParam", function(object) return(object@index)) #' @aliases index<- -#' +#' #' @rdname findChromPeaks-matchedFilter setReplaceMethod("index", "MatchedFilterParam", function(object, value) { object@index <- value @@ -494,39 +409,14 @@ setReplaceMethod("index", "MatchedFilterParam", function(object, value) { ############################################################ ## MassifquantParam ### -setMethod("initialize", "MassifquantParam", function(.Object, ...) { - classVersion(.Object)["MassifquantParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - -#' @rdname findChromPeaks-massifquant -setMethod("show", "MassifquantParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" ppm:", ppm(object), "\n") - cat(" peakwidth:", paste(peakwidth(object), collapse = ", "), "\n") - cat(" snthresh:", snthresh(object), "\n") - cat(" prefilter:", paste(prefilter(object), collapse = ", "), "\n") - cat(" mzCenterFun:", mzCenterFun(object), "\n") - cat(" integrate:", integrate(object), "\n") - cat(" mzdiff:", mzdiff(object), "\n") - cat(" fitgauss:", fitgauss(object), "\n") - cat(" noise:", noise(object), "\n") - cat(" verboseColumns:", verboseColumns(object), "\n") - cat(" criticalValue:", criticalValue(object), "\n") - cat(" consecMissedLimit:", consecMissedLimit(object), "\n") - cat(" unions:", unions(object), "\n") - cat(" checkBack:", checkBack(object), "\n") - cat(" withWave:", withWave(object), "\n") -}) #' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} #' slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("ppm", "MassifquantParam", function(object){ return(object@ppm)}) #' @param value The value for the slot. -#' +#' #' @rdname findChromPeaks-massifquant setReplaceMethod("ppm", "MassifquantParam", function(object, value) { object@ppm <- value @@ -536,7 +426,7 @@ setReplaceMethod("ppm", "MassifquantParam", function(object, value) { #' @description \code{peakwidth},\code{peakwidth<-}: getter and setter for the #' \code{peakwidth} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("peakwidth", "MassifquantParam", function(object) return(object@peakwidth)) @@ -549,7 +439,7 @@ setReplaceMethod("peakwidth", "MassifquantParam", function(object, value) { #' @description \code{snthresh},\code{snthresh<-}: getter and setter for the #' \code{snthresh} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("snthresh", "MassifquantParam", function(object) return(object@snthresh)) @@ -562,7 +452,7 @@ setReplaceMethod("snthresh", "MassifquantParam", function(object, value) { #' @description \code{prefilter},\code{prefilter<-}: getter and setter for the #' \code{prefilter} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("prefilter", "MassifquantParam", function(object) return(object@prefilter)) @@ -575,7 +465,7 @@ setReplaceMethod("prefilter", "MassifquantParam", function(object, value) { #' @description \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the #' \code{mzCenterFun} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("mzCenterFun", "MassifquantParam", function(object) return(object@mzCenterFun)) @@ -588,7 +478,7 @@ setReplaceMethod("mzCenterFun", "MassifquantParam", function(object, value) { #' @description \code{integrate},\code{integrate<-}: getter and setter for the #' \code{integrate} slot of the object. -#' +#' #' @param f For \code{integrate}: a \code{MassifquantParam} object. #' #' @rdname findChromPeaks-massifquant @@ -603,7 +493,7 @@ setReplaceMethod("integrate", "MassifquantParam", function(object, value) { #' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the #' \code{mzdiff} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("mzdiff", "MassifquantParam", function(object) return(object@mzdiff)) @@ -616,7 +506,7 @@ setReplaceMethod("mzdiff", "MassifquantParam", function(object, value) { #' @description \code{fitgauss},\code{fitgauss<-}: getter and setter for the #' \code{fitgauss} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("fitgauss", "MassifquantParam", function(object) return(object@fitgauss)) @@ -629,7 +519,7 @@ setReplaceMethod("fitgauss", "MassifquantParam", function(object, value) { #' @description \code{noise},\code{noise<-}: getter and setter for the #' \code{noise} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("noise", "MassifquantParam", function(object) return(object@noise)) @@ -642,7 +532,7 @@ setReplaceMethod("noise", "MassifquantParam", function(object, value) { #' @description \code{verboseColumns},\code{verboseColumns<-}: getter and #' setter for the \code{verboseColumns} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("verboseColumns", "MassifquantParam", function(object) return(object@verboseColumns)) @@ -654,15 +544,15 @@ setReplaceMethod("verboseColumns", "MassifquantParam", function(object, value) { }) #' @aliases criticalValue -#' +#' #' @description \code{criticalValue},\code{criticalValue<-}: getter and #' setter for the \code{criticalValue} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("criticalValue", "MassifquantParam", function(object) return(object@criticalValue)) #' @aliases criticalValue<- -#' +#' #' @rdname findChromPeaks-massifquant setReplaceMethod("criticalValue", "MassifquantParam", function(object, value) { object@criticalValue <- value @@ -671,15 +561,15 @@ setReplaceMethod("criticalValue", "MassifquantParam", function(object, value) { }) #' @aliases consecMissedLimit -#' +#' #' @description \code{consecMissedLimit},\code{consecMissedLimit<-}: getter and #' setter for the \code{consecMissedLimit} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("consecMissedLimit", "MassifquantParam", function(object) return(object@consecMissedLimit)) #' @aliases consecMissedLimit<- -#' +#' #' @rdname findChromPeaks-massifquant setReplaceMethod("consecMissedLimit", "MassifquantParam", function(object, value) { @@ -689,15 +579,15 @@ setReplaceMethod("consecMissedLimit", "MassifquantParam", }) #' @aliases unions -#' +#' #' @description \code{unions},\code{unions<-}: getter and #' setter for the \code{unions} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("unions", "MassifquantParam", function(object) return(object@unions)) #' @aliases unions<- -#' +#' #' @rdname findChromPeaks-massifquant setReplaceMethod("unions", "MassifquantParam", function(object, value) { object@unions <- as.integer(value) @@ -706,15 +596,15 @@ setReplaceMethod("unions", "MassifquantParam", function(object, value) { }) #' @aliases checkBack -#' +#' #' @description \code{checkBack},\code{checkBack<-}: getter and #' setter for the \code{checkBack} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("checkBack", "MassifquantParam", function(object) return(object@checkBack)) #' @aliases checkBack<- -#' +#' #' @rdname findChromPeaks-massifquant setReplaceMethod("checkBack", "MassifquantParam", function(object, value) { object@checkBack <- as.integer(value) @@ -723,15 +613,15 @@ setReplaceMethod("checkBack", "MassifquantParam", function(object, value) { }) #' @aliases withWave -#' +#' #' @description \code{withWave},\code{withWave<-}: getter and #' setter for the \code{withWave} slot of the object. -#' +#' #' @rdname findChromPeaks-massifquant setMethod("withWave", "MassifquantParam", function(object) return(object@withWave)) #' @aliases withWave<- -#' +#' #' @rdname findChromPeaks-massifquant setReplaceMethod("withWave", "MassifquantParam", function(object, value) { object@withWave <- value @@ -743,41 +633,14 @@ setReplaceMethod("withWave", "MassifquantParam", function(object, value) { ############################################################ ## MSWParam ### -setMethod("initialize", "MSWParam", function(.Object, ...) { - classVersion(.Object)["MSWParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - -#' @rdname findPeaks-MSW -setMethod("show", "MSWParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" snthresh:", snthresh(object), "\n") - cat(" verboseColumns:", verboseColumns(object), "\n") - cat(" scales:", paste(scales(object), collapse = ","), "\n") - cat(" nearbyPeak:", nearbyPeak(object), "\n") - cat(" peakScaleRange:", peakScaleRange(object), "\n") - cat(" ampTh:", ampTh(object), "\n") - cat(" minNoiseLevel:", minNoiseLevel(object), "\n") - cat(" ridgeLength:", ridgeLength(object), "\n") - cat(" peakThr:", peakThr(object), "\n") - cat(" tuneIn:", tuneIn(object), "\n") - parms <- addParams(object) - if (length(parms) > 0) { - cat(" additional parameters:\n") - for (i in 1:length(parms)) { - cat(" ", names(parms)[i], ": ", parms[[i]], "\n", sep = "") - } - } -}) #' @description \code{snthresh},\code{snthresh<-}: getter and setter for the #' \code{snthresh} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("snthresh", "MSWParam", function(object){ return(object@snthresh)}) #' @param value The value for the slot. -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("snthresh", "MSWParam", function(object, value) { object@snthresh <- value @@ -787,7 +650,7 @@ setReplaceMethod("snthresh", "MSWParam", function(object, value) { #' @description \code{verboseColumns},\code{verboseColumns<-}: getter and setter #' for the \code{verboseColumns} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("verboseColumns", "MSWParam", function(object){ return(object@verboseColumns)}) @@ -799,14 +662,14 @@ setReplaceMethod("verboseColumns", "MSWParam", function(object, value) { }) #' @aliases scales -#' +#' #' @description \code{scales},\code{scales<-}: getter and setter for the #' \code{scales} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("scales", "MSWParam", function(object){ return(object@scales)}) #' @aliases scales<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("scales", "MSWParam", function(object, value) { object@scales <- value @@ -815,14 +678,14 @@ setReplaceMethod("scales", "MSWParam", function(object, value) { }) #' @aliases nearbyPeak -#' +#' #' @description \code{nearbyPeak},\code{nearbyPeak<-}: getter and setter for the #' \code{nearbyPeak} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("nearbyPeak", "MSWParam", function(object){ return(object@nearbyPeak)}) #' @aliases nearbyPeak<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("nearbyPeak", "MSWParam", function(object, value) { object@nearbyPeak <- value @@ -831,15 +694,15 @@ setReplaceMethod("nearbyPeak", "MSWParam", function(object, value) { }) #' @aliases peakScaleRange -#' +#' #' @description \code{peakScaleRange},\code{peakScaleRange<-}: getter and setter #' for the \code{peakScaleRange} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("peakScaleRange", "MSWParam", function(object){ return(object@peakScaleRange)}) #' @aliases peakScaleRange<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("peakScaleRange", "MSWParam", function(object, value) { object@peakScaleRange <- value @@ -848,14 +711,14 @@ setReplaceMethod("peakScaleRange", "MSWParam", function(object, value) { }) #' @aliases ampTh -#' +#' #' @description \code{ampTh},\code{ampTh<-}: getter and setter for the #' \code{ampTh} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("ampTh", "MSWParam", function(object){ return(object@ampTh)}) #' @aliases ampTh<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("ampTh", "MSWParam", function(object, value) { object@ampTh <- value @@ -864,15 +727,15 @@ setReplaceMethod("ampTh", "MSWParam", function(object, value) { }) #' @aliases minNoiseLevel -#' +#' #' @description \code{minNoiseLevel},\code{minNoiseLevel<-}: getter and setter #' for the \code{minNoiseLevel} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("minNoiseLevel", "MSWParam", function(object){ return(object@minNoiseLevel)}) #' @aliases minNoiseLevel<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("minNoiseLevel", "MSWParam", function(object, value) { object@minNoiseLevel <- value @@ -881,15 +744,15 @@ setReplaceMethod("minNoiseLevel", "MSWParam", function(object, value) { }) #' @aliases ridgeLength -#' +#' #' @description \code{ridgeLength},\code{ridgeLength<-}: getter and setter for #' the \code{ridgeLength} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("ridgeLength", "MSWParam", function(object){ return(object@ridgeLength)}) #' @aliases ridgeLength<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("ridgeLength", "MSWParam", function(object, value) { object@ridgeLength <- value @@ -898,14 +761,14 @@ setReplaceMethod("ridgeLength", "MSWParam", function(object, value) { }) #' @aliases peakThr -#' +#' #' @description \code{peakThr},\code{peakThr<-}: getter and setter for the #' \code{peakThr} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("peakThr", "MSWParam", function(object){ return(object@peakThr)}) #' @aliases peakThr<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("peakThr", "MSWParam", function(object, value) { object@peakThr <- value @@ -914,14 +777,14 @@ setReplaceMethod("peakThr", "MSWParam", function(object, value) { }) #' @aliases tuneIn -#' +#' #' @description \code{tuneIn},\code{tuneIn<-}: getter and setter for the #' \code{tuneIn} slot of the object. -#' +#' #' @rdname findPeaks-MSW setMethod("tuneIn", "MSWParam", function(object){ return(object@tuneIn)}) #' @aliases tuneIn<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("tuneIn", "MSWParam", function(object, value) { object@tuneIn <- value @@ -930,18 +793,18 @@ setReplaceMethod("tuneIn", "MSWParam", function(object, value) { }) #' @aliases addParams -#' +#' #' @description \code{addParams},\code{addParams<-}: getter and setter for the #' \code{addParams} slot of the object. This slot stores optional additional #' parameters to be passed to the #' \code{\link{identifyMajorPeaks}} and -#' \code{\link{sav.gol}} functions from the +#' \code{\link{peakDetectionCWT}} functions from the #' \code{MassSpecWavelet} package. #' #' @rdname findPeaks-MSW setMethod("addParams", "MSWParam", function(object){ return(object@addParams)}) #' @aliases addParams<- -#' +#' #' @rdname findPeaks-MSW setReplaceMethod("addParams", "MSWParam", function(object, value) { object@addParams <- value @@ -954,58 +817,30 @@ setAs("MSWParam" ,"list", function(from){ ## Rename ampTh to amp.Th names(L) <- sub(names(L), pattern = "ampTh", replacement = "amp.Th", fixed = TRUE) - return(L) + L }) setMethod("as.list", signature(x = "MSWParam"), function(x, ...) { L <- .param2list(x) ## Rename ampTh to amp.Th names(L) <- sub(names(L), pattern = "ampTh", replacement = "amp.Th", fixed = TRUE) - return(L) + L }) ############################################################ ## CentWavePredIsoParam ### -setMethod("initialize", "CentWavePredIsoParam", function(.Object, ...) { - classVersion(.Object)["CentWavePredIsoParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - -#' @rdname findChromPeaks-centWaveWithPredIsoROIs -setMethod("show", "CentWavePredIsoParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" ppm:", ppm(object), "\n") - cat(" peakwidth:", paste(peakwidth(object), collapse = ", "), "\n") - cat(" snthresh:", snthresh(object), "\n") - cat(" prefilter:", paste(prefilter(object), collapse = ", "), "\n") - cat(" mzCenterFun:", mzCenterFun(object), "\n") - cat(" integrate:", integrate(object), "\n") - cat(" mzdiff:", mzdiff(object), "\n") - cat(" fitgauss:", fitgauss(object), "\n") - cat(" noise:", noise(object), "\n") - cat(" verboseColumns:", verboseColumns(object), "\n") - cat(" roiList length:", length(roiList(object)), "\n") - cat(" firstBaselineCheck", firstBaselineCheck(object), "\n") - cat(" roiScales length:", length(roiScales(object)), "\n") - cat(" snthreshIsoROIs:", snthreshIsoROIs(object), "\n") - cat(" maxCharge:", maxCharge(object), "\n") - cat(" maxIso:", maxIso(object), "\n") - cat(" mzIntervalExtension:", mzIntervalExtension(object), "\n") - cat(" polarity:", polarity(object), "\n") -}) #' @aliases snthreshIsoROIs -#' +#' #' @description \code{snthreshIsoROIs},\code{snthreshIsoROIs<-}: getter and #' setter for the \code{snthreshIsoROIs} slot of the object. -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("snthreshIsoROIs", "CentWavePredIsoParam", function(object){ return(object@snthreshIsoROIs)}) #' @aliases snthreshIsoROIs<- -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("snthreshIsoROIs", "CentWavePredIsoParam", function(object, value) { object@snthreshIsoROIs <- value @@ -1014,15 +849,15 @@ setReplaceMethod("snthreshIsoROIs", "CentWavePredIsoParam", function(object, val }) #' @aliases maxCharge -#' +#' #' @description \code{maxCharge},\code{maxCharge<-}: getter and #' setter for the \code{maxCharge} slot of the object. -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("maxCharge", "CentWavePredIsoParam", function(object){ return(object@maxCharge)}) #' @aliases maxCharge<- -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("maxCharge", "CentWavePredIsoParam", function(object, value) { object@maxCharge <- as.integer(value) @@ -1031,15 +866,15 @@ setReplaceMethod("maxCharge", "CentWavePredIsoParam", function(object, value) { }) #' @aliases maxIso -#' +#' #' @description \code{maxIso},\code{maxIso<-}: getter and #' setter for the \code{maxIso} slot of the object. -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("maxIso", "CentWavePredIsoParam", function(object){ return(object@maxIso)}) #' @aliases maxIso<- -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("maxIso", "CentWavePredIsoParam", function(object, value) { object@maxIso <- as.integer(value) @@ -1048,15 +883,15 @@ setReplaceMethod("maxIso", "CentWavePredIsoParam", function(object, value) { }) #' @aliases mzIntervalExtension -#' +#' #' @description \code{mzIntervalExtension},\code{mzIntervalExtension<-}: getter #' and setter for the \code{mzIntervalExtension} slot of the object. -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("mzIntervalExtension", "CentWavePredIsoParam", function(object){ return(object@mzIntervalExtension)}) #' @aliases mzIntervalExtension<- -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("mzIntervalExtension", "CentWavePredIsoParam", function(object, value) { @@ -1067,12 +902,12 @@ setReplaceMethod("mzIntervalExtension", "CentWavePredIsoParam", #' @description \code{polarity},\code{polarity<-}: getter and #' setter for the \code{polarity} slot of the object. -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("polarity", "CentWavePredIsoParam", function(object){ return(object@polarity)}) #' @aliases polarity<- -#' +#' #' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("polarity", "CentWavePredIsoParam", function(object, value) { object@polarity <- value @@ -1083,38 +918,25 @@ setReplaceMethod("polarity", "CentWavePredIsoParam", function(object, value) { ############################################################ ## PeakDensityParam -setMethod("initialize", "PeakDensityParam", function(.Object, ...) { - classVersion(.Object)["PeakDensityParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - -#' @rdname groupChromPeaks-density -setMethod("show", "PeakDensityParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" sampleGroups:", class(object@sampleGroups), "of length", - length(object@sampleGroups), "\n") - cat(" bw:", object@bw, "\n") - cat(" minFraction:", minFraction(object), "\n") - cat(" minSamples:", minSamples(object), "\n") - cat(" binSize:", binSize(object), "\n") - cat(" maxFeatures:", maxFeatures(object), "\n") -}) #' @aliases sampleGroups -#' -#' @description \code{sampleGroups},\code{sampleGroups<-}: getter and setter -#' for the \code{sampleGroups} slot of the object. Its length should match +#' +#' @description `sampleGroups`,`sampleGroups<-`: getter and setter +#' for the `sampleGroups` slot of the object. Its length should match #' the number of samples in the experiment and it should not contain -#' \code{NA}s. -#' +#' `NA`s. +#' +#' @md +#' #' @rdname groupChromPeaks-density setMethod("sampleGroups", "PeakDensityParam", function(object){ return(object@sampleGroups)}) #' @aliases sampleGroups<- -#' +#' #' @param value The value for the slot. -#' +#' +#' @md +#' #' @rdname groupChromPeaks-density setReplaceMethod("sampleGroups", "PeakDensityParam", function(object, value) { if (length(value) == 0 | any(is.na(value))) @@ -1126,15 +948,17 @@ setReplaceMethod("sampleGroups", "PeakDensityParam", function(object, value) { }) #' @aliases bw -#' -#' @description \code{bw},\code{bw<-}: getter and setter for the \code{bw} slot +#' +#' @description `bw`,`bw<-`: getter and setter for the `bw` slot #' of the object. -#' +#' +#' @md +#' #' @rdname groupChromPeaks-density setMethod("bw", "PeakDensityParam", function(object){ return(object@bw)}) #' @aliases bw<- -#' +#' #' @rdname groupChromPeaks-density setReplaceMethod("bw", "PeakDensityParam", function(object, value) { object@bw <- value @@ -1143,15 +967,17 @@ setReplaceMethod("bw", "PeakDensityParam", function(object, value) { }) #' @aliases minFraction -#' -#' @description \code{minFraction},\code{minFraction<-}: getter and setter for -#' the \code{minFraction} slot of the object. -#' +#' +#' @description `minFraction`,`minFraction<-`: getter and setter for +#' the `minFraction` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-density setMethod("minFraction", "PeakDensityParam", function(object){ return(object@minFraction)}) #' @aliases minFraction<- -#' +#' #' @rdname groupChromPeaks-density setReplaceMethod("minFraction", "PeakDensityParam", function(object, value) { object@minFraction <- value @@ -1160,15 +986,17 @@ setReplaceMethod("minFraction", "PeakDensityParam", function(object, value) { }) #' @aliases minSamples -#' -#' @description \code{minSamples},\code{minSamples<-}: getter and setter for the -#' \code{minSamples} slot of the object. -#' +#' +#' @description `minSamples`,`minSamples<-`: getter and setter for the +#' `minSamples` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-density setMethod("minSamples", "PeakDensityParam", function(object){ return(object@minSamples)}) #' @aliases minSamples<- -#' +#' #' @rdname groupChromPeaks-density setReplaceMethod("minSamples", "PeakDensityParam", function(object, value) { object@minSamples <- value @@ -1176,9 +1004,11 @@ setReplaceMethod("minSamples", "PeakDensityParam", function(object, value) { return(object) }) -#' @description \code{binSize},\code{binSize<-}: getter and setter for the -#' \code{binSize} slot of the object. -#' +#' @description `binSize`,`binSize<-`: getter and setter for the +#' `binSize` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-density setMethod("binSize", "PeakDensityParam", function(object){ return(object@binSize)}) @@ -1190,15 +1020,17 @@ setReplaceMethod("binSize", "PeakDensityParam", function(object, value) { }) #' @aliases maxFeatures -#' -#' @description \code{maxFeatures},\code{maxFeatures<-}: getter and setter for -#' the \code{maxFeatures} slot of the object. -#' +#' +#' @description `maxFeatures`,`maxFeatures<-`: getter and setter for +#' the `maxFeatures` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-density setMethod("maxFeatures", "PeakDensityParam", function(object){ return(object@maxFeatures)}) #' @aliases maxFeatures<- -#' +#' #' @rdname groupChromPeaks-density setReplaceMethod("maxFeatures", "PeakDensityParam", function(object, value) { object@maxFeatures <- value @@ -1209,31 +1041,17 @@ setReplaceMethod("maxFeatures", "PeakDensityParam", function(object, value) { ############################################################ ## MzClustParam -setMethod("initialize", "MzClustParam", function(.Object, ...) { - classVersion(.Object)["MzClustParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) -#' @rdname groupChromPeaks-mzClust -setMethod("show", "MzClustParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" sampleGroups:", class(object@sampleGroups), "of length", - length(object@sampleGroups), "\n") - cat(" ppm:", object@ppm, "\n") - cat(" absMz:", object@absMz, "\n") - cat(" minFraction:", minFraction(object), "\n") - cat(" minSamples:", minSamples(object), "\n") -}) - -#' @description \code{sampleGroups},\code{sampleGroups<-}: getter and setter -#' for the \code{sampleGroups} slot of the object. -#' +#' @description `sampleGroups`,`sampleGroups<-`: getter and setter +#' for the `sampleGroups` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-mzClust setMethod("sampleGroups", "MzClustParam", function(object){ return(object@sampleGroups)}) #' @param value The value for the slot. -#' +#' #' @rdname groupChromPeaks-mzClust setReplaceMethod("sampleGroups", "MzClustParam", function(object, value) { object@sampleGroups <- value @@ -1241,9 +1059,11 @@ setReplaceMethod("sampleGroups", "MzClustParam", function(object, value) { return(object) }) -#' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} +#' @description `ppm`,`ppm<-`: getter and setter for the `ppm` #' slot of the object. -#' +#' +#' @md +#' #' @rdname groupChromPeaks-mzClust setMethod("ppm", "MzClustParam", function(object){ return(object@ppm)}) @@ -1255,15 +1075,17 @@ setReplaceMethod("ppm", "MzClustParam", function(object, value) { }) #' @aliases absMz -#' -#' @description \code{absMz},\code{absMz<-}: getter and setter for the -#' \code{absMz} slot of the object. -#' +#' +#' @description `absMz`,`absMz<-`: getter and setter for the +#' `absMz` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-mzClust setMethod("absMz", "MzClustParam", function(object){ return(object@absMz)}) #' @aliases absMz<- -#' +#' #' @rdname groupChromPeaks-mzClust setReplaceMethod("absMz", "MzClustParam", function(object, value) { object@absMz <- value @@ -1271,9 +1093,11 @@ setReplaceMethod("absMz", "MzClustParam", function(object, value) { return(object) }) -#' @description \code{minFraction},\code{minFraction<-}: getter and setter for -#' the \code{minFraction} slot of the object. -#' +#' @description `minFraction`,`minFraction<-`: getter and setter for +#' the `minFraction` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-mzClust setMethod("minFraction", "MzClustParam", function(object){ return(object@minFraction)}) @@ -1284,9 +1108,11 @@ setReplaceMethod("minFraction", "MzClustParam", function(object, value) { return(object) }) -#' @description \code{minSamples},\code{minSamples<-}: getter and setter for the -#' \code{minSamples} slot of the object. -#' +#' @description `minSamples`,`minSamples<-`: getter and setter for the +#' `minSamples` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-mzClust setMethod("minSamples", "MzClustParam", function(object){ return(object@minSamples)}) @@ -1300,31 +1126,17 @@ setReplaceMethod("minSamples", "MzClustParam", function(object, value) { ############################################################ ## NearestPeaksParam -setMethod("initialize", "NearestPeaksParam", function(.Object, ...) { - classVersion(.Object)["NearestPeaksParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) -#' @rdname groupChromPeaks-nearest -setMethod("show", "NearestPeaksParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" sampleGroups:", class(object@sampleGroups), "of length", - length(object@sampleGroups), "\n") - cat(" mzVsRtBalance:", object@mzVsRtBalance, "\n") - cat(" absMz:", object@absMz, "\n") - cat(" absRt:", object@absRt, "\n") - cat(" kNN:", object@kNN, "\n") -}) - -#' @description \code{sampleGroups},\code{sampleGroups<-}: getter and setter -#' for the \code{sampleGroups} slot of the object. -#' +#' @description `sampleGroups`,`sampleGroups<-`: getter and setter +#' for the `sampleGroups` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-nearest setMethod("sampleGroups", "NearestPeaksParam", function(object){ return(object@sampleGroups)}) #' @param value The value for the slot. -#' +#' #' @rdname groupChromPeaks-nearest setReplaceMethod("sampleGroups", "NearestPeaksParam", function(object, value) { object@sampleGroups <- value @@ -1333,15 +1145,17 @@ setReplaceMethod("sampleGroups", "NearestPeaksParam", function(object, value) { }) #' @aliases mzVsRtBalance -#' -#' @description \code{mzVsRtBalance},\code{mzVsRtBalance<-}: getter and setter -#' for the \code{mzVsRtBalance} slot of the object. -#' +#' +#' @description `mzVsRtBalance`,`mzVsRtBalance<-`: getter and setter +#' for the `mzVsRtBalance` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-nearest setMethod("mzVsRtBalance", "NearestPeaksParam", function(object){ return(object@mzVsRtBalance)}) #' @aliases mzVsRtBalance<- -#' +#' #' @rdname groupChromPeaks-nearest setReplaceMethod("mzVsRtBalance", "NearestPeaksParam", function(object, value) { object@mzVsRtBalance <- value @@ -1349,9 +1163,11 @@ setReplaceMethod("mzVsRtBalance", "NearestPeaksParam", function(object, value) { return(object) }) -#' @description \code{absMz},\code{absMz<-}: getter and setter for the -#' \code{absMz} slot of the object. -#' +#' @description `absMz`,`absMz<-`: getter and setter for the +#' `absMz` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-nearest setMethod("absMz", "NearestPeaksParam", function(object){ return(object@absMz)}) @@ -1363,15 +1179,17 @@ setReplaceMethod("absMz", "NearestPeaksParam", function(object, value) { }) #' @aliases absRt -#' -#' @description \code{absRt},\code{absRt<-}: getter and setter for the -#' \code{absRt} slot of the object. -#' +#' +#' @description `absRt`,`absRt<-`: getter and setter for the +#' `absRt` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-nearest setMethod("absRt", "NearestPeaksParam", function(object){ return(object@absRt)}) #' @aliases absRt<- -#' +#' #' @rdname groupChromPeaks-nearest setReplaceMethod("absRt", "NearestPeaksParam", function(object, value) { object@absRt <- value @@ -1380,15 +1198,17 @@ setReplaceMethod("absRt", "NearestPeaksParam", function(object, value) { }) #' @aliases kNN -#' -#' @description \code{kNN},\code{kNN<-}: getter and setter for the -#' \code{kNN} slot of the object. -#' +#' +#' @description `kNN`,`kNN<-`: getter and setter for the +#' `kNN` slot of the object. +#' +#' @md +#' #' @rdname groupChromPeaks-nearest setMethod("kNN", "NearestPeaksParam", function(object){ return(object@kNN)}) #' @aliases kNN<- -#' +#' #' @rdname groupChromPeaks-nearest setReplaceMethod("kNN", "NearestPeaksParam", function(object, value) { object@kNN <- value @@ -1399,33 +1219,15 @@ setReplaceMethod("kNN", "NearestPeaksParam", function(object, value) { ############################################################ ## PeakGroupsParam -setMethod("initialize", "PeakGroupsParam", function(.Object, ...) { - classVersion(.Object)["PeakGroupsParam"] <- "0.0.2" - callNextMethod(.Object, ...) -}) - -#' @rdname adjustRtime-peakGroups -setMethod("show", "PeakGroupsParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" minFraction:", object@minFraction, "\n") - cat(" extraPeaks:", object@extraPeaks, "\n") - cat(" smooth:", object@smooth, "\n") - cat(" span:", object@span, "\n") - cat(" family:", object@family, "\n") - pgm <- peakGroupsMatrix(object) - if (nrow(pgm)) - cat(" number of peak groups:", nrow(pgm), "\n") -}) #' @description \code{minFraction},\code{minFraction<-}: getter and setter #' for the \code{minFraction} slot of the object. -#' +#' #' @rdname adjustRtime-peakGroups setMethod("minFraction", "PeakGroupsParam", function(object){ return(object@minFraction)}) #' @param value The value for the slot. -#' +#' #' @rdname adjustRtime-peakGroups setReplaceMethod("minFraction", "PeakGroupsParam", function(object, value) { object@minFraction <- value @@ -1434,15 +1236,15 @@ setReplaceMethod("minFraction", "PeakGroupsParam", function(object, value) { }) #' @aliases extraPeaks -#' +#' #' @description \code{extraPeaks},\code{extraPeaks<-}: getter and setter #' for the \code{extraPeaks} slot of the object. -#' +#' #' @rdname adjustRtime-peakGroups setMethod("extraPeaks", "PeakGroupsParam", function(object){ return(object@extraPeaks)}) #' @aliases extraPeaks<- -#' +#' #' @rdname adjustRtime-peakGroups setReplaceMethod("extraPeaks", "PeakGroupsParam", function(object, value) { object@extraPeaks <- value @@ -1451,17 +1253,17 @@ setReplaceMethod("extraPeaks", "PeakGroupsParam", function(object, value) { }) #' @aliases smooth -#' +#' #' @description \code{smooth},\code{smooth<-}: getter and setter #' for the \code{smooth} slot of the object. -#' +#' #' @param x a \code{PeakGroupsParam} object. -#' +#' #' @rdname adjustRtime-peakGroups setMethod("smooth", "PeakGroupsParam", function(x){ return(x@smooth)}) #' @aliases smooth<- -#' +#' #' @rdname adjustRtime-peakGroups setReplaceMethod("smooth", "PeakGroupsParam", function(object, value) { object@smooth <- value @@ -1470,15 +1272,15 @@ setReplaceMethod("smooth", "PeakGroupsParam", function(object, value) { }) #' @aliases span -#' +#' #' @description \code{span},\code{span<-}: getter and setter #' for the \code{span} slot of the object. -#' +#' #' @rdname adjustRtime-peakGroups setMethod("span", "PeakGroupsParam", function(object){ return(object@span)}) #' @aliases span<- -#' +#' #' @rdname adjustRtime-peakGroups setReplaceMethod("span", "PeakGroupsParam", function(object, value) { object@span <- value @@ -1487,15 +1289,15 @@ setReplaceMethod("span", "PeakGroupsParam", function(object, value) { }) #' @aliases family -#' +#' #' @description \code{family},\code{family<-}: getter and setter #' for the \code{family} slot of the object. -#' +#' #' @rdname adjustRtime-peakGroups setMethod("family", "PeakGroupsParam", function(object){ return(object@family)}) #' @aliases family<- -#' +#' #' @rdname adjustRtime-peakGroups setReplaceMethod("family", "PeakGroupsParam", function(object, value) { object@family <- value @@ -1504,54 +1306,66 @@ setReplaceMethod("family", "PeakGroupsParam", function(object, value) { }) #' @aliases peakGroupsMatrix -#' +#' #' @description \code{peakGroupsMatrix},\code{peakGroupsMatrix<-}: getter and #' setter for the \code{peakGroupsMatrix} slot of the object. -#' +#' #' @rdname adjustRtime-peakGroups setMethod("peakGroupsMatrix", "PeakGroupsParam", function(object){ return(object@peakGroupsMatrix)}) #' @aliases peakGroupsMatrix<- -#' +#' #' @rdname adjustRtime-peakGroups setReplaceMethod("peakGroupsMatrix", "PeakGroupsParam", function(object, value) { object@peakGroupsMatrix <- value if (validObject(object)) return(object) }) +#' @aliases subset +#' +#' @description \code{subset},\code{subset<-}: getter and +#' setter for the \code{subset} slot of the object. +#' +#' @rdname adjustRtime-peakGroups +setMethod("subset", "PeakGroupsParam", function(x){ + return(x@subset)}) +#' @aliases subset<- +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("subset", "PeakGroupsParam", function(object, value) { + object@subset <- value + if (validObject(object)) + return(object) +}) +#' @aliases subsetAdjust +#' +#' @description \code{subsetAdjust},\code{subsetAdjust<-}: getter and +#' setter for the \code{subsetAdjust} slot of the object. +#' +#' @rdname adjustRtime-peakGroups +setMethod("subsetAdjust", "PeakGroupsParam", function(object){ + return(object@subsetAdjust)}) +#' @aliases subsetAdjust<- +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("subsetAdjust", "PeakGroupsParam", function(object, value) { + object@subsetAdjust <- value + if (validObject(object)) + return(object) +}) ############################################################ ## ObiwarpParam -setMethod("initialize", "ObiwarpParam", function(.Object, ...) { - classVersion(.Object)["ObiwarpParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - -#' @rdname adjustRtime-obiwarp -setMethod("show", "ObiwarpParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" binSize:", binSize(object), "\n") - cat(" centerSample:", centerSample(object), "\n") - cat(" response:", response(object), "\n") - cat(" distFun:", distFun(object), "\n") - cat(" gapInit:", gapInit(object), "\n") - cat(" gapExtend:", gapExtend(object), "\n") - cat(" factorDiag:", factorDiag(object), "\n") - cat(" factorGap:", factorGap(object), "\n") - cat(" localAlignment:", localAlignment(object), "\n") - cat(" initPenalty:", initPenalty(object), "\n") -}) #' @description \code{binSize},\code{binSize<-}: getter and setter #' for the \code{binSize} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("binSize", "ObiwarpParam", function(object){ return(object@binSize)}) #' @param value The value for the slot. -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("binSize", "ObiwarpParam", function(object, value) { object@binSize <- value @@ -1560,15 +1374,15 @@ setReplaceMethod("binSize", "ObiwarpParam", function(object, value) { }) #' @aliases centerSample -#' +#' #' @description \code{centerSample},\code{centerSample<-}: getter and setter #' for the \code{centerSample} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("centerSample", "ObiwarpParam", function(object){ return(object@centerSample)}) #' @aliases centerSample<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("centerSample", "ObiwarpParam", function(object, value) { object@centerSample <- as.integer(value) @@ -1577,15 +1391,15 @@ setReplaceMethod("centerSample", "ObiwarpParam", function(object, value) { }) #' @aliases response -#' +#' #' @description \code{response},\code{response<-}: getter and setter #' for the \code{response} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("response", "ObiwarpParam", function(object){ return(object@response)}) #' @aliases response<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("response", "ObiwarpParam", function(object, value) { object@response <- as.integer(value) @@ -1594,15 +1408,15 @@ setReplaceMethod("response", "ObiwarpParam", function(object, value) { }) #' @aliases distFun -#' +#' #' @description \code{distFun},\code{distFun<-}: getter and setter #' for the \code{distFun} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("distFun", "ObiwarpParam", function(object){ return(object@distFun)}) #' @aliases distFun<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("distFun", "ObiwarpParam", function(object, value) { object@distFun <- value @@ -1611,10 +1425,10 @@ setReplaceMethod("distFun", "ObiwarpParam", function(object, value) { }) #' @aliases gapInit -#' +#' #' @description \code{gapInit},\code{gapInit<-}: getter and setter #' for the \code{gapInit} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("gapInit", "ObiwarpParam", function(object){ if (length(object@gapInit) == 0) { @@ -1627,7 +1441,7 @@ setMethod("gapInit", "ObiwarpParam", function(object){ } return(object@gapInit)}) #' @aliases gapInit<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("gapInit", "ObiwarpParam", function(object, value) { object@gapInit <- value @@ -1636,10 +1450,10 @@ setReplaceMethod("gapInit", "ObiwarpParam", function(object, value) { }) #' @aliases gapExtend -#' +#' #' @description \code{gapExtend},\code{gapExtend<-}: getter and setter #' for the \code{gapExtend} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("gapExtend", "ObiwarpParam", function(object){ if (length(object@gapExtend) == 0) { @@ -1654,7 +1468,7 @@ setMethod("gapExtend", "ObiwarpParam", function(object){ } return(object@gapExtend)}) #' @aliases gapExtend<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("gapExtend", "ObiwarpParam", function(object, value) { object@gapExtend <- value @@ -1663,15 +1477,15 @@ setReplaceMethod("gapExtend", "ObiwarpParam", function(object, value) { }) #' @aliases factorDiag -#' +#' #' @description \code{factorDiag},\code{factorDiag<-}: getter and setter #' for the \code{factorDiag} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("factorDiag", "ObiwarpParam", function(object){ return(object@factorDiag)}) #' @aliases factorDiag<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("factorDiag", "ObiwarpParam", function(object, value) { object@factorDiag <- value @@ -1680,15 +1494,15 @@ setReplaceMethod("factorDiag", "ObiwarpParam", function(object, value) { }) #' @aliases factorGap -#' +#' #' @description \code{factorGap},\code{factorGap<-}: getter and setter #' for the \code{factorGap} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("factorGap", "ObiwarpParam", function(object){ return(object@factorGap)}) #' @aliases factorGap<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("factorGap", "ObiwarpParam", function(object, value) { object@factorGap <- value @@ -1697,15 +1511,15 @@ setReplaceMethod("factorGap", "ObiwarpParam", function(object, value) { }) #' @aliases localAlignment -#' +#' #' @description \code{localAlignment},\code{localAlignment<-}: getter and setter #' for the \code{localAlignment} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("localAlignment", "ObiwarpParam", function(object){ return(object@localAlignment)}) #' @aliases localAlignment<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("localAlignment", "ObiwarpParam", function(object, value) { object@localAlignment <- value @@ -1714,15 +1528,15 @@ setReplaceMethod("localAlignment", "ObiwarpParam", function(object, value) { }) #' @aliases initPenalty -#' +#' #' @description \code{initPenalty},\code{initPenalty<-}: getter and setter #' for the \code{initPenalty} slot of the object. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("initPenalty", "ObiwarpParam", function(object){ return(object@initPenalty)}) #' @aliases initPenalty<- -#' +#' #' @rdname adjustRtime-obiwarp setReplaceMethod("initPenalty", "ObiwarpParam", function(object, value) { object@initPenalty <- value @@ -1730,34 +1544,47 @@ setReplaceMethod("initPenalty", "ObiwarpParam", function(object, value) { return(object) }) +#' @description \code{subset},\code{subset<-}: getter and +#' setter for the \code{subset} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("subset", "ObiwarpParam", function(x){ + return(x@subset)}) +#' @rdname adjustRtime-obiwarp +setReplaceMethod("subset", "ObiwarpParam", function(object, value) { + object@subset <- value + if (validObject(object)) + return(object) +}) +#' @description \code{subsetAdjust},\code{subsetAdjust<-}: getter and +#' setter for the \code{subsetAdjust} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("subsetAdjust", "ObiwarpParam", function(object){ + return(object@subsetAdjust)}) +#' @rdname adjustRtime-obiwarp +setReplaceMethod("subsetAdjust", "ObiwarpParam", function(object, value) { + object@subsetAdjust <- value + if (validObject(object)) + return(object) +}) + ############################################################ ## FillChromPeaksParam ### -setMethod("initialize", "FillChromPeaksParam", function(.Object, ...) { - classVersion(.Object)["FillChromPeaksParam"] <- "0.0.1" - callNextMethod(.Object, ...) -}) -#' @rdname fillChromPeaks -setMethod("show", "FillChromPeaksParam", function(object) { - cat("Object of class: ", class(object), "\n") - cat("Parameters:\n") - cat(" expandMz:", object@expandMz, "\n") - cat(" expandRt:", object@expandRt, "\n") - cat(" ppm:", object@ppm, "\n") -}) #' @aliases expandMz -#' +#' #' @description \code{expandMz},\code{expandMz<-}: getter and setter #' for the \code{expandMz} slot of the object. #' #' @param value The value for the slot. -#' +#' #' @rdname fillChromPeaks setMethod("expandMz", "FillChromPeaksParam", function(object){ return(object@expandMz)}) #' @aliases expandMz<- -#' +#' #' @rdname fillChromPeaks setReplaceMethod("expandMz", "FillChromPeaksParam", function(object, value) { object@expandMz <- value @@ -1766,15 +1593,15 @@ setReplaceMethod("expandMz", "FillChromPeaksParam", function(object, value) { }) #' @aliases expandRt -#' +#' #' @description \code{expandRt},\code{expandRt<-}: getter and setter #' for the \code{expandRt} slot of the object. -#' +#' #' @rdname fillChromPeaks setMethod("expandRt", "FillChromPeaksParam", function(object){ return(object@expandRt)}) #' @aliases expandRt<- -#' +#' #' @rdname fillChromPeaks setReplaceMethod("expandRt", "FillChromPeaksParam", function(object, value) { object@expandRt <- value @@ -1784,7 +1611,7 @@ setReplaceMethod("expandRt", "FillChromPeaksParam", function(object, value) { #' @description \code{ppm},\code{ppm<-}: getter and setter #' for the \code{ppm} slot of the object. -#' +#' #' @rdname fillChromPeaks setMethod("ppm", "FillChromPeaksParam", function(object){ return(object@ppm)}) diff --git a/R/methods-ProcessHistory.R b/R/methods-ProcessHistory.R index 38ec1fb23..88316fbde 100644 --- a/R/methods-ProcessHistory.R +++ b/R/methods-ProcessHistory.R @@ -1,16 +1,6 @@ ## Methods for ProcessHistory and XProcessHistory. #' @include functions-ProcessHistory.R -setMethod("initialize", "ProcessHistory", function(.Object, ...) { - classVersion(.Object)["ProcessHistory"] <- "0.0.2" - callNextMethod(.Object, ...) -}) - -setMethod("initialize", "XProcessHistory", function(.Object, ...) { - classVersion(.Object)["XProcessHistory"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - #' @rdname ProcessHistory-class setMethod("show", "ProcessHistory", function(object) { cat("Object of class \"", class(object), "\"\n", sep = "") @@ -49,7 +39,7 @@ setMethod("processParam", "XProcessHistory", function(object) { #' @aliases processParam<- #' #' @param value An object extending the \code{Param} class. -#' +#' #' @noRd setReplaceMethod("processParam", "XProcessHistory", function(object, value) { object@param <- value @@ -58,7 +48,7 @@ setReplaceMethod("processParam", "XProcessHistory", function(object, value) { }) #' @description \code{msLevel}: returns the MS level on which a certain analysis #' has been performed, or \code{NA} if not defined. -#' +#' #' @rdname ProcessHistory-class setMethod("msLevel", "XProcessHistory", function(object) { if (.hasSlot(object, "msLevel")) @@ -75,7 +65,7 @@ setMethod("msLevel", "XProcessHistory", function(object) { #' #' @return The \code{processType} method returns a character string with the #' processing step type. -#' +#' #' @rdname ProcessHistory-class setMethod("processType", "ProcessHistory", function(object) { return(object@type) @@ -94,7 +84,7 @@ setReplaceMethod("processType", "ProcessHistory", function(object, value) { #' #' @return The \code{processDate} method returns a character string with the #' time stamp of the processing step start. -#' +#' #' @rdname ProcessHistory-class setMethod("processDate", "ProcessHistory", function(object) { return(object@date) @@ -113,7 +103,7 @@ setReplaceMethod("processDate", "ProcessHistory", function(object, value) { #' #' @return The \code{processInfo} method returns a character string with #' optional additional informations. -#' +#' #' @rdname ProcessHistory-class setMethod("processInfo", "ProcessHistory", function(object) { return(object@info) @@ -132,7 +122,7 @@ setReplaceMethod("processInfo", "ProcessHistory", function(object, value) { #' #' @return The \code{fileIndex} method returns a integer vector with the index #' of the files/samples on which the processing step was applied. -#' +#' #' @rdname ProcessHistory-class setMethod("fileIndex", "ProcessHistory", function(object) { return(object@fileIndex) @@ -143,4 +133,3 @@ setReplaceMethod("fileIndex", "ProcessHistory", function(object, value) { if (validObject(object)) return(object) }) - diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index e8a076517..a2318f4fb 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -4,23 +4,23 @@ #' do_adjustRtime-functions.R methods-xcmsRaw.R functions-OnDiskMSnExp.R setMethod("initialize", "XCMSnExp", function(.Object, ...) { - classVersion(.Object)["XCMSnExp"] <- "0.0.1" .Object <- callNextMethod(.Object, ...) lockEnvironment(.Object@msFeatureData) return(.Object) }) #' @aliases show,MsFeatureData-method -#' +#' #' @rdname XCMSnExp-class setMethod("show", "XCMSnExp", function(object) { callNextMethod() - ## And not XCMSnExp related stuff. cat("- - - xcms preprocessing - - -\n") if (hasChromPeaks(object)) { cat("Chromatographic peak detection:\n") ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) - cat(" method:", .param2string(ph[[1]]@param), "\n") + if (length(ph)) + cat(" method:", .param2string(ph[[1]]@param), "\n") + else cat(" unknown method.\n") cat(" ", nrow(chromPeaks(object)), " peaks identified in ", length(fileNames(object)), " samples.\n", sep = "") cat(" On average ", @@ -30,12 +30,16 @@ setMethod("show", "XCMSnExp", function(object) { if (hasAdjustedRtime(object)) { cat("Alignment/retention time adjustment:\n") ph <- processHistory(object, type = .PROCSTEP.RTIME.CORRECTION) - cat(" method:", .param2string(ph[[1]]@param), "\n") + if (length(ph)) + cat(" method:", .param2string(ph[[1]]@param), "\n") + else cat(" unknown method.\n") } if (hasFeatures(object)) { cat("Correspondence:\n") ph <- processHistory(object, type = .PROCSTEP.PEAK.GROUPING) - cat(" method:", .param2string(ph[[1]]@param), "\n") + if (length(ph)) + cat(" method:", .param2string(ph[[1]]@param), "\n") + else cat(" unknown method.\n") cat(" ", nrow(featureDefinitions(object)), " features identified.\n", sep = "") cat(" Median mz range of features: ", @@ -47,9 +51,10 @@ setMethod("show", "XCMSnExp", function(object) { featureDefinitions(object)[, "rtmin"]), digits = 5), "\n", sep = "") if (.hasFilledPeaks(object)) { - totF <- chromPeaks(object)[, "is_filled"] == 1 - fp <- chromPeaks(object)[totF, , drop = FALSE] - cat("", sum(totF), "filled peaks (on average", + fp <- chromPeaks(object)[chromPeakData(object)$is_filled, , + drop = FALSE] + cat("", sum(chromPeakData(object)$is_filled), + "filled peaks (on average", mean(table(fp[, "sample"])), "per sample).\n") } } @@ -75,8 +80,8 @@ setMethod("hasAdjustedRtime", "XCMSnExp", function(object) { #' results (i.e. features). #' #' @rdname XCMSnExp-class -setMethod("hasFeatures", "XCMSnExp", function(object) { - hasFeatures(object@msFeatureData) +setMethod("hasFeatures", "XCMSnExp", function(object, msLevel = integer()) { + hasFeatures(object@msFeatureData, msLevel = msLevel) }) #' @aliases hasChromPeaks hasChromPeaks,MsFeatureData-method @@ -87,10 +92,23 @@ setMethod("hasFeatures", "XCMSnExp", function(object) { #' detection results. #' #' @rdname XCMSnExp-class -setMethod("hasChromPeaks", "XCMSnExp", function(object) { - hasChromPeaks(object@msFeatureData) +setMethod("hasChromPeaks", "XCMSnExp", function(object, msLevel = integer()) { + hasChromPeaks(object@msFeatureData, msLevel = msLevel) +}) + +#' @aliases hasFilledChromPeaks +#' +#' @description +#' +#' \code{hasFilledChromPeaks}: whether the object contains any filled-in +#' chromatographic peaks. +#' +#' @rdname XCMSnExp-class +setMethod("hasFilledChromPeaks", "XCMSnExp", function(object) { + .hasFilledPeaks(object) }) + #' @aliases adjustedRtime adjustedRtime,MsFeatureData-method #' #' @description @@ -122,7 +140,8 @@ setMethod("adjustedRtime", "XCMSnExp", function(object, bySample = FALSE) { ## Have to re-order the adjusted retention times by spectrum name, such ## that rtime are. res <- unlist(res, use.names = FALSE) - sNames <- unlist(split(featureNames(object), fromFile(object)), + sNames <- unlist(split(featureNames(object), + as.factor(fromFile(object))), use.names = FALSE) names(res) <- sNames res <- res[featureNames(object)] @@ -162,8 +181,8 @@ setReplaceMethod("adjustedRtime", "XCMSnExp", function(object, value) { } lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd - if (validObject(object)) - return(object) + validObject(object) + object }) #' @aliases featureDefinitions featureDefinitions,MsFeatureData-method @@ -179,7 +198,7 @@ setReplaceMethod("adjustedRtime", "XCMSnExp", function(object, value) { #' \code{chromPeaks}. #' See also \code{\link{featureSummary}} for a function to calculate simple #' feature summaries. -#' +#' #' @return #' #' For \code{featureDefinitions}: a \code{DataFrame} with peak grouping @@ -192,63 +211,69 @@ setReplaceMethod("adjustedRtime", "XCMSnExp", function(object, value) { #' chromatographic peaks (rows) in the matrix returned by the #' \code{chromPeaks} method that belong to that feature group. The method #' returns \code{NULL} if no feature definitions are present. +#' \code{featureDefinitions} supports also parameters \code{mz}, \code{rt}, +#' \code{ppm} and \code{type} to return only features within certain ranges (see +#' description of \code{chromPeaks} for details). #' #' @rdname XCMSnExp-class -setMethod("featureDefinitions", "XCMSnExp", function(object, mz = numeric(), - rt = numeric(), ppm = 0, - type = c("any", "within", - "apex_within")) { - feat_def <- featureDefinitions(object@msFeatureData) - type <- match.arg(type) - ## Select features within rt range. - if (length(rt) && nrow(feat_def)) { - rt <- range(rt) - if (type == "any") - keep <- which(feat_def$rtmin <= rt[2] & feat_def$rtmax >= rt[1]) - if (type == "within") - keep <- which(feat_def$rtmin >= rt[1] & feat_def$rtmax <= rt[2]) - if (type == "apex_within") - keep <- which(feat_def$rtmed >= rt[1] & feat_def$rtmed <= rt[2]) - feat_def <- feat_def[keep, , drop = FALSE] - } - ## Select peaks within mz range, considering also ppm - if (length(mz) && nrow(feat_def)) { - mz <- range(mz) - ## Increase mz by ppm. - if (is.finite(mz[1])) - mz[1] <- mz[1] - mz[1] * ppm / 1e6 - if (is.finite(mz[2])) - mz[2] <- mz[2] + mz[2] * ppm / 1e6 - if (type == "any") - keep <- which(feat_def$mzmin <= mz[2] & feat_def$mzmax >= mz[1]) - if (type == "within") - keep <- which(feat_def$mzmin >= mz[1] & feat_def$mzmax <= mz[2]) - if (type == "apex_within") - keep <- which(feat_def$mzmed >= mz[1] & feat_def$mzmed <= mz[2]) - feat_def <- feat_def[keep, , drop = FALSE] - } - feat_def -}) +setMethod("featureDefinitions", "XCMSnExp", + function(object, mz = numeric(), rt = numeric(), ppm = 0, + type = c("any", "within", "apex_within"), + msLevel = integer()) { + feat_def <- featureDefinitions(object@msFeatureData, + msLevel = msLevel) + type <- match.arg(type) + ## Select features within rt range. + if (length(rt) && nrow(feat_def)) { + rt <- range(rt) + if (type == "any") + keep <- which(feat_def$rtmin <= rt[2] & + feat_def$rtmax >= rt[1]) + if (type == "within") + keep <- which(feat_def$rtmin >= rt[1] & + feat_def$rtmax <= rt[2]) + if (type == "apex_within") + keep <- which(feat_def$rtmed >= rt[1] & + feat_def$rtmed <= rt[2]) + feat_def <- feat_def[keep, , drop = FALSE] + } + ## Select peaks within mz range, considering also ppm + if (length(mz) && nrow(feat_def)) { + mz <- range(mz) + ## Increase mz by ppm. + if (is.finite(mz[1])) + mz[1] <- mz[1] - mz[1] * ppm / 1e6 + if (is.finite(mz[2])) + mz[2] <- mz[2] + mz[2] * ppm / 1e6 + if (type == "any") + keep <- which(feat_def$mzmin <= mz[2] & + feat_def$mzmax >= mz[1]) + if (type == "within") + keep <- which(feat_def$mzmin >= mz[1] & + feat_def$mzmax <= mz[2]) + if (type == "apex_within") + keep <- which(feat_def$mzmed >= mz[1] & + feat_def$mzmed <= mz[2]) + feat_def <- feat_def[keep, , drop = FALSE] + } + feat_def + }) #' @aliases featureDefinitions<- featureDefinitions<-,MsFeatureData-method #' #' @rdname XCMSnExp-class setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { - if (hasFeatures(object)) - object <- dropFeatureDefinitions(object) + ## if (hasFeatures(object)) + ## object <- dropFeatureDefinitions(object) newFd <- new("MsFeatureData") - newFd@.xData <- .copy_env(object@msFeatureData) + newFd@.xData <- xcms:::.copy_env(object@msFeatureData) featureDefinitions(newFd) <- value lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd - if (validObject(object)) { - ## Lock the environment so that only accessor methods can change values. - ## lockEnvironment(newFd, bindings = TRUE) - ## object@msFeatureData <- newFd - return(object) - } + validObject(object) + object }) -#' @aliases chromPeaks chromPeaks,MsFeatureData-method +#' @aliases chromPeaks chromPeaks,MsFeatureData-method chromPeakData,MsFeatureData-method chromPeakData #' #' @description #' @@ -265,14 +290,24 @@ setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { #' only chromatographic peaks overlapping the defined retention time and/or #' m/z ranges. Argument \code{type} allows to define how \emph{overlapping} is #' determined: for \code{type == "any"} (the default), all peaks that are even -#' partially overlapping the region are returned, for \code{type == "within"} -#' the full peak has to be within the region and for -#' \code{type == "apex_within"} the peak's apex position (highest signal of the -#' peak) has to be within the region. +#' partially overlapping the region are returned (i.e. for which either +#' \code{"mzmin"} or \code{"mzmax"} of the \code{chromPeaks} or +#' \code{featureDefinitions} matrix are within the provided m/z range), for +#' \code{type == "within"} the full peak has to be within the region (i.e. +#' both \code{"mzmin"} and \code{"mzmax"} have to be within the m/z range) and +#' for \code{type == "apex_within"} the peak's apex position (highest signal +#' of the peak) has to be within the region (i.e. the peak's or features m/z +#' has to be within the m/z range). #' See description of the return value for details on the returned matrix. #' Users usually don't have to use the \code{chromPeaks<-} method directly #' as detected chromatographic peaks are added to the object by the -#' \code{\link{findChromPeaks}} method. +#' \code{\link{findChromPeaks}} method. Also, \code{chromPeaks<-} will replace +#' any existing \code{chromPeakData}. +#' +#' \code{chromPeakData} and \code{chromPeakData<-} allow to get or set arbitrary +#' chromatographic peak annotations. These are returned or ar returned as a +#' \code{DataFrame}. Note that the number of rows and the rownames of the +#' \code{DataFrame} have to match those of \code{chromPeaks}. #' #' @param rt optional \code{numeric(2)} defining the retention time range for #' which chromatographic peaks should be returned. @@ -284,7 +319,15 @@ setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { #' \code{mz} range should be extended. For a value of \code{ppm = 10}, all #' peaks within \code{mz[1] - ppm / 1e6} and \code{mz[2] + ppm / 1e6} are #' returned. -#' +#' +#' @param msLevel \code{integer} specifying the MS level(s) for which identified +#' chromatographic peaks should be returned. +#' +#' @param isFilledColumn \code{logical(1)} whether a column \code{"is_filled"} +#' is included in the returned \code{"matrix"} providing the information +#' if a peak was filled in. Alternatively, this information would be +#' provided by the \code{chromPeakData} data frame. +#' #' @return #' #' For \code{chromPeaks}: if \code{bySample = FALSE} a \code{matrix} (each row @@ -294,18 +337,17 @@ setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { #' scans/retention times), #' \code{"mzmin"} (minimal mz value), #' \code{"mzmax"} (maximal mz value), -#' \code{"rt"} (retention time for the peak apex), +#' \code{"rt"} (retention time of the peak apex), #' \code{"rtmin"} (minimal retention time), #' \code{"rtmax"} (maximal retention time), #' \code{"into"} (integrated, original, intensity of the peak), #' \code{"maxo"} (maximum intentity of the peak), #' \code{"sample"} (sample index in which the peak was identified) and -#' \code{"is_filled"} defining whether the chromatographic peak was -#' identified by the peak picking algorithm (\code{0}) or was added by the -#' \code{fillChromPeaks} method (\code{1}). #' Depending on the employed peak detection algorithm and the -#' \code{verboseColumns} parameter of it additional columns might be -#' returned. For \code{bySample = TRUE} the chronatographic peaks are +#' \code{verboseColumns} parameter of it, additional columns might be +#' returned. If parameter \code{isFilledColumn} was set to \code{TRUE} a column +#' named \code{"is_filled"} is also returned. +#' For \code{bySample = TRUE} the chromatographic peaks are #' returned as a \code{list} of matrices, each containing the #' chromatographic peaks of a specific sample. For samples in which no #' peaks were detected a matrix with 0 rows is returned. @@ -313,14 +355,20 @@ setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { #' @rdname XCMSnExp-class setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE, rt = numeric(), mz = numeric(), - ppm = 0, + ppm = 0, msLevel = integer(), type = c("any", "within", - "apex_within")) { - pks <- chromPeaks(object@msFeatureData) + "apex_within"), + isFilledColumn = FALSE) { type <- match.arg(type) + pks <- chromPeaks(object@msFeatureData) + if (isFilledColumn) + pks <- cbind(pks, is_filled = as.numeric(chromPeakData(object)$is_filled)) + if (length(msLevel)) + pks <- pks[which(chromPeakData(object)$ms_level %in% msLevel), , + drop = FALSE] ## Select peaks within rt range. if (length(rt)) { - rt <- range(rt) + rt <- range(as.numeric(rt)) if (type == "any") keep <- which(pks[, "rtmin"] <= rt[2] & pks[, "rtmax"] >= rt[1]) if (type == "within") @@ -331,7 +379,7 @@ setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE, } ## Select peaks within mz range, considering also ppm if (length(mz) && length(pks)) { - mz <- range(mz) + mz <- range(as.numeric(mz)) ## Increase mz by ppm. if (is.finite(mz[1])) mz[1] <- mz[1] - mz[1] * ppm / 1e6 @@ -350,7 +398,7 @@ setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE, ## without detected peaks. res <- vector("list", length(fileNames(object))) names(res) <- as.character(1:length(res)) - if (length(pks)) { + if (nrow(pks)) { tmp <- split.data.frame(pks, f = pks[, "sample"]) res[as.numeric(names(tmp))] <- tmp @@ -368,7 +416,7 @@ setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE, } else pks }) -#' @aliases chromPeaks<- chromPeaks<-,MsFeatureData-method +#' @aliases chromPeaks<- chromPeaks<-,MsFeatureData-method chromPeakData<-,MsFeatureData-method chromPeakData<- #' #' @rdname XCMSnExp-class setReplaceMethod("chromPeaks", "XCMSnExp", function(object, value) { @@ -380,14 +428,16 @@ setReplaceMethod("chromPeaks", "XCMSnExp", function(object, value) { ## Ensure that we remove ALL related process history steps newFd@.xData <- .copy_env(object@msFeatureData) ## Set rownames if not present - if (is.null(rownames(value))) + if (is.null(rownames(value)) && nrow(value)) rownames(value) <- .featureIDs(nrow(value), prefix = "CP") chromPeaks(newFd) <- value + chromPeakData(newFd) <- DataFrame(ms_level = rep(1L, nrow(value)), + is_filled = rep(FALSE, nrow(value)), + row.names = rownames(value)) lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd - if (validObject(object)) { - return(object) - } + validObject(object) + object }) #' @description @@ -404,7 +454,7 @@ setReplaceMethod("chromPeaks", "XCMSnExp", function(object, value) { #' #' @param adjusted logical(1) whether adjusted or raw (i.e. the original #' retention times reported in the files) should be returned. -#' +#' #' @return #' #' For \code{rtime}: if \code{bySample = FALSE} a numeric vector with @@ -428,7 +478,7 @@ setMethod("rtime", "XCMSnExp", function(object, bySample = FALSE, ## res <- theM(object) res <- callNextMethod(object = object) if (bySample) { - tmp <- split(res, fromFile(object)) + tmp <- split(res, as.factor(fromFile(object))) res <- vector("list", length(fileNames(object))) names(res) <- as.character(1:length(res)) res[as.numeric(names(tmp))] <- tmp @@ -456,7 +506,8 @@ setMethod("mz", "XCMSnExp", function(object, bySample = FALSE, BPPARAM = bpparam()) { res <- callNextMethod(object = object, BPPARAM = BPPARAM) if (bySample) { - tmp <- lapply(split(res, fromFile(object)), unlist, use.names = FALSE) + tmp <- lapply(split(res, as.factor(fromFile(object))), + unlist, use.names = FALSE) res <- vector("list", length(fileNames(object))) names(res) <- as.character(1:length(res)) res[as.numeric(names(tmp))] <- tmp @@ -485,7 +536,8 @@ setMethod("intensity", "XCMSnExp", function(object, bySample = FALSE, BPPARAM = bpparam()) { res <- callNextMethod(object = object, BPPARAM = BPPARAM) if (bySample) { - tmp <- lapply(split(res, fromFile(object)), unlist, use.names = FALSE) + tmp <- lapply(split(res, as.factor(fromFile(object))), + unlist, use.names = FALSE) res <- vector("list", length(fileNames(object))) names(res) <- as.character(1:length(res)) res[as.numeric(names(tmp))] <- tmp @@ -506,7 +558,7 @@ setMethod("intensity", "XCMSnExp", function(object, bySample = FALSE, #' #' @param BPPARAM Parameter class for parallel processing. See #' \code{\link{bpparam}}. -#' +#' #' @return #' #' For \code{spectra}: if \code{bySample = FALSE} a \code{list} with @@ -524,7 +576,7 @@ setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE, else object <- as(object, "OnDiskMSnExp") res <- callNextMethod(object = object, BPPARAM = BPPARAM) if (bySample) { - tmp <- split(res, fromFile(object)) + tmp <- split(res, as.factor(fromFile(object))) ## That's to ensure that we're always returning something for all files. res <- vector("list", length(fileNames(object))) names(res) <- as.character(1:length(res)) @@ -534,10 +586,10 @@ setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE, }) #' @aliases processHistory -#' +#' #' @description #' -#' \code{processHistory}: returns a \code{list} with +#' \code{processHistory}: returns a \code{list} of #' \code{\link{ProcessHistory}} objects (or objects inheriting from this #' base class) representing the individual processing steps that have been #' performed, eventually along with their settings (\code{Param} parameter @@ -545,7 +597,7 @@ setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE, #' \code{msLevel} allow to restrict to process steps of a certain type or #' performed on a certain file or MS level. #' -#' @param fileIndex For \code{processHistory}: optional \code{numeric} +#' @param fileIndex For \code{processHistory}: optional \code{integer} #' specifying the index of the files/samples for which the #' \code{\link{ProcessHistory}} objects should be retrieved. #' @@ -554,11 +606,11 @@ setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE, #' type. Use the \code{processHistoryTypes} to list all supported values. #' For \code{chromPeaks}: \code{character} specifying which peaks to return #' if \code{rt} or \code{mz} are defined. For \code{type = "any"} all -#' chromatographic peaks partially overlapping the range defined by +#' chromatographic peaks partially overlapping the range defined by #' \code{mz} and/or \code{rt} are returned, \code{type = "within"} returns #' only peaks completely within the region and \code{type = "apex_within"} #' peaks for which the peak's apex is within the region. -#' +#' #' @return #' #' For \code{processHistory}: a \code{list} of @@ -604,7 +656,7 @@ setMethod("processHistory", "XCMSnExp", function(object, fileIndex, type, #' #' The \code{addProcessHistory} method returns the input object with the #' provided \code{\link{ProcessHistory}} appended to the process history. -#' +#' #' @noRd setMethod("addProcessHistory", "XCMSnExp", function(object, ph) { if (!inherits(ph, "ProcessHistory")) @@ -632,25 +684,22 @@ setMethod("dropChromPeaks", "XCMSnExp", function(object, if (hasChromPeaks(object)) { phTypes <- unlist(lapply(processHistory(object), function(z) processType(z))) - object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.DETECTION) - ## Make sure we delete all related process history steps - object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.GROUPING) - object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) - object <- dropProcessHistories(object, type = .PROCSTEP.CALIBRATION) + object <- dropProcessHistories( + object, type = c(.PROCSTEP.PEAK.DETECTION, .PROCSTEP.PEAK.GROUPING, + .PROCSTEP.PEAK.FILLING, .PROCSTEP.CALIBRATION, + .PROCSTEP.PEAK.REFINEMENT)) newFd <- new("MsFeatureData") - newFd@.xData <- .copy_env(object@msFeatureData) - newFd <- dropChromPeaks(newFd) - if (hasFeatures(newFd)) - newFd <- dropFeatureDefinitions(newFd) - ## Drop adjusted retention times if performed AFTER peak detection. - if (hasAdjustedRtime(newFd) & !keepAdjustedRtime) { + if (hasAdjustedRtime(object)) { idx_rt_adj <- which(phTypes == .PROCSTEP.RTIME.CORRECTION) idx_pk_det <- which(phTypes == .PROCSTEP.PEAK.DETECTION) - if (idx_rt_adj > idx_pk_det) { - object <- dropProcessHistories(object, - type = .PROCSTEP.RTIME.CORRECTION) - newFd <- dropAdjustedRtime(newFd) - } + if (length(idx_rt_adj) && length(idx_pk_det) && + max(idx_rt_adj) > max(idx_pk_det) && !keepAdjustedRtime) { + object <- dropProcessHistories( + object, type = .PROCSTEP.RTIME.CORRECTION) + } else + keepAdjustedRtime <- TRUE + if (keepAdjustedRtime) + adjustedRtime(newFd) <- adjustedRtime(object@msFeatureData) } lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd @@ -662,7 +711,7 @@ setMethod("dropChromPeaks", "XCMSnExp", function(object, #' @aliases dropFeatureDefinitions dropFeatureDefinitions,MsFeatureData-method #' #' @description -#' +#' #' \code{dropFeatureDefinitions}: drops the results from a #' correspondence (peak grouping) analysis, i.e. the definition of the mz-rt #' features and returns the object without that information. Note that for @@ -685,7 +734,7 @@ setMethod("dropChromPeaks", "XCMSnExp", function(object, #' chromatographic peaks removes all process history steps related to peak #' grouping. Setting e.g. \code{dropLastN = 1} will only remove the most #' recent peak grouping related process history step. -#' +#' #' @rdname XCMSnExp-class setMethod("dropFeatureDefinitions", "XCMSnExp", function(object, keepAdjustedRtime = FALSE, @@ -700,42 +749,29 @@ setMethod("dropFeatureDefinitions", "XCMSnExp", function(object, if (length(idx_fal) == 0) idx_fal <- -1L ## 1) drop last related process history step and results - object <- dropProcessHistories(object, - type = .PROCSTEP.PEAK.GROUPING, - num = 1) + object <- dropProcessHistories( + object, type = .PROCSTEP.PEAK.GROUPING, num = 1) ## Drop also eventual filterFeatureDefinitions - object <- dropGenericProcessHistory(object, - fun = "filterFeatureDefinitions") + object <- dropGenericProcessHistory( + object, fun = "filterFeatureDefinitions") newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) - newFd <- dropFeatureDefinitions(newFd) - if (.hasFilledPeaks(object)) { - ## Remove filled in peaks - chromPeaks(newFd) <- - chromPeaks(newFd)[chromPeaks(newFd)[, "is_filled"] == 0, , - drop = FALSE] - object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) + drop_proc_hist <- character() + dropAdjustedRtime <- FALSE + if (max(idx_art) > max(idx_fal) & !keepAdjustedRtime) { + drop_proc_hist <- .PROCSTEP.PEAK.GROUPING + dropAdjustedRtime <- TRUE } + newFd <- dropFeatureDefinitions(newFd, dropAdjustedRtime) + if (.hasFilledPeaks(object)) + drop_proc_hist <- c(drop_proc_hist, .PROCSTEP.PEAK.FILLING) lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd - ## 2) If retention time correction was performed after the latest peak - ## alignment, drop also the retention time correction and all related - ## process history steps. - ## Otherwise (grouping performed after retention time adjustment) do - ## nothing - this keeps eventual alignment related process history - ## steps performed before retention time correction. - if (hasAdjustedRtime(object)) { - if (max(idx_art) > max(idx_fal) & !keepAdjustedRtime) { - object <- dropProcessHistories(object, - type = .PROCSTEP.PEAK.GROUPING) - ## This will ensure that the retention times of the peaks - ## are restored. - object <- dropAdjustedRtime(object) - warning("Removed also correspondence (peak grouping) results as", - " these based on the retention time correction results", - " that were dropped.") - } - } + if (!hasChromPeaks(object)) + drop_proc_hist <- c(drop_proc_hist, .PROCSTEP.PEAK.DETECTION, + .PROCSTEP.PEAK.REFINEMENT) + if (length(drop_proc_hist)) + object <- dropProcessHistories(object, drop_proc_hist) } if (validObject(object)) return(object) @@ -769,70 +805,257 @@ setMethod("dropAdjustedRtime", "XCMSnExp", function(object) { idx_fal <- -1L ## Copy the content of the object newFd <- new("MsFeatureData") - newFd@.xData <- .copy_env(object@msFeatureData) - ## Revert applied adjustments in peaks: - if (hasChromPeaks(newFd)) { - message("Reverting retention times of identified peaks to ", - "original values ... ", appendLF = FALSE) - fts <- .applyRtAdjToChromPeaks(chromPeaks(newFd), - rtraw = adjustedRtime(object, - bySample = TRUE), - rtadj = rtime(object, - bySample = TRUE, - adjusted = FALSE)) - ## Replacing peaks in MsFeatureData, not in XCMSnExp to avoid - ## all results being removed. - chromPeaks(newFd) <- fts - message("OK") - } - ## 1) Drop the retention time adjustment and (the latest) related process - ## history - object <- dropProcessHistories(object, - type = .PROCSTEP.RTIME.CORRECTION, - num = 1) - newFd <- dropAdjustedRtime(newFd) - object@msFeatureData <- newFd - lockEnvironment(newFd, bindings = TRUE) + newFd@.xData <- .copy_env(object@msFeatureData) + object <- dropProcessHistories( + object, type = .PROCSTEP.RTIME.CORRECTION, num = 1) + newFd <- dropAdjustedRtime( + newFd, rtime(object, bySample = TRUE, adjusted = FALSE)) ## 2) If grouping has been performed AFTER retention time correction it ## has to be dropped too, including ALL related process histories. if (hasFeatures(object)) { if (max(idx_fal) > max(idx_art)) { - object <- dropFeatureDefinitions(object) - object <- dropProcessHistories(object, - type = .PROCSTEP.PEAK.GROUPING, - num = -1) + newFd <- dropFeatureDefinitions(newFd) + object <- dropProcessHistories( + object, type = .PROCSTEP.PEAK.GROUPING, num = -1) } - } else { + } else { ## If there is any peak alignment related process history, but no ## peak alignment results, drop them. - object <- dropProcessHistories(object, - type = .PROCSTEP.PEAK.GROUPING, - num = -1) + object <- dropProcessHistories( + object, type = .PROCSTEP.PEAK.GROUPING, num = -1) } + object@msFeatureData <- newFd + lockEnvironment(newFd, bindings = TRUE) } if (validObject(object)) return(object) }) +#' @title XCMSnExp filtering and subsetting +#' +#' @aliases XCMSnExp-filter +#' #' @description #' -#' The \code{[} method allows to subset a \code{\link{XCMSnExp}} -#' object by spectra. Be aware that the \code{[} method removes all -#' preprocessing results, except adjusted retention times if -#' \code{keepAdjustedRtime = TRUE} is passed to the method. +#' The methods listed on this page allow to filter and subset [XCMSnExp] +#' objects. Most of them are inherited from the [OnDiskMSnExp] object defined +#' in the `MSnbase` package and have been adapted for `XCMSnExp` to enable +#' correct subsetting of preprocessing results. +#' +#' - `[`: subset a `XCMSnExp` object by spectra. Be aware that this removes +#' **all** preprocessing results, except adjusted retention times if +#' `keepAdjustedRtime = TRUE` is passed to the method. +#' +#' - `[[`: extracts a single `Spectrum` object (defined in `MSnbase`). The +#' reported retention time is the adjusted retention time if alignment has +#' been performed. +#' +#' - `filterChromPeaks`: subset the `chromPeaks` `matrix` in `object`. Parameter +#' `method` allows to specify how the chromatographic peaks should be +#' filtered. Currently, only `method = "keep"` is supported which allows to +#' specify chromatographic peaks to keep with parameter `keep` (i.e. provide +#' a `logical`, `integer` or `character` defining which chromatographic peaks +#' to keep). Feature definitions (if present) are updated correspondingly. +#' +#' - `filterFeatureDefinitions`: allows to subset the feature definitions of +#' an `XCMSnExp` object. Parameter `features` allow to define which features +#' to keep. It can be a `logical`, `integer` (index of features to keep) or +#' `character` (feature IDs) vector. +#' +#' - `filterFile`: allows to reduce the `XCMSnExp` to data from only selected +#' files. Identified chromatographic peaks for these files are retained while +#' correspondence results (feature definitions) are removed by default. To +#' force keeping feature definitions use `keepFeatures = TRUE`. Adjusted +#' retention times (if present) are retained by default if present. Use +#' `keepAdjustedRtime = FALSE` to drop them. +#' +#' - `filterMsLevel`: reduces the `XCMSnExp` object to spectra of the +#' specified MS level(s). Chromatographic peaks and identified features are +#' also subsetted to the respective MS level. See also the `filterMsLevel` +#' documentation in `MSnbase` for details and examples. +#' +#' - `filterMz`: filters the data set based on the provided m/z value range. +#' All chromatographic peaks and features (grouped peaks) falling +#' **completely** within the provided mz value range are retained +#' (i.e. if their minimal m/z value is `>= mz[1]` and the maximal m/z value +#' `<= mz[2]`. Adjusted retention times, if present, are kept. +#' +#' - `filterRt`: filters the data set based on the provided retention time +#' range. All chromatographic peaks and features (grouped peaks) +#' **completely** within the specified retention time window are retained +#' (i.e. if the retention time corresponding to the peak's apex is within the +#' specified rt range). If retention time correction has been performed, +#' the method will by default filter the object by adjusted retention times. +#' The argument `adjusted` allows to specify manually whether filtering +#' should be performed on raw or adjusted retention times. Filtering by +#' retention time does not drop any preprocessing results nor does it remove +#' or change alignment results (i.e. adjusted retention times). +#' The method returns an empty object if no spectrum or feature is within +#' the specified retention time range. +#' +#' - `split`: splits an `XCMSnExp` object into a `list` of `XCMSnExp` objects +#' based on the provided parameter `f`. Note that by default all +#' pre-processing results are removed by the splitting, except adjusted +#' retention times, if the optional argument `keepAdjustedRtime = TRUE` is +#' provided. +#' +#' @details +#' +#' All subsetting methods try to ensure that the returned data is +#' consistent. Correspondence results for example are removed by default if the +#' data set is sub-setted by file, since the correspondence results are +#' dependent on the files on which correspondence was performed. This can be +#' changed by setting `keepFeatures = TRUE`. +#' For adjusted retention times, most subsetting methods +#' support the argument `keepAdjustedRtime` (even the `[` method) +#' that forces the adjusted retention times to be retained even if the +#' default would be to drop them. +#' +#' @note +#' +#' The `filterFile` method removes also process history steps not +#' related to the files to which the object should be sub-setted and updates +#' the `fileIndex` attribute accordingly. Also, the method does not +#' allow arbitrary ordering of the files or re-ordering of the files within +#' the object. +#' +#' Note also that most of the filtering methods, and also the subsetting +#' operations `[` drop all or selected preprocessing results. To +#' consolidate the alignment results, i.e. ensure that adjusted retention +#' times are always preserved, use the [applyAdjustedRtime()] +#' function on the object that contains the alignment results. This replaces +#' the raw retention times with the adjusted ones. +#' +#' @param adjusted For `filterRt`: `logical` indicating whether the +#' object should be filtered by original (`adjusted = FALSE`) or +#' adjusted retention times (`adjusted = TRUE`). +#' For `spectra`: whether the retention times in the individual +#' `Spectrum` objects should be the adjusted or raw retention times. #' -#' @param x For \code{[} and \code{[[}: an \code{\link{XCMSnExp}} object. +#' @param drop For `[` and `[[`: not supported. #' -#' @param i For \code{[}: \code{numeric} or \code{logical} vector specifying to +#' @param f For `split` a vector of length equal to the length of x +#' defining how `x` should be splitted. It is converted internally to +#' a `factor`. +#' +#' @param features For `filterFeatureDefinitions`: either a `integer` +#' specifying the indices of the features (rows) to keep, a `logical` +#' with a length matching the number of rows of `featureDefinitions` +#' or a `character` with the feature (row) names. +#' +#' @param file For `filterFile`: `integer` defining the file index +#' within the object to subset the object by file or `character` +#' specifying the file names to sub set. The indices are expected to be +#' increasingly ordered, if not they are ordered internally. +#' +#' @param i For `[`: `numeric` or `logical` vector specifying to #' which spectra the data set should be reduced. -#' For \code{[[}: a single integer or character. +#' For `[[`: a single integer or character. +#' +#' @param j For `[` and `[[`: not supported. +#' +#' @param keep For `filterChromPeaks`: `logical`, `integer` or `character` +#' defining which chromatographic peaks should be retained. +#' +#' @param keepAdjustedRtime For `filterFile`, `filterMsLevel`, +#' `[`, `split`: `logical(1)` defining whether the adjusted +#' retention times should be kept, even if e.g. features are being removed +#' (and the retention time correction was performed on these features). +#' +#' @param keepFeatures For `filterFile`: `logical(1)` whether +#' correspondence results (feature definitions) should be kept or dropped. +#' Defaults to `keepFeatures = FALSE` hence feature definitions are removed +#' from the returned object by default. +#' +#' @param method For `filterChromPeaks`: `character(1)` allowing to specify the +#' method by which chromatographic peaks should be filtered. Currently only +#' `method = "keep"` is supported (i.e. specify with parameter `keep` which +#' chromatographic peaks should be retained). +#' +#' @param msLevel. For `filterMz`, `filterRt`: `numeric` +#' defining the MS level(s) to which operations should be applied or to +#' which the object should be subsetted. +#' +#' @param mz For `filterMz`: `numeric(2)` defining the lower and upper +#' mz value for the filtering. +#' +#' @param object A [XCMSnExp] object. +#' +#' @param rt For `filterRt`: `numeric(2)` defining the retention time +#' window (lower and upper bound) for the filtering. +#' +#' @param x For `[` and `[[`: an `XCMSnExp` object. +#' +#' @param ... Optional additional arguments. +#' +#' @return All methods return an [XCMSnExp] object. +#' +#' @author Johannes Rainer #' -#' @param j For \code{[} and \code{[[}: not supported. +#' @seealso [XCMSnExp] for base class documentation. #' -#' @param drop For \code{[} and \code{[[}: not supported. +#' @seealso [XChromatograms()] for similar filter functions on +#' `XChromatograms` objects. #' #' @rdname XCMSnExp-filter-methods +#' +#' @md +#' +#' @examples +#' +#' ## Loading a test data set with identified chromatographic peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Subset the dataset to the first and third file. +#' xod_sub <- filterFile(faahko_sub, file = c(1, 3)) +#' +#' ## The number of chromatographic peaks per file for the full object +#' table(chromPeaks(faahko_sub)[, "sample"]) +#' +#' ## The number of chromatographic peaks per file for the subset +#' table(chromPeaks(xod_sub)[, "sample"]) +#' +#' basename(fileNames(faahko_sub)) +#' basename(fileNames(xod_sub)) +#' +#' ## Filter on mz values; chromatographic peaks and features within the +#' ## mz range are retained (as well as adjusted retention times). +#' xod_sub <- filterMz(faahko_sub, mz = c(300, 400)) +#' head(chromPeaks(xod_sub)) +#' nrow(chromPeaks(xod_sub)) +#' nrow(chromPeaks(faahko_sub)) +#' +#' ## Filter on rt values. All chromatographic peaks and features within the +#' ## retention time range are retained. Filtering is performed by default on +#' ## adjusted retention times, if present. +#' xod_sub <- filterRt(faahko_sub, rt = c(2700, 2900)) +#' +#' range(rtime(xod_sub)) +#' head(chromPeaks(xod_sub)) +#' range(chromPeaks(xod_sub)[, "rt"]) +#' +#' nrow(chromPeaks(faahko_sub)) +#' nrow(chromPeaks(xod_sub)) +#' +#' ## Extract a single Spectrum +#' faahko_sub[[4]] +#' +#' ## Subsetting using [ removes all preprocessing results - using +#' ## keepAdjustedRtime = TRUE would keep adjusted retention times, if present. +#' xod_sub <- faahko_sub[fromFile(faahko_sub) == 1] +#' xod_sub +#' +#' ## Using split does also remove preprocessing results, but it supports the +#' ## optional parameter keepAdjustedRtime. +#' ## Split the object into a list of XCMSnExp objects, one per file +#' xod_list <- split(faahko_sub, f = fromFile(faahko_sub)) +#' xod_list setMethod("[", "XCMSnExp", function(x, i, j, ..., drop = TRUE) { if (!missing(j)) stop("subsetting by columns ('j') not supported") @@ -840,43 +1063,29 @@ setMethod("[", "XCMSnExp", function(x, i, j, ..., drop = TRUE) { return(x) else if (!(is.numeric(i) | is.logical(i))) stop("'i' has to be either numeric or logical") + ## Only result we might eventually keep is adjusted rtimes... + newFd <- new("MsFeatureData") + ph <- list() ## Check if we have keepAdjustedRtime as an additional parameter ## in ... keepAdjustedRtime <- list(...)$ke if (is.null(keepAdjustedRtime)) keepAdjustedRtime <- FALSE - if (hasFeatures(x) | hasChromPeaks(x)) { - suppressMessages( - x <- dropFeatureDefinitions(x, keepAdjustedRtime = - keepAdjustedRtime)) - suppressMessages( - x <- dropChromPeaks(x, keepAdjustedRtime = - keepAdjustedRtime)) + if ((hasFeatures(x) || hasChromPeaks(x)) && length(i) > 30) warning("Removed preprocessing results") + if (hasAdjustedRtime(x) && keepAdjustedRtime) { + new_adj <- rtime(x, adjusted = TRUE)[i] + adjustedRtime(newFd) <- + unname(split(new_adj, f = fromFile(x)[i])) + p <- processHistory(x, type = .PROCSTEP.RTIME.CORRECTION) + ph <- p[length(ph)] } - if (hasAdjustedRtime(x)) { - if (keepAdjustedRtime) { - ## Subset the adjusted rtime - new_adj <- rtime(x, adjusted = TRUE)[i] - newFd <- new("MsFeatureData") - newFd@.xData <- .copy_env(x@msFeatureData) - adjustedRtime(newFd) <- - unname(split(new_adj, f = fromFile(x)[i])) - lockEnvironment(newFd, bindings = TRUE) - x@msFeatureData <- newFd - } else { - suppressMessages(x <- dropAdjustedRtime(x)) - } - } + lockEnvironment(newFd, bindings = TRUE) + x@msFeatureData <- newFd + x@.processHistory <- ph callNextMethod() }) -#' @description -#' -#' \code{[[} extracts a single \code{\link{Spectrum}} -#' object from an \code{XCMSnExp}. The reported retention time is the -#' adjusted retention time if alignment has been performed on \code{x}. -#' #' @rdname XCMSnExp-filter-methods setMethod("[[", "XCMSnExp", function(x, i, j, drop = FALSE) { @@ -905,7 +1114,7 @@ setMethod("[[", "XCMSnExp", #' #' @param x \code{\link{XCMSnExp}} or \code{\link{OnDiskMSnExp}} #' object. -#' +#' #' @param object \code{\link{XCMSnExp}} or \code{\link{OnDiskMSnExp}} #' object. #' @@ -927,14 +1136,14 @@ setMethod("[[", "XCMSnExp", #' parent class. #' #' @author Johannes Rainer -setMethod("bin", "XCMSnExp", function(object, binSize = 1L, msLevel.) { - if (hasAdjustedRtime(object) | hasFeatures(object) | - hasChromPeaks(object)) { +setMethod("bin", "XCMSnExp", function(x, binSize = 1L, msLevel.) { + if (hasAdjustedRtime(x) | hasFeatures(x) | + hasChromPeaks(x)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") - object <- dropAdjustedRtime(object) - object <- dropFeatureDefinitions(object) - object <- dropChromPeaks(object) + x <- dropAdjustedRtime(x) + x <- dropFeatureDefinitions(x) + x <- dropChromPeaks(x) warning("Removed preprocessing results") } callNextMethod() @@ -967,16 +1176,6 @@ setMethod("clean", "XCMSnExp", function(object, all = FALSE, callNextMethod() }) -#' @description -#' -#' \code{filterMsLevel}: reduces the \code{\link{XCMSnExp}} -#' object to spectra of the specified MS level(s). See -#' \code{\link{filterMsLevel}} documentation for details and -#' examples. Presently, if \code{msLevel.} is provided, the function -#' removes identified chromatographic peaks and correspondence results -#' while keeping adjusted retention times by default (if present). The -#' latter can be changed setting \code{keepAdjustedRtime = FALSE}. -#' #' @rdname XCMSnExp-filter-methods setMethod("filterMsLevel", "XCMSnExp", function(object, msLevel., keepAdjustedRtime = @@ -993,41 +1192,44 @@ setMethod("filterMsLevel", "XCMSnExp", function(object, msLevel., return(res) } - ## In future we might want to keep also the chromatographic peaks of the - ## correct MS level. - if (hasChromPeaks(object)) - warning("Identified chromatographic peaks removed") - if (hasFeatures(object)) - warning("Feature definitions removed") - - ## Create a new empty MsFeatureData and just add adjusted retention times newMfd <- new("MsFeatureData") ph <- processHistory(object) - ## 2) Subset adjusted retention time - ## if (hasAdjustedRtime(object) & length(keep_fts)) { if (hasAdjustedRtime(object)) { if (keepAdjustedRtime) { ## issue #210: keep adjusted retention times if wanted. - ## CAVE: we're still removing the chromatographic peaks at the - ## moment thus we might miss later the peaks and groups on which - ## alignment has been performed (for peak groups method). - keep_by_file <- base::split(keep_logical, fromFile(object)) + keep_by_file <- base::split(keep_logical, as.factor(fromFile(object))) adj_rt <- base::mapply(FUN = function(y, z) { return(y[z]) }, y = adjustedRtime(object, bySample = TRUE), z = keep_by_file, SIMPLIFY = FALSE) adjustedRtime(newMfd) <- adj_rt - ph <- dropProcessHistoriesList(ph, - type = c(.PROCSTEP.PEAK.DETECTION, - .PROCSTEP.PEAK.GROUPING, - .PROCSTEP.PEAK.FILLING, - .PROCSTEP.CALIBRATION)) } else { object <- dropAdjustedRtime(object) ph <- dropProcessHistoriesList(ph, type = .PROCSTEP.RTIME.CORRECTION) } } + if (hasChromPeaks(object)) { + newMfd2 <- .filterChromPeaks( + object@msFeatureData, which(chromPeakData(object)$ms_level %in% + msLevel.)) + if (hasChromPeaks(newMfd2)) { + chromPeaks(newMfd) <- chromPeaks(newMfd2) + chromPeakData(newMfd) <- chromPeakData(newMfd2) + } + if (hasFeatures(newMfd2)) + featureDefinitions(newMfd) <- featureDefinitions(newMfd2) + } + ## Subset processing history + keep_ph <- vapply(ph, function(z) { + if (inherits(z, "XProcessHistory")) { + is_ok <- any(z@msLevel == msLevel.) + if (is.na(is_ok) || is_ok) TRUE + else FALSE + } else TRUE + }, logical(1)) + ph <- ph[keep_ph] + ## Subsetting the object. tmp <- as(object, "OnDiskMSnExp")[base::which(keep_logical)] object <- as(tmp, "XCMSnExp") ## Put the stuff back @@ -1035,8 +1237,8 @@ setMethod("filterMsLevel", "XCMSnExp", function(object, msLevel., lockEnvironment(newMfd, bindings = TRUE) object@msFeatureData <- newMfd object@.processHistory <- ph - if (validObject(object)) - object + validObject(object) + object }) #' @description \code{filterAcquisitionNum}: filters the @@ -1048,7 +1250,7 @@ setMethod("filterMsLevel", "XCMSnExp", function(object, msLevel., #' acquisition numbers of the spectra to which the data set should be #' sub-setted. #' -#' @param file For \code{filterAcquisitionNum}: +#' @param file For \code{filterAcquisitionNum}: #' \code{integer} defining the file index within the object to subset the #' object by file. #' @@ -1066,214 +1268,18 @@ setMethod("filterAcquisitionNum", "XCMSnExp", function(object, n, file) { callNextMethod() }) -#' @aliases XCMSnExp-filter -#' -#' @title XCMSnExp filtering and subsetting -#' -#' @description -#' -#' The methods listed on this page allow to filter and subset -#' \code{\link{XCMSnExp}} objects. Most of them are inherited from the -#' \code{\link{OnDiskMSnExp}} object and have been adapted for -#' \code{\link{XCMSnExp}} to enable subsetting also on the preprocessing -#' results. -#' -#' \code{filterFile}: allows to reduce the -#' \code{\link{XCMSnExp}} to data from only certain files. Identified -#' chromatographic peaks for these files are retained while all eventually -#' present features (peak grouping information) are dropped. By default -#' also adjusted retention times are removed (if present). This can be -#' overwritten by setting \code{keepAdjustedRtime = TRUE}. -#' -#' @details -#' -#' All subsetting methods try to ensure that the returned data is -#' consistent. Correspondence results for example are removed if the data -#' set is sub-setted by file, since the correspondence results are dependent -#' on the files on which correspondence was performed. Thus, some filter -#' and sub-setting methods drop some of the preprocessing results. An -#' exception are the adjusted retention times: most subsetting methods -#' support the argument \code{keepAdjustedRtime} (even the \code{[} method) -#' that forces the adjusted retention times to be retained even if the -#' default would be to drop them. -#' -#' @note -#' -#' The \code{filterFile} method removes also process history steps not -#' related to the files to which the object should be sub-setted and updates -#' the \code{fileIndex} attribute accordingly. Also, the method does not -#' allow arbitrary ordering of the files or re-ordering of the files within -#' the object. -#' -#' Note also that most of the filtering methods, and also the subsetting -#' operations \code{[} drop all or selected preprocessing results. To -#' consolidate the alignment results, i.e. ensure that adjusted retention -#' times are always preserved, use the \code{\link{applyAdjustedRtime}} -#' function on the object that contains the alignment results. This replaces -#' the raw retention times with the adjusted ones. -#' -#' @param object A \code{\link{XCMSnExp}} object. -#' -#' @param file For \code{filterFile}: \code{integer} defining the file index -#' within the object to subset the object by file or \code{character} -#' specifying the file names to sub set. The indices are expected to be -#' increasingly ordered, if not they are ordered internally. -#' -#' @param keepAdjustedRtime For \code{filterFile}, \code{filterMsLevel}, -#' \code{[} \code{split}: \code{logical(1)} defining whether the adjusted -#' retention times should be kept, even if e.g. features are being removed -#' (and the retention time correction was performed on these features). -#' -#' @return All methods return an \code{\link{XCMSnExp}} object. -#' -#' @author Johannes Rainer -#' -#' @seealso \code{\link{XCMSnExp}} for base class documentation. -#' #' @rdname XCMSnExp-filter-methods -#' -#' @examples -#' -#' ## Load some of the files from the faahKO package. -#' library(faahKO) -#' fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) -#' ## Read the files -#' od <- readMSData(fs, mode = "onDisk") -#' -#' ## Perform peak detection on them using the matched filter algorithm. Note -#' ## that we use a large value for binSize to reduce the runtime of the -#' ## example code. -#' mfp <- MatchedFilterParam(binSize = 5) -#' xod <- findChromPeaks(od, param = mfp) -#' -#' ## Subset the dataset to the first and third file. -#' xod_sub <- filterFile(xod, file = c(1, 3)) -#' -#' ## The number of chromatographic peaks per file for the full object -#' table(chromPeaks(xod)[, "sample"]) -#' -#' ## The number of chromatographic peaks per file for the subset -#' table(chromPeaks(xod_sub)[, "sample"]) -#' -#' basename(fileNames(xod)) -#' basename(fileNames(xod_sub)) -#' -#' ## Filter on mz values; chromatographic peaks and features within the -#' ## mz range are retained (as well as adjusted retention times). -#' xod_sub <- filterMz(xod, mz = c(300, 400)) -#' head(chromPeaks(xod_sub)) -#' nrow(chromPeaks(xod_sub)) -#' nrow(chromPeaks(xod)) -#' -#' ## Filter on rt values. All chromatographic peaks and features within the -#' ## retention time range are retained. Filtering is performed by default on -#' ## adjusted retention times, if present. -#' xod_sub <- filterRt(xod, rt = c(2700, 2900)) -#' -#' range(rtime(xod_sub)) -#' head(chromPeaks(xod_sub)) -#' range(chromPeaks(xod_sub)[, "rt"]) -#' -#' nrow(chromPeaks(xod)) -#' nrow(chromPeaks(xod_sub)) -#' -#' ## Extract a single Spectrum -#' xod[[4]] -#' -#' ## Subsetting using [ removes all preprocessing results - using -#' ## keepAdjustedRtime = TRUE would keep adjusted retention times, if present. -#' xod_sub <- xod[fromFile(xod) == 1] -#' xod_sub -#' -#' ## Using split does also remove preprocessing results, but it supports the -#' ## optional parameter keepAdjustedRtime. -#' ## Split the object into a list of XCMSnExp objects, one per file -#' xod_list <- split(xod, f = fromFile(xod)) -#' xod_list -setMethod("filterFile", "XCMSnExp", function(object, file, - keepAdjustedRtime = FALSE) { - if (missing(file)) return(object) - if (is.character(file)) { - file <- base::match(file, basename(fileNames(object))) - } - ## This will not work if we want to get the files in a different - ## order (i.e. c(3, 1, 2, 5)) - file <- base::sort(unique(file)) - ## Error checking - seems that's not performed downstream. - if (!all(file %in% 1:length(fileNames(object)))) - stop("'file' has to be within 1 and the number of files in the object!") - ## Drop features - has_features <- hasFeatures(object) - has_chrom_peaks <- hasChromPeaks(object) - has_adj_rt <- hasAdjustedRtime(object) - if (has_features) { - message("Correspondence results (features) removed.") - object <- dropFeatureDefinitions(object, - keepAdjustedRtime = keepAdjustedRtime) - } - if (has_adj_rt & !keepAdjustedRtime){ - object <- dropAdjustedRtime(object) - has_adj_rt <- FALSE - } - ## Extracting all the XCMSnExp data from the object. - ph <- processHistory(object) - newFd <- new("MsFeatureData") - newFd@.xData <- .copy_env(object@msFeatureData) - ## Subset original data: - suppressWarnings( - object <- callNextMethod(object = object, file = file) - ) - ## Subset the results per file: - if (has_adj_rt) { - adjustedRtime(newFd) <- adjustedRtime(newFd)[file] - } - if (has_chrom_peaks) { - pks <- chromPeaks(newFd) - pks <- pks[pks[, "sample"] %in% file, , drop = FALSE] - pks[, "sample"] <- match(pks[, "sample"], file) - chromPeaks(newFd) <- pks - } - ## Remove ProcessHistory not related to any of the files. - if (length(ph)) { - kp <- unlist(lapply(ph, function(z) { - any(fileIndex(z) %in% file) - })) - ph <- ph[kp] - } - ## Update file index in process histories. - if (length(ph)) { - ph <- lapply(ph, function(z) { - updateFileIndex(z, old = file, new = 1:length(file)) - }) - } - lockEnvironment(newFd, bindings = TRUE) - object@msFeatureData <- newFd - object@.processHistory <- ph - if (validObject(object)) +setMethod( + "filterFile", "XCMSnExp", + function(object, file, keepAdjustedRtime = hasAdjustedRtime(object), + keepFeatures = FALSE) { + object <- .filter_file_XCMSnExp(object, file = file, + keepAdjustedRtime = keepAdjustedRtime, + keepFeatures = keepFeatures) + validObject(object) object -}) + }) -#' @description -#' -#' \code{filterMz}: filters the data set based on the -#' provided mz value range. All chromatographic peaks and features (grouped -#' peaks) falling completely within the provided mz value range are retained -#' (if their minimal mz value is \code{>= mz[1]} and the maximal mz value -#' \code{<= mz[2]}. Adjusted retention times, if present, are not altered by -#' the filtering. -#' -#' @param mz For \code{filterMz}: \code{numeric(2)} defining the lower and upper -#' mz value for the filtering. -#' -#' @param msLevel. For \code{filterMz}, \code{filterRt}, \code{numeric(1)} -#' defining the MS level(s) to which operations should be applied or to -#' which the object should be subsetted. See \code{\link{filterMz}} -#' for more details -#' -#' @param ... Optional additional arguments. -#' #' @rdname XCMSnExp-filter-methods setMethod("filterMz", "XCMSnExp", function(object, mz, msLevel., ...) { if (missing(mz)) @@ -1285,40 +1291,16 @@ setMethod("filterMz", "XCMSnExp", function(object, mz, msLevel., ...) { object <- callNextMethod() # just adds to processing queue. if (hasChromPeaks(object)) { - fts <- chromPeaks(object) - keepIdx <- which(fts[, "mzmin"] >= mz[1] & fts[, "mzmax"] <= mz[2]) + pks <- chromPeaks(object) + keepIdx <- which(pks[, "mzmin"] >= mz[1] & pks[, "mzmax"] <= mz[2]) newE <- .filterChromPeaks(object@msFeatureData, idx = keepIdx) lockEnvironment(newE, bindings = TRUE) object@msFeatureData <- newE } - if (validObject(object)) - return(object) + validObject(object) + object }) -#' @description -#' -#' \code{filterRt}: filters the data set based on the -#' provided retention time range. All chromatographic peaks and features -#' (grouped peaks) the specified retention time window are retained (i.e. if -#' the retention time corresponding to the peak's apex is within the -#' specified rt range). If retention time correction has been performed, -#' the method will by default filter the object by adjusted retention times. -#' The argument \code{adjusted} allows to specify manually whether filtering -#' should be performed by raw or adjusted retention times. Filtering by -#' retention time does not drop any preprocessing results nor does it remove -#' or change alignment results (i.e. adjusted retention times). -#' The method returns an empty object if no spectrum or feature is within -#' the specified retention time range. -#' -#' @param rt For \code{filterRt}: \code{numeric(2)} defining the retention time -#' window (lower and upper bound) for the filtering. -#' -#' @param adjusted For \code{filterRt}: \code{logical} indicating whether the -#' object should be filtered by original (\code{adjusted = FALSE}) or -#' adjusted retention times (\code{adjusted = TRUE}). -#' For \code{spectra}: whether the retention times in the individual -#' \code{Spectrum} objects should be the adjusted or raw retention times. -#' #' @rdname XCMSnExp-filter-methods setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., adjusted = hasAdjustedRtime(object)) { @@ -1353,7 +1335,6 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., } ## Extract the stuff we want to keep - ## mfd <- as(.copy_env(object@msFeatureData), "MsFeatureData") newMfd <- new("MsFeatureData") ph <- processHistory(object) ## 1) Subset peaks within the retention time range and peak groups. @@ -1370,8 +1351,7 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., } keep_fts <- base::which(ftrt >= rt[1] & ftrt <= rt[2]) if (length(keep_fts)) - newMfd <- .filterChromPeaks(object, idx = keep_fts) - ## features(newMfd) <- features(object)[keep_fts, , drop = FALSE] + newMfd <- .filterChromPeaks(object@msFeatureData, idx = keep_fts) else ph <- dropProcessHistoriesList(ph, type = c(.PROCSTEP.PEAK.DETECTION, @@ -1384,7 +1364,7 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., if (hasAdjustedRtime(object)) { ## Subset the adjusted retention times (which are stored as a list of ## rts by file): - keep_by_file <- base::split(keep_logical, fromFile(object)) + keep_by_file <- base::split(keep_logical, as.factor(fromFile(object))) adj_rt <- base::mapply(FUN = function(y, z) { return(y[z]) }, y = adjustedRtime(object, bySample = TRUE), z = keep_by_file, @@ -1401,8 +1381,7 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., lockEnvironment(newMfd, bindings = TRUE) object@msFeatureData <- newMfd object@.processHistory <- ph - if (validObject(object)) - return(object) + object }) @@ -1426,11 +1405,8 @@ setMethod("normalize", "XCMSnExp", function(object, method = c("max", "sum"), ...) { if (hasAdjustedRtime(object) | hasFeatures(object) | hasChromPeaks(object)) { - ## object@.processHistory <- list() - ## object@msFeatureData <- new("MsFeatureData") - object <- dropAdjustedRtime(object) - object <- dropFeatureDefinitions(object) - object <- dropChromPeaks(object) + object@.processHistory <- list() + object@msFeatureData <- new("MsFeatureData") warning("Removed preprocessing results") } callNextMethod() @@ -1458,11 +1434,8 @@ setMethod("pickPeaks", "XCMSnExp", function(object, halfWindowSize = 3L, SNR = 0L, ...) { if (hasAdjustedRtime(object) | hasFeatures(object) | hasChromPeaks(object)) { - ## object@.processHistory <- list() - ## object@msFeatureData <- new("MsFeatureData") - object <- dropAdjustedRtime(object) - object <- dropFeatureDefinitions(object) - object <- dropChromPeaks(object) + object@.processHistory <- list() + object@msFeatureData <- new("MsFeatureData") warning("Removed preprocessing results") } callNextMethod() @@ -1486,11 +1459,8 @@ setMethod("removePeaks", "XCMSnExp", function(object, t = "min", verbose = FALSE msLevel.) { if (hasAdjustedRtime(object) | hasFeatures(object) | hasChromPeaks(object)) { - ## object@.processHistory <- list() - ## object@msFeatureData <- new("MsFeatureData") - object <- dropAdjustedRtime(object) - object <- dropFeatureDefinitions(object) - object <- dropChromPeaks(object) + object@.processHistory <- list() + object@msFeatureData <- new("MsFeatureData") warning("Removed preprocessing results") } callNextMethod() @@ -1509,68 +1479,88 @@ setMethod("smooth", "XCMSnExp", function(x, method = c("SavitzkyGolay", ...) { if (hasAdjustedRtime(x) | hasFeatures(x) | hasChromPeaks(x)) { - ## x@.processHistory <- list() - ## x@msFeatureData <- new("MsFeatureData") - x <- dropAdjustedRtime(x) - x <- dropFeatureDefinitions(x) - x <- dropChromPeaks(x) + x@.processHistory <- list() + x@msFeatureData <- new("MsFeatureData") warning("Removed preprocessing results") } callNextMethod() }) #' @aliases setAs -#' +#' #' @rdname XCMSnExp-class #' #' @name XCMSnExp-class setAs(from = "XCMSnExp", to = "xcmsSet", def = .XCMSnExp2xcmsSet) +#' @rdname XCMSnExp-peak-grouping-results +setMethod("quantify", "XCMSnExp", function(object, ...) { + .XCMSnExp2SummarizedExperiment(object, ...) +}) #' @title Peak grouping/correspondence based on time dimension peak densities #' #' @description #' -#' \code{groupChromPeaks,XCMSnExp,PeakDensityParam}: +#' `groupChromPeaks,XCMSnExp,PeakDensityParam`: #' performs correspondence (peak grouping within and across samples) within #' in mz dimension overlapping slices of MS data based on the density #' distribution of the identified chromatographic peaks in the slice along #' the time axis. #' -#' @note -#' -#' Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause -#' all eventually present previous correspondence results to be dropped. +#' The correspondence analysis can be performed on chromatographic peaks of +#' any MS level (if present and if chromatographic peak detection has been +#' performed for that MS level) defining features combining these peaks. The +#' MS level can be selected with the parameter `msLevel`. By default, calling +#' `groupChromPeaks` will remove any previous correspondence results. This can +#' be disabled with `add = TRUE`, which will add newly defined features to +#' already present feature definitions. #' -#' @param object For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object +#' @param object For `groupChromPeaks`: an [XCMSnExp] object #' containing the results from a previous peak detection analysis (see -#' \code{\link{findChromPeaks}}). +#' [findChromPeaks()]). +#' +#' For all other methods: a `PeakDensityParam` object. #' -#' For all other methods: a \code{PeakDensityParam} object. -#' -#' @param param A \code{PeakDensityParam} object containing all settings for +#' @param param A `PeakDensityParam` object containing all settings for #' the peak grouping algorithm. #' +#' @param msLevel `integer(1)` (default `msLevel = 1L`) defining the MS level +#' on which the correspondence should be performed. It is required that +#' chromatographic peaks of the respective MS level are present. +#' +#' @param add `logical(1)` (default `add = FALSE`) allowing to perform an +#' additional round of correspondence (e.g. on a different MS level) and +#' add features to the already present feature definitions. +#' #' @return #' -#' For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the +#' For `groupChromPeaks`: a [XCMSnExp] object with the #' results of the correspondence analysis. The definition of the resulting -#' mz-rt features can be accessed with the \code{\link{featureDefinitions}} -#' method. -#' -#' @seealso \code{\link{XCMSnExp}} for the object containing the results of -#' the correspondence. -#' +#' mz-rt features can be accessed with the [featureDefinitions()] method +#' +#' @seealso +#' +#' [XCMSnExp] for the object containing the results of the correspondence. +#' +#' [plotChromPeakDensity()] for plotting chromatographic peak density with the +#' possibility to test different parameter settings. +#' +#' @md +#' #' @rdname groupChromPeaks-density setMethod("groupChromPeaks", signature(object = "XCMSnExp", param = "PeakDensityParam"), - function(object, param) { - if (!hasChromPeaks(object)) - stop("No chromatographic peak detection results in 'object'! ", - "Please perform first a peak detection using the ", - "'findChromPeaks' method.") - ## Get rid of any previous results. - if (hasFeatures(object)) + function(object, param, msLevel = 1L, add = FALSE) { + if (length(msLevel) != 1) + stop("Can only perform the correspondence analysis on one MS", + " level at a time. Please repeat for other MS levels ", + "with parameter `add = TRUE`.") + if (!hasChromPeaks(object, msLevel)) + stop("No chromatographic peak for MS level ", msLevel, + " present. Please perform first a peak detection ", + "using the 'findChromPeaks' method.", call. = FALSE) + if (hasFeatures(object) && !add) object <- dropFeatureDefinitions(object) ## Check if we've got any sample groups: if (length(sampleGroups(param)) == 0) { @@ -1582,31 +1572,49 @@ setMethod("groupChromPeaks", if (length(sampleGroups(param)) != length(fileNames(object))) stop("The 'sampleGroups' value in the provided 'param' ", "class does not match the number of available files/", - "samples!") + "samples!", call. = FALSE) } + if (hasChromPeaks(object) && !.has_chrom_peak_data(object)) + object <- updateObject(object) + if (hasFeatures(object) && + !any(colnames(featureDefinitions(object)) == "ms_level")) + object <- updateObject(object) startDate <- date() - res <- do_groupChromPeaks_density(chromPeaks(object), - sampleGroups = sampleGroups(param), - bw = bw(param), - minFraction = minFraction(param), - minSamples = minSamples(param), - binSize = binSize(param), - maxFeatures = maxFeatures(param)) + res <- do_groupChromPeaks_density( + chromPeaks(object, msLevel = msLevel), + sampleGroups = sampleGroups(param), + bw = bw(param), + minFraction = minFraction(param), + minSamples = minSamples(param), + binSize = binSize(param), + maxFeatures = maxFeatures(param)) xph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.PEAK.GROUPING, - fileIndex = 1:length(fileNames(object))) - object <- addProcessHistory(object, xph) + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) + object <- addProcessHistory(object, xph) ## Add the results. - df <- DataFrame(res$featureDefinitions) - if (nrow(df) == 0) - stop("Unable to group any chromatographic peaks. You might ", - "have to adapt your settings.") - df$peakidx <- res$peakIndex - if (nrow(df) > 0) + df <- DataFrame(res) + if (!nrow(df)) { + warning("Unable to group any chromatographic peaks. ", + "You might have to adapt your settings.") + return(object) + } + df$ms_level <- as.integer(msLevel) + if (!all(chromPeakData(object)$ms_level %in% msLevel)) + df <- .update_feature_definitions( + df, rownames(chromPeaks(object, msLevel = msLevel)), + rownames(chromPeaks(object))) + if (hasFeatures(object)) { + startFrom <- max(as.integer( + sub("FT", "", rownames(featureDefinitions(object))))) + 1 + rownames(df) <- .featureIDs(nrow(df), from = startFrom) + df <- rbindFill(featureDefinitions(object), df) + } else rownames(df) <- .featureIDs(nrow(df)) featureDefinitions(object) <- df - if (validObject(object)) - return(object) + validObject(object) + object }) @@ -1614,41 +1622,47 @@ setMethod("groupChromPeaks", #' #' @description #' -#' \code{groupChromPeaks,XCMSnExp,MzClustParam}: -#' performs high resolution peak grouping for single spectrum -#' metabolomics data. +#' `groupChromPeaks,XCMSnExp,MzClustParam`: performs high resolution peak +#' grouping for single spectrum metabolomics data. #' -#' @note Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause +#' @note Calling `groupChromPeaks` on an `XCMSnExp` object will cause #' all eventually present previous correspondence results to be dropped. #' -#' @param object For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object -#' containing the results from a previous chromatographic peak detection -#' analysis (see \code{\link{findChromPeaks}}). +#' @param object For `groupChromPeaks`: an [XCMSnExp] object containing the +#' results from a previous chromatographic peak detection analysis (see +#' [findChromPeaks()]). #' -#' For all other methods: a \code{MzClustParam} object. -#' -#' @param param A \code{MzClustParam} object containing all settings for +#' For all other methods: a `MzClustParam` object. +#' +#' @param param A `MzClustParam` object containing all settings for #' the peak grouping algorithm. #' +#' @param msLevel `integer(1)` defining the MS level. Currently only MS level +#' 1 is supported. +#' #' @return #' -#' For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the -#' results of the peak grouping step (i.e. the features). These can be -#' accessed with the \code{\link{featureDefinitions}} method. -#' -#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' For `groupChromPeaks`: a [XCMSnExp] object with the results of the peak +#' grouping step (i.e. the features). These can be accessed with the +#' [featureDefinitions()] method. +#' +#' @seealso [XCMSnExp] for the object containing the results of #' the peak grouping. -#' +#' +#' @md +#' #' @rdname groupChromPeaks-mzClust setMethod("groupChromPeaks", signature(object = "XCMSnExp", param = "MzClustParam"), - function(object, param) { + function(object, param, msLevel = 1L) { if (!hasChromPeaks(object)) stop("No chromatographic peak detection results in 'object'! ", "Please perform first a peak detection using the ", "'findChromPeak' method.") + if (any(msLevel != 1)) + stop("Currently peak grouping is only supported for MS level 1") ## I'm expecting a single spectrum per file! - rtL <- split(rtime(object), f = fromFile(object)) + rtL <- split(rtime(object), f = as.factor(fromFile(object))) if (any(lengths(rtL) > 1)) stop("'object' contains multiple spectra per sample! This ", "algorithm does only work for single spectra ", @@ -1668,8 +1682,10 @@ setMethod("groupChromPeaks", "class does not match the number of available files/", "samples!") } + if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) + object <- updateObject(object) startDate <- date() - res <- do_groupPeaks_mzClust(chromPeaks(object), + res <- do_groupPeaks_mzClust(chromPeaks(object, msLevel = msLevel), sampleGroups = sampleGroups(param), ppm = ppm(param), absMz = absMz(param), @@ -1677,19 +1693,24 @@ setMethod("groupChromPeaks", minSamples = minSamples(param)) xph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.PEAK.GROUPING, - fileIndex = 1:length(fileNames(object))) - object <- addProcessHistory(object, xph) + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) + object <- addProcessHistory(object, xph) ## Add the results. df <- DataFrame(res$featureDefinitions) df$peakidx <- res$peakIndex if (nrow(df) == 0) stop("Unable to group any chromatographic peaks. You might ", "have to adapt your settings.") + if (!all(chromPeakData(object)$ms_level %in% msLevel)) + df <- .update_feature_definitions( + df, rownames(chromPeaks(object, msLevel = msLevel)), + rownames(chromPeaks(object))) if (nrow(df) > 0) rownames(df) <- .featureIDs(nrow(df)) featureDefinitions(object) <- df - if (validObject(object)) - return(object) + validObject(object) + object }) @@ -1697,44 +1718,59 @@ setMethod("groupChromPeaks", #' #' @description #' -#' \code{groupChromPeaks,XCMSnExp,NearestPeaksParam}: +#' `groupChromPeaks,XCMSnExp,NearestPeaksParam`: #' performs peak grouping based on the proximity between chromatographic #' peaks from different samples in the mz-rt range. #' -#' @note +#' The correspondence analysis can be performed on chromatographic peaks of +#' any MS level (if present and if chromatographic peak detection has been +#' performed for that MS level) defining features combining these peaks. The +#' MS level can be selected with the parameter `msLevel`. By default, calling +#' `groupChromPeaks` will remove any previous correspondence results. This can +#' be disabled with `add = TRUE`, which will add newly defined features to +#' already present feature definitions. #' -#' Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause -#' all eventually present previous alignment results to be dropped. +#' @param object For `groupChromPeaks`: an [XCMSnExp] object containing the +#' results from a previous chromatographic peak detection +#' analysis (see [findChromPeaks()]). #' -#' @param object For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object -#' containing the results from a previous chromatographic peak detection -#' analysis (see \code{\link{findChromPeaks}}). +#' For all other methods: a `NearestPeaksParam` object. #' -#' For all other methods: a \code{NearestPeaksParam} object. -#' -#' @param param A \code{NearestPeaksParam} object containing all settings for -#' the peak grouping algorithm. +#' @param msLevel `integer(1)` defining the MS level on which the correspondence +#' should be performed. It is required that chromatographic peaks of the +#' respective MS level are present. +#' +#' @param add `logical(1)` (default `add = FALSE`) allowing to perform an +#' additional round of correspondence (e.g. on a different MS level) and +#' add features to the already present feature definitions. +#' +#' @param msLevel `integer(1)` defining the MS level. Currently only MS level +#' 1 is supported. #' #' @return #' -#' For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the -#' results of the peak grouping/correspondence step (i.e. the mz-rt -#' features). These can be accessed with the -#' \code{\link{featureDefinitions}} method. -#' -#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' For `groupChromPeaks`: a [XCMSnExp] object with the results of the peak +#' grouping/correspondence step (i.e. the mz-rt features). These can be +#' accessed with the [featureDefinitions()] method. +#' +#' @seealso [XCMSnExp] for the object containing the results of #' the peak grouping. -#' +#' +#' @md +#' #' @rdname groupChromPeaks-nearest setMethod("groupChromPeaks", signature(object = "XCMSnExp", param = "NearestPeaksParam"), - function(object, param) { - if (!hasChromPeaks(object)) - stop("No chromatographic peak detection results in 'object'! ", - "Please perform first a peak detection using the ", - "'findChromPeaks' method.") - ## Get rid of any previous results. - if (hasFeatures(object)) + function(object, param, msLevel = 1L, add = FALSE) { + if (length(msLevel) != 1) + stop("Can only perform the correspondence analysis on one MS", + " level at a time. Please repeat for other MS levels ", + "with parameter `add = TRUE`.") + if (!hasChromPeaks(object, msLevel)) + stop("No chromatographic peak for MS level ", msLevel, + " present. Please perform first a peak detection ", + "using the 'findChromPeaks' method.", call. = FALSE) + if (hasFeatures(object) && !add) object <- dropFeatureDefinitions(object) ## Check if we've got any sample groups: if (length(sampleGroups(param)) == 0) { @@ -1748,28 +1784,47 @@ setMethod("groupChromPeaks", "class does not match the number of available files/", "samples!") } + if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) + object <- updateObject(object) + if (hasFeatures(object) && + !any(colnames(featureDefinitions(object)) == "ms_level")) + object <- updateObject(object) startDate <- date() - res <- do_groupChromPeaks_nearest(chromPeaks(object), - sampleGroups = sampleGroups(param), - mzVsRtBalance = mzVsRtBalance(param), - absMz = absMz(param), - absRt = absRt(param), - kNN = kNN(param)) + res <- do_groupChromPeaks_nearest( + chromPeaks(object, msLevel = msLevel), + sampleGroups = sampleGroups(param), + mzVsRtBalance = mzVsRtBalance(param), + absMz = absMz(param), + absRt = absRt(param), + kNN = kNN(param)) xph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.PEAK.GROUPING, - fileIndex = 1:length(fileNames(object))) + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) object <- addProcessHistory(object, xph) ## Add the results. df <- DataFrame(res$featureDefinitions) + if (!nrow(df)) { + warning("Unable to group any chromatographic peaks. ", + "You might have to adapt your settings.") + return(object) + } df$peakidx <- res$peakIndex - if (nrow(df) == 0) - stop("Unable to group any chromatographic peaks. You might ", - "have to adapt your settings.") - if (nrow(df) > 0) + df$ms_level <- as.integer(msLevel) + if (!all(chromPeakData(object)$ms_level %in% msLevel)) + df <- .update_feature_definitions( + df, rownames(chromPeaks(object, msLevel = msLevel)), + rownames(chromPeaks(object))) + if (hasFeatures(object)) { + startFrom <- max(as.integer( + sub("FT", "", rownames(featureDefinitions(object))))) + 1 + rownames(df) <- .featureIDs(nrow(df), from = startFrom) + df <- rbind(featureDefinitions(object), df) + } else rownames(df) <- .featureIDs(nrow(df)) featureDefinitions(object) <- df - if (validObject(object)) - return(object) + validObject(object) + object }) #' @title Retention time correction based on alignment of house keeping peak @@ -1803,10 +1858,13 @@ setMethod("groupChromPeaks", #' \code{\link{groupChromPeaks}}). #' #' For all other methods: a \code{PeakGroupsParam} object. -#' +#' #' @param param A \code{PeakGroupsParam} object containing all settings for #' the retention time correction method.. #' +#' @param msLevel \code{integer(1)} specifying the MS level. Currently only MS +#' level 1 is supported. +#' #' @return #' #' For \code{adjustRtime}: a \code{\link{XCMSnExp}} object with the @@ -1816,18 +1874,20 @@ setMethod("groupChromPeaks", #' peaks (accessed \emph{via} \code{\link{chromPeaks}}. Note that retention #' time correction drops all previous alignment results from the result #' object. -#' +#' #' @seealso \code{\link{XCMSnExp}} for the object containing the results of #' the alignment. -#' +#' #' @rdname adjustRtime-peakGroups setMethod("adjustRtime", signature(object = "XCMSnExp", param = "PeakGroupsParam"), - function(object, param) { + function(object, param, msLevel = 1L) { if (hasAdjustedRtime(object)) { message("Removing previous alignment results") object <- dropAdjustedRtime(object) } + if (any(msLevel != 1)) + stop("Alignment is currently only supported for MS level 1") if (!hasChromPeaks(object)) stop("No chromatographic peak detection results in 'object'! ", "Please perform first a peak detection using the ", @@ -1836,6 +1896,8 @@ setMethod("adjustRtime", stop("No feature definitions found in 'object'! Please ", "perform first a peak grouping using the ", "'groupChromPeak' method.") + if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) + object <- updateObject(object) startDate <- date() ## If param does contain a peakGroupsMatrix extract that one, ## otherwise generate it. @@ -1844,15 +1906,19 @@ setMethod("adjustRtime", else pkGrpMat <- adjustRtimePeakGroups(object, param = param) res <- do_adjustRtime_peakGroups( - chromPeaks(object), - peakIndex = featureDefinitions(object)$peakidx, + chromPeaks(object, msLevel = msLevel), + peakIndex = .update_feature_definitions( + featureDefinitions(object), rownames(chromPeaks(object)), + rownames(chromPeaks(object, msLevel = msLevel)))$peakidx, rtime = rtime(object, bySample = TRUE), minFraction = minFraction(param), extraPeaks = extraPeaks(param), smooth = smooth(param), span = span(param), family = family(param), - peakGroupsMatrix = pkGrpMat + peakGroupsMatrix = pkGrpMat, + subset = subset(param), + subsetAdjust = subsetAdjust(param) ) ## Add the pkGrpMat that's being used to the param object. peakGroupsMatrix(param) <- pkGrpMat @@ -1873,10 +1939,10 @@ setMethod("adjustRtime", xph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.RTIME.CORRECTION, fileIndex = 1:length(fileNames(object)), - msLevel = msLevel(ph[[length(ph)]])) + msLevel = msLevel) object <- addProcessHistory(object, xph) - if (validObject(object)) - object + validObject(object) + object }) @@ -1894,7 +1960,7 @@ setMethod("adjustRtime", #' of on MS level. Retention times for spectra of other MS levels are #' subsequently adjusted based on the adjustment function defined on the #' retention times of the spectra of MS level \code{msLevel}. -#' +#' #' Calling \code{adjustRtime} on an \code{XCMSnExp} object will cause #' all peak grouping (correspondence) results and any previous retention #' time adjustment results to be dropped. @@ -1902,13 +1968,13 @@ setMethod("adjustRtime", #' @param object For \code{adjustRtime}: an \code{\link{XCMSnExp}} object. #' #' For all other methods: a \code{ObiwarpParam} object. -#' +#' #' @param param A \code{ObiwarpParam} object containing all settings for #' the alignment method. #' #' @param msLevel \code{integer} defining the MS level on which the retention #' time should be performed. -#' +#' #' @return #' #' For \code{adjustRtime,XCMSnExp,ObiwarpParam}: a @@ -1923,7 +1989,7 @@ setMethod("adjustRtime", #' For \code{adjustRtime,OnDiskMSnExp,ObiwarpParam}: a \code{numeric} with #' the adjusted retention times per spectra (in the same order than #' \code{rtime}). -#' +#' #' @seealso \code{\link{XCMSnExp}} for the object containing the results of #' the alignment. #' @@ -1931,7 +1997,7 @@ setMethod("adjustRtime", #' John T. Prince and Edward M. Marcotte. "Chromatographic Alignment of #' ESI-LC-MS Proteomic Data Sets by Ordered Bijective Interpolated Warping" #' \emph{Anal. Chem.} 2006, 78 (17), 6140-6152. -#' +#' #' @rdname adjustRtime-obiwarp setMethod("adjustRtime", signature(object = "XCMSnExp", param = "ObiwarpParam"), @@ -1939,7 +2005,10 @@ setMethod("adjustRtime", ## Drop adjusted retention times if there are some. if (hasAdjustedRtime(object)) object <- dropAdjustedRtime(object) - ## We don't require any detected or aligned peaks. + if (any(msLevel != 1)) + stop("Alignment is currently only supported for MS level 1") + if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) + object <- updateObject(object) startDate <- date() res <- adjustRtime(as(object, "OnDiskMSnExp"), param = param, msLevel = msLevel) @@ -1949,15 +2018,16 @@ setMethod("adjustRtime", ## Add the results. adjustedRtime<- should also fix the retention ## times for the peaks! Want to keep also the latest alignment ## information - adjustedRtime(object) <- unname(split(res, fromFile(object))) + adjustedRtime(object) <- unname( + split(res, as.factor(fromFile(object)))) ## Add the process history step. xph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.RTIME.CORRECTION, fileIndex = 1:length(fileNames(object)), msLevel = msLevel) object <- addProcessHistory(object, xph) - if (validObject(object)) - object + validObject(object) + object }) #' @rdname XCMSnExp-class @@ -1979,9 +2049,9 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, #' @aliases featureValues -#' +#' #' @title Accessing mz-rt feature data values -#' +#' #' @description #' #' \code{featureValues,XCMSnExp} : extract a \code{matrix} for @@ -1991,6 +2061,18 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, #' chromatographic peaks from the same sample can be assigned to a feature. #' Parameter \code{method} allows to specify the method to be used in such #' cases to chose from which of the peaks the value should be returned. +#' Parameter `msLevel` allows to choose a specific MS level for which feature +#' values should be returned (given that features have been defined for that MS +#' level). +#' +#' \code{quantify,XCMSnExp}: return the preprocessing results as an +#' \code{\link{SummarizedExperiment}} object containing the feature abundances +#' as assay matrix, the feature definitions (returned by +#' \code{\link{featureDefinitions}}) as \code{rowData} and the phenotype +#' information as \code{colData}. This is an ideal container for further +#' processing of the data. Internally, the \code{\link{featureValues}} method +#' is used to extract the feature abundances, parameters for that method can +#' be passed to \code{quantify} with \code{...}. #' #' @note #' @@ -1998,10 +2080,10 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, #' \code{xcmsSet} objects. Note that \code{missing = 0} should be used to #' get the same behaviour as \code{groupval}, i.e. report missing values as 0 #' after a call to \code{fillPeaks}. -#' +#' #' @param object A \code{\link{XCMSnExp}} object providing the feature #' definitions. -#' +#' #' @param method \code{character} specifying the method to resolve #' multi-peak mappings within the same sample, i.e. to define the #' \emph{representative} peak for a feature in samples where more than @@ -2012,11 +2094,10 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, #' \code{"maxo"}. #' #' @param value \code{character} specifying the name of the column in -#' \code{chromPeaks(object)} that should be returned or \code{"index"} (the -#' default) to return the index of the peak in the -#' \code{chromPeaks(object)} matrix corresponding to the -#' \emph{representative} peak for the feature in the respective sample. -#' To return the integrated peak area use \code{value = "into"}. +#' \code{chromPeaks(object)} that should be returned. Defaults to +#' \code{"into"} in which case the integrated peak area is returned. To +#' get the index of the peak in the \code{chromPeaks(object)} matrix use +#' \code{"index"}. #' #' @param intensity \code{character} specifying the name of the column in the #' \code{chromPeaks(objects)} matrix containing the intensity value of the @@ -2032,7 +2113,14 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, #' \code{NA} (the default), a \code{numeric} or #' \code{missing = "rowmin_half"}. The latter replaces any \code{NA} with #' half of the row's minimal (non-missing) value. -#' +#' +#' @param msLevel for `featureValues`: `integer` defining the MS level(s) for +#' which feature values should be returned. By default, values for features +#' defined for all MS levels are returned. +#' +#' @param ... For \code{quantify}: additional parameters to be passed on to the +#' \code{\link{featureValues}} method. +#' #' @return #' #' For \code{featureValues}: a \code{matrix} with @@ -2042,115 +2130,131 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, #' \code{matrix} are the same than those of the \code{featureDefinitions} #' \code{DataFrame}. \code{NA} is reported for features without #' corresponding chromatographic peak in the respective sample(s). -#' +#' +#' For \code{quantify}: a \code{\link{SummarizedExperiment}} representing +#' the preprocessing results. +#' #' @author Johannes Rainer -#' +#' #' @seealso #' \code{\link{XCMSnExp}} for information on the data object. +#' #' \code{\link{featureDefinitions}} to extract the \code{DataFrame} with the #' feature definitions. +#' +#' \code{\link{featureChromatograms}} to extract ion chromatograms for each +#' feature. +#' #' \code{\link{hasFeatures}} to evaluate whether the #' \code{\link{XCMSnExp}} provides feature definitions. +#' #' \code{\link{groupval}} for the equivalent method on \code{xcmsSet} objects. -#' +#' #' @rdname XCMSnExp-peak-grouping-results -setMethod("featureValues", - signature(object = "XCMSnExp"), - function(object, method = c("medret", "maxint", "sum"), - value = "index", intensity = "into", filled = TRUE, - missing = NA) { - ## Input argument checkings - if (!hasFeatures(object)) - stop("No peak groups present! Use 'groupChromPeaks' first.") - if (!hasChromPeaks(object)) - stop("No detected chromatographic peaks present! Use ", - "'findChromPeaks' first.") - method <- match.arg(method) - if (method == "sum" & !(value %in% c("into", "maxo"))) - stop("method 'sum' is only allowed if value is set to 'into'", - " or 'maxo'") - if (is.character(missing)) { - if (!(missing %in% c("rowmin_half"))) - stop("if 'missing' is not 'NA' or a numeric it should", - " be one of: \"rowmin_half\".") - } else { - if (!is.numeric(missing) & !is.na(missing)) - stop("'missing' should be either 'NA', a numeric or one", - " of: \"rowmin_half\".") - } - fNames <- basename(fileNames(object)) - nSamples <- seq_along(fNames) - ## Copy all of the objects to avoid costly S4 method calls - - ## improves speed at the cost of higher memory demand. - fts <- chromPeaks(object) - - ## issue #157: replace all values for filled-in peaks with NA - if (!filled) - fts[fts[, "is_filled"] == 1, ] <- NA - grps <- featureDefinitions(object) - ftIdx <- grps$peakidx - ## Match columns - idx_rt <- match("rt", colnames(fts)) - idx_int <- match(intensity, colnames(fts)) - idx_samp <- match("sample", colnames(fts)) - - vals <- matrix(nrow = length(ftIdx), ncol = length(nSamples)) - - if (method == "sum") { - for (i in seq_along(ftIdx)) { - cur_pks <- fts[ftIdx[[i]], c(value, "sample")] - int_sum <- split(cur_pks[, value], cur_pks[, "sample"]) - vals[i, as.numeric(names(int_sum))] <- - unlist(lapply(int_sum, base::sum), use.names = FALSE) - } - } else { - ## Selecting only a single one. - ## Get the indices for the elements. - if (method == "medret") { - medret <- grps$rtmed - for (i in seq_along(ftIdx)) { - gidx <- ftIdx[[i]][ - base::order(base::abs(fts[ftIdx[[i]], - idx_rt] - medret[i]))] - vals[i, ] <- gidx[ - base::match(nSamples, fts[gidx, idx_samp])] - } - } - if (method == "maxint") { - for (i in seq_along(ftIdx)) { - gidx <- ftIdx[[i]][ - base::order(fts[ftIdx[[i]], idx_int], - decreasing = TRUE)] - vals[i, ] <- gidx[base::match(nSamples, - fts[gidx, idx_samp])] - } - } - if (value != "index") { - if (!any(colnames(fts) == value)) - stop("Column '", value, "' not present in the ", - "chromatographic peaks matrix!") - vals <- fts[vals, value] - dim(vals) <- c(length(ftIdx), length(nSamples)) - } - } - - if (value != "index") { - if (is.numeric(missing)) { - vals[is.na(vals)] <- missing - } - if (!is.na(missing) & missing == "rowmin_half") { - for (i in seq_len(nrow(vals))) { - nas <- is.na(vals[i, ]) - if (any(nas)) - vals[i, nas] <- min(vals[i, ], na.rm = TRUE) / 2 - } - } - } - - colnames(vals) <- fNames - rownames(vals) <- rownames(grps) - vals +setMethod("featureValues", "XCMSnExp", function(object, method = c("medret", + "maxint", + "sum"), + value = "into", + intensity = "into", + filled = TRUE, missing = NA, + msLevel = integer()) { + ## Input argument checkings + if (!hasFeatures(object, msLevel = msLevel)) + stop("No feature definitions for MS level(s) ", msLevel, + " present. Call 'groupChromPeaks' first.") + method <- match.arg(method) + if (method == "sum" & !(value %in% c("into", "maxo"))) + stop("method 'sum' is only allowed if value is set to 'into'", + " or 'maxo'") + if (is.character(missing)) { + if (!(missing %in% c("rowmin_half"))) + stop("if 'missing' is not 'NA' or a numeric it should", + " be one of: \"rowmin_half\".") + } else { + if (!is.numeric(missing) & !is.na(missing)) + stop("'missing' should be either 'NA', a numeric or one", + " of: \"rowmin_half\".") + } + fNames <- basename(fileNames(object)) + pks <- chromPeaks(object) + ## issue #157: replace all values for filled-in peaks with NA + if (!filled) + pks[chromPeakData(object)$is_filled, ] <- NA + .feature_values( + pks = pks, fts = featureDefinitions(object, msLevel = msLevel), + method = method, value = value, intensity = intensity, + colnames = fNames, missing = missing) }) + +#' Internal function to extract feature values based on featureDefinitions +#' `fts` and chromPeaks `pks`. +#' +#' @author Johannes Rainer +#' +#' @noRd +.feature_values <- function(pks, fts, method, value = "into", + intensity = "into", colnames, + missing = NA) { + ftIdx <- fts$peakidx + ## Match columns + idx_rt <- match("rt", colnames(pks)) + idx_int <- match(intensity, colnames(pks)) + idx_samp <- match("sample", colnames(pks)) + vals <- matrix(nrow = length(ftIdx), ncol = length(colnames)) + nSamples <- seq_along(colnames) + if (method == "sum") { + for (i in seq_along(ftIdx)) { + cur_pks <- pks[ftIdx[[i]], c(value, "sample"), drop=FALSE] + int_sum <- split(cur_pks[, value], + as.factor(as.integer(cur_pks[, "sample"]))) + vals[i, as.numeric(names(int_sum))] <- + unlist(lapply(int_sum, base::sum), use.names = FALSE) + } + } else { + if (method == "medret") { + medret <- fts$rtmed + for (i in seq_along(ftIdx)) { + gidx <- ftIdx[[i]][ + base::order(base::abs(pks[ftIdx[[i]], + idx_rt] - medret[i]))] + vals[i, ] <- gidx[ + base::match(nSamples, pks[gidx, idx_samp])] + } + } + if (method == "maxint") { + for (i in seq_along(ftIdx)) { + gidx <- ftIdx[[i]][ + base::order(pks[ftIdx[[i]], idx_int], + decreasing = TRUE)] + vals[i, ] <- gidx[base::match(nSamples, + pks[gidx, idx_samp])] + } + } + if (value != "index") { + if (!any(colnames(pks) == value)) + stop("Column '", value, "' not present in the ", + "chromatographic peaks matrix!") + vals <- pks[vals, value] + dim(vals) <- c(length(ftIdx), length(nSamples)) + } + } + if (value != "index") { + if (is.numeric(missing)) { + vals[is.na(vals)] <- missing + } + if (!is.na(missing) & missing == "rowmin_half") { + for (i in seq_len(nrow(vals))) { + nas <- is.na(vals[i, ]) + if (any(nas)) + vals[i, nas] <- min(vals[i, ], na.rm = TRUE) / 2 + } + } + } + colnames(vals) <- colnames + rownames(vals) <- rownames(fts) + vals +} + ## #' @rdname XCMSnExp-peak-grouping-results ## setMethod("groupval", ## signature(object = "XCMSnExp"), @@ -2162,150 +2266,144 @@ setMethod("featureValues", #' @aliases chromatogram -#' +#' #' @title Extracting chromatograms #' #' @description #' -#' \code{chromatogram}: the method allows to extract -#' chromatograms from \code{\link{OnDiskMSnExp}} and -#' \code{\link{XCMSnExp}} objects. See also the -#' \code{\link{chromatogram}} implementation for -#' \code{\link{OnDiskMSnExp}} in the \code{MSnbase} package. +#' `chromatogram`: extract chromatographic data (such as an extracted ion +#' chromatogram, a base peak chromatogram or total ion chromatogram) from +#' an [OnDiskMSnExp] or [XCMSnExp] objects. See also the help page of the +#' `chromatogram` function in the `MSnbase` package. #' #' @details #' -#' Arguments \code{rt} and \code{mz} allow to specify the MS -#' data slice from which the chromatogram should be extracted. -#' The parameter \code{aggregationSum} allows to specify the function to be -#' used to aggregate the intensities across the mz range for the same -#' retention time. Setting \code{aggregationFun = "sum"} would e.g. allow -#' to calculate the \emph{total ion chromatogram} (TIC), -#' \code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC). -#' The length of the extracted \code{\link{Chromatogram}} object, -#' i.e. the number of available data points, corresponds to the number of -#' scans/spectra measured in the specified retention time range. If in a -#' specific scan (for a give retention time) no signal was measured in the -#' specified mz range, a \code{NA_real_} is reported as intensity for the -#' retention time (see Notes for more information). This can be changed -#' using the \code{missing} parameter. +#' Arguments `rt` and `mz` allow to specify the MS data slice (i.e. the m/z +#' range and retention time window) from which the chromatogram should be +#' extracted. These parameters can be either a `numeric` of length 2 with the +#' lower and upper limit, or a `matrix` with two columns with the lower and +#' upper limits to extract multiple EICs at once. +#' The parameter `aggregationSum` allows to specify the function to be +#' used to aggregate the intensities across the m/z range for the same +#' retention time. Setting `aggregationFun = "sum"` would e.g. allow +#' to calculate the **total ion chromatogram** (TIC), +#' `aggregationFun = "max"` the **base peak chromatogram** (BPC). +#' +#' If for a given retention time no intensity is measured in that spectrum a +#' `NA` intensity value is returned by default. This can be changed with the +#' parameter `missing`, setting `missing = 0` would result in a `0` intensity +#' being returned in these cases. #' #' @note #' -#' \code{\link{Chromatogram}} objects extracted with -#' \code{chromatogram} -#' contain \code{NA_real_} values if, for a given retention time, no -#' signal was measured in the specified mz range. If no spectrum/scan is -#' present in the defined retention time window a \code{Chromatogram} object -#' of length 0 is returned. -#' -#' For \code{\link{XCMSnExp}} objects, if adjusted retention times are -#' available, the \code{chromatogram} method will by default report +#' For [XCMSnExp] objects, if adjusted retention times are +#' available, the `chromatogram` method will by default report #' and use these (for the subsetting based on the provided parameter -#' \code{rt}). This can be overwritten with the parameter -#' \code{adjustedRtime}. -#' -#' @param object Either a \code{\link{OnDiskMSnExp}} or -#' \code{\link{XCMSnExp}} object from which the chromatograms should be -#' extracted. -#' -#' @param rt \code{numeric(2)} or two-column \code{matrix} defining the lower +#' `rt`). This can be changed by setting `adjustedRtime = FALSE`. +#' +#' @param object Either a [OnDiskMSnExp] or [XCMSnExp] object from which the +#' chromatograms should be extracted. +#' +#' @param rt `numeric(2)` or two-column `matrix` defining the lower #' and upper boundary for the retention time range(s). If not specified, #' the full retention time range of the original data will be used. -#' It is also possible to submit a \code{numeric(1)} in which case -#' \code{range} is called on it to transform it to a \code{numeric(2)}. #' -#' @param mz \code{numeric(2)} or two-column \code{matrix} defining the lower +#' @param mz `numeric(2)` or two-column `matrix` defining the lower #' and upper mz value for the MS data slice(s). If not specified, the #' chromatograms will be calculated on the full mz range. -#' It is also possible to submit a \code{numeric(1)} in which case -#' \code{range} is called on it to transform it to a \code{numeric(2)}. #' -#' @param adjustedRtime For \code{chromatogram,XCMSnExp}: whether the -#' adjusted (\code{adjustedRtime = TRUE}) or raw retention times -#' (\code{adjustedRtime = FALSE}) should be used for filtering and returned -#' in the resulting \code{\link{Chromatogram}} object. Adjusted +#' @param adjustedRtime For `chromatogram,XCMSnExp`: whether the +#' adjusted (`adjustedRtime = TRUE`) or raw retention times +#' (`adjustedRtime = FALSE`) should be used for filtering and returned +#' in the resulting [MChromatograms] object. Adjusted #' retention times are used by default if available. #' -#' @param aggregationFun \code{character} specifying the function to be used to +#' @param aggregationFun `character(1)` specifying the function to be used to #' aggregate intensity values across the mz value range for the same -#' retention time. Allowed values are \code{"sum"}, \code{"max"}, -#' \code{"mean"} and \code{"min"}. +#' retention time. Allowed values are `"sum"` (the default), `"max"`, +#' `"mean"` and `"min"`. #' -#' @param missing \code{numeric(1)} allowing to specify the intensity value to +#' @param missing `numeric(1)` allowing to specify the intensity value to #' be used if for a given retention time no signal was measured within the -#' mz range of the corresponding scan. Defaults to \code{NA_real_} (see also -#' Details and Notes sections below). Use \code{missing = 0} to resemble the -#' behaviour of the \code{getEIC} from the \code{old} user interface. +#' mz range of the corresponding scan. Defaults to `NA_real_` (see also +#' Details and Notes sections below). Use `missing = 0` to resemble the +#' behaviour of the `getEIC` from the *old* user interface. +#' +#' @param msLevel `integer(1)` specifying the MS level from which the +#' chromatogram should be extracted. Defaults to `msLevel = 1L`. #' -#' @param msLevel \code{integer} specifying the MS level from which the -#' chromatogram should be extracted. Defaults to \code{msLevel = 1L}. -#' #' @param BPPARAM Parallelisation backend to be used, which will #' depend on the architecture. Default is -#' \code{BiocParallel::bparam()}. +#' `BiocParallel::bparam()`. +#' +#' @param filled `logical(1)` whether filled-in peaks should also be +#' returned. Defaults to `filled = FALSE`, i.e. returns only detected +#' chromatographic peaks in the result object. +#' +#' @param include `character(1)` defining which chromatographic peaks should be +#' returned. Supported are `include = "apex_within"` (the default) which +#' returns chromatographic peaks that have their apex within the `mz` `rt` +#' range, `include = "any"` to return all chromatographic peaks which +#' m/z and rt ranges overlap the `mz` and `rt` or `include = "none"` to +#' not include any chromatographic peaks. #' #' @return #' -#' \code{chromatogram} returns a \code{\link{Chromatograms}} object with +#' `chromatogram` returns a [XChromatograms] object with #' the number of columns corresponding to the number of files in -#' \code{object} and number of rows the number of specified ranges (i.e. -#' number of rows of matrices provided with arguments \code{mz} and/or -#' \code{rt}). -#' +#' `object` and number of rows the number of specified ranges (i.e. +#' number of rows of matrices provided with arguments `mz` and/or +#' `rt`). All chromatographic peaks with their apex position within the +#' m/z and retention time range are also retained as well as all feature +#' definitions for these peaks. +#' #' @author Johannes Rainer #' -#' @seealso \code{\link{XCMSnExp}} for the data object. -#' \code{\link{Chromatogram}} for the object representing -#' chromatographic data. +#' @seealso [XCMSnExp] for the data object. +#' [Chromatogram] for the object representing chromatographic data. #' -#' \code{\link{Chromatograms}} for the object allowing to arrange -#' multiple \code{Chromatogram} objects. +#' [XChromatograms] for the object allowing to arrange +#' multiple [XChromatogram] objects. #' -#' \code{\link{plot}} to plot a \code{Chromatogram} or -#' \code{Chromatograms} objects. +#' [plot] to plot a [XChromatogram] or [MChromatograms] objects. #' -#' \code{\link{as}} (\code{as(x, "data.frame")}) in \code{MSnbase} -#' for a method to extract the MS data as \code{data.frame}. +#' `as` (`as(x, "data.frame")`) in `MSnbase` for a method to extract +#' the MS data as `data.frame`. #' #' @export -#' +#' +#' @md +#' #' @rdname chromatogram-method #' #' @examples -#' ## Read some files from the faahKO package. -#' library(xcms) -#' library(faahKO) -#' faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) #' -#' od <- readMSData(faahko_3_files, mode = "onDisk") +#' ## Load a test data set with identified chromatographic peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' #' ## Extract the ion chromatogram for one chromatographic peak in the data. -#' chrs <- chromatogram(od, rt = c(2700, 2900), mz = 335) +#' chrs <- chromatogram(faahko_sub, rt = c(2700, 2900), mz = 335) #' #' chrs -#' -#' ## Plot the chromatogram -#' plot(rtime(chrs[1, 2]), intensity(chrs[1, 2]), type = "l", xlab = "rtime", -#' ylab = "intensity", col = "000080") -#' for(i in c(1, 3)) { -#' points(rtime(chrs[1, i]), intensity(chrs[1, i]), type = "l", -#' col = "00000080") -#' } -#' -#' ## Plot the chromatogram using the dedicated plot method. +#' +#' ## Identified chromatographic peaks +#' chromPeaks(chrs) +#' +#' ## Plot the chromatogram #' plot(chrs) #' #' ## Extract chromatograms for multiple ranges. #' mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) #' rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) -#' chrs <- chromatogram(od, mz = mzr, rt = rtr) +#' chrs <- chromatogram(faahko_sub, mz = mzr, rt = rtr) +#' +#' chromPeaks(chrs) #' -#' chrs -#' -#' ## Plot the extracted chromatograms #' plot(chrs) #' #' ## Get access to all chromatograms for the second mz/rt range @@ -2313,49 +2411,134 @@ setMethod("featureValues", #' #' ## Plot just that one #' plot(chrs[1, , drop = FALSE]) -setMethod("chromatogram", - signature(object = "XCMSnExp"), - function(object, rt, mz, - aggregationFun = "sum", missing = NA_real_, - msLevel = 1L, BPPARAM = bpparam(), - adjustedRtime = hasAdjustedRtime(object)) { - ## Coerce to OnDiskMSnExp. - if (adjustedRtime) - adj_rt <- rtime(object, adjusted = TRUE) - object <- as(object, "OnDiskMSnExp") - if (adjustedRtime) { - ## Replace the original rtime with adjusted ones... - object@featureData$retentionTime <- adj_rt - } - MSnbase::chromatogram(object, rt = rt, mz = mz, - aggregationFun = aggregationFun, - missing = missing, msLevel = msLevel, - BPPARAM = BPPARAM) - }) +setMethod( + "chromatogram", "XCMSnExp", + function(object, rt, mz, aggregationFun = "sum", missing = NA_real_, + msLevel = 1L, BPPARAM = bpparam(), + adjustedRtime = hasAdjustedRtime(object), filled = FALSE, + include = c("apex_within", "any", "none")) { + include <- match.arg(include) + if (adjustedRtime) + adj_rt <- rtime(object, adjusted = TRUE) + object_od <- as(object, "OnDiskMSnExp") + fcs <- c("fileIdx", "spIdx", "seqNum", "acquisitionNum", "msLevel", + "polarity", "retentionTime", "precursorScanNum") + fcs <- intersect(fcs, colnames(.fdata(object))) + object_od <- selectFeatureData(object_od, fcol = fcs) + if (adjustedRtime) + object_od@featureData$retentionTime <- adj_rt + res <- MSnbase::chromatogram(object_od, rt = rt, mz = mz, + aggregationFun = aggregationFun, + missing = missing, msLevel = msLevel, + BPPARAM = BPPARAM) + if (!hasChromPeaks(object) | include == "none") + return(res) + ## Process peaks + lvls <- 1:length(fileNames(object)) + if (missing(rt)) + rt <- c(-Inf, Inf) + if (missing(mz)) + mz <- c(-Inf, Inf) + if (is.matrix(rt) | is.matrix(mz)) { + ## Ensure rt and mz are aligned. + if (!is.matrix(rt)) + rt <- matrix(rt, ncol = 2) + if (!is.matrix(mz)) + mz <- matrix(mz, ncol = 2) + if (nrow(rt) == 1) + rt <- matrix(rep(rt, nrow(mz)), ncol = 2, byrow = TRUE) + if (nrow(mz) == 1) + mz <- matrix(rep(mz, nrow(rt)), ncol = 2, byrow = TRUE) + pk_list <- vector("list", nrow(mz)) + pkd_list <- vector("list", nrow(mz)) + for (i in 1:nrow(mz)) { + pks <- chromPeaks(object, rt = rt[i, ], mz = mz[i, ], + type = include) + pkd <- chromPeakData(object)[rownames(pks), , drop = FALSE] + if (!filled) { + pks <- pks[!pkd$is_filled, , drop = FALSE] + pkd <- extractROWS(pkd, which(!pkd$is_filled)) + } + smpls <- factor(pks[, "sample"], levels = lvls) + pk_list[[i]] <- split.data.frame(pks, smpls) + pkd_list[[i]] <- split.data.frame(pkd, smpls) + } + pks <- do.call(rbind, pk_list) + pks <- pks[seq_along(pks)] + pkd <- do.call(rbind, pkd_list) + pkd <- pkd[seq_along(pkd)] + } else { + pks <- chromPeaks(object, rt = rt, mz = mz, + type = include) + pkd <- chromPeakData(object)[rownames(pks), , drop = FALSE] + if (!filled) { + pks <- pks[!pkd$is_filled, , drop = FALSE] + pkd <- extractROWS(pkd, which(!pkd$is_filled)) + } + smpls <- factor(pks[, "sample"], levels = lvls) + pks <- split.data.frame(pks, smpls) + pkd <- split.data.frame(pkd, smpls) + } + res <- as(res, "XChromatograms") + res@.Data <- matrix( + mapply(unlist(res), pks, pkd, FUN = function(chr, pk, pd) { + chr@chromPeaks <- pk + chr@chromPeakData <- pd + chr + }), nrow = nrow(res), dimnames = dimnames(res)) + res@.processHistory <- object@.processHistory + if (hasFeatures(object)) { + pks_sub <- chromPeaks(res) + ## Loop through each EIC "row" to ensure all features in + ## that EIC are retained. + fts <- lapply(seq_len(nrow(res)), function(r) { + fdev <- featureDefinitions(object, mz = mz(res)[r, ], + rt = rt) + if (nrow(fdev)) { + fdev$row <- r + .subset_features_on_chrom_peaks( + fdev, chromPeaks(object), pks_sub) + } else DataFrame() + }) + res@featureDefinitions <- do.call(rbind, fts) + } + validObject(res) + res + }) #' @rdname XCMSnExp-class #' +#' @aliases faahko_sub +#' #' @description #' #' \code{findChromPeaks} performs chromatographic peak detection #' on the provided \code{XCMSnExp} objects. For more details see the method -#' for \code{\linkS4class{XCMSnExp}}. Note that the \code{findChromPeaks} -#' method for \code{XCMSnExp} objects removes previously identified -#' chromatographic peaks and aligned features. Previous alignment (retention +#' for \code{\linkS4class{XCMSnExp}}. +#' Note that by default (with parameter \code{add = FALSE}) previous peak +#' detection results are removed. Use \code{add = TRUE} to perform a second +#' round of peak detection and add the newly identified peaks to the previous +#' peak detection results. Correspondence results (features) are always removed +#' prior to peak detection. Previous alignment (retention #' time adjustment) results are kept, i.e. chromatographic peak detection #' is performed using adjusted retention times if the data was first -#' aligned using e.g. obiwarp (\code{\link{adjustRtime-obiwarp}}). -#' +#' aligned using e.g. obiwarp (\code{\link{adjustRtime-obiwarp}}). +#' #' @param param A \code{\link{CentWaveParam}}, \code{\link{MatchedFilterParam}}, #' \code{\link{MassifquantParam}}, \code{\link{MSWParam}} or #' \code{\link{CentWavePredIsoParam}} object with the settings for the #' chromatographic peak detection algorithm. -#' +#' +#' @param add For \code{findChromPeaks}: if newly identified chromatographic +#' peaks should be added to the peak matrix with the already identified +#' chromatographic peaks. By default (\code{add = FALSE}) previous +#' peak detection results will be removed. +#' #' @inheritParams findChromPeaks-centWave setMethod("findChromPeaks", signature(object = "XCMSnExp", param = "Param"), function(object, param, BPPARAM = bpparam(), - return.type = "XCMSnExp", msLevel = 1L) { + return.type = "XCMSnExp", msLevel = 1L, add = FALSE) { ## Remove previous correspondence results. if (hasFeatures(object)) { message("Removed feature definitions.") @@ -2364,12 +2547,17 @@ setMethod("findChromPeaks", keepAdjustedRtime = hasAdjustedRtime(object)) } ## Remove previous chromatographic peaks. - if (hasChromPeaks(object)) { + has_peaks <- hasChromPeaks(object) + if (has_peaks & !add) { message("Removed previously identified chromatographic peaks.") object <- dropChromPeaks( object, keepAdjustedRtime = hasAdjustedRtime(object)) } + if (add && has_peaks) { + old_cp <- chromPeaks(object) + old_cpd <- chromPeakData(object) + } meth <- selectMethod("findChromPeaks", signature = c(object = "OnDiskMSnExp", param = class(param))) @@ -2378,134 +2566,189 @@ setMethod("findChromPeaks", BPPARAM = BPPARAM, return.type = return.type, msLevel = msLevel)) + if (add && has_peaks) { + old_cp <- rbindFill(old_cp, chromPeaks(object)) + old_cpd <- rbindFill(old_cpd, chromPeakData(object)) + old_hist <- object@.processHistory + chromPeaks(object) <- old_cp + rownames(old_cpd) <- rownames(chromPeaks(object)) + chromPeakData(object) <- old_cpd + object@.processHistory <- old_hist + } ## object@.processHistory <- list() - if (validObject(object)) - object + validObject(object) + object }) #' @aliases fillChromPeaks -#' +#' #' @title Integrate areas of missing peaks #' #' @description #' #' Integrate signal in the mz-rt area of a feature (chromatographic #' peak group) for samples in which no chromatographic peak for this -#' feature was identified and add it to the \code{chromPeaks}. Such peaks -#' will have a value of \code{1} in the \code{"is_filled"} column of the -#' \code{\link{chromPeaks}} matrix of the object. +#' feature was identified and add it to the [chromPeaks()] matrix. Such +#' *filled-in* peaks are indicated with a `TRUE` in column `"is_filled"` in +#' the result object's [chromPeakData()] data frame. +#' +#' Two different gap-filling approaches are implemented: +#' +#' - `param = FillChromPeaksParam()`: the default of the original `xcms` code. +#' Signal is integrated from the m/z and retention time range as defined in +#' the [featureDefinitions()] data frame, i.e. from the `"rtmin"`, `"rtmax"`, +#' `"mzmin"` and `"mzmax"`. See details below for more information and +#' settings for this method. +#' +#' - `param = ChromPeakAreaParam()`: the area from which the signal for a +#' feature is integrated is defined based on the feature's chromatographic +#' peak areas. The m/z range is by default defined as the the lower quartile +#' of chromatographic peaks' `"mzmin"` value to the upper quartile of the +#' chromatographic peaks' `"mzmax"` values. The retention time range for the +#' area is defined analogously. Alternatively, by setting `mzmin = median`, +#' `mzmax = median`, `rtmin = median` and `rtmax = median` in +#' `ChromPeakAreaParam`, the median `"mzmin"`, `"mzmax"`, `"rtmin"` and +#' `"rtmax"` values from all detected chromatographic peaks of a feature +#' would be used instead. +#' In contrast to the `FillChromPeaksParam` approach this method uses the +#' actual identified chromatographic peaks of a feature to define the area +#' from which the signal should be integrated. #' #' @details #' #' After correspondence (i.e. grouping of chromatographic peaks across #' samples) there will always be features (peak groups) that do not include -#' peaks from every sample. The \code{fillChromPeaks} method defines +#' peaks from every sample. The `fillChromPeaks` method defines #' intensity values for such features in the missing samples by integrating -#' the signal in the mz-rt region of the feature. The mz-rt area is defined -#' by the median mz and rt start and end points of the other detected -#' chromatographic peaks for a given feature. Various parameters allow to -#' increase this area, either by a constant value (\code{fixedMz} and -#' \code{fixedRt}) or by a feature-relative amount (\code{expandMz} and -#' \code{expandRt}). -#' +#' the signal in the mz-rt region of the feature. Two different approaches +#' to define this region are available: with `ChromPeakAreaParam` the region +#' is defined based on the detected **chromatographic peaks** of a feature, +#' while with `FillChromPeaksParam` the region is defined based on the m/z and +#' retention times of the **feature** (which represent the m/z and retentention +#' times of the apex position of the associated chromatographic peaks). For the +#' latter approach various parameters are available to increase the area from +#' which signal is to be integrated, either by a constant value (`fixedMz` and +#' `fixedRt`) or by a feature-relative amount (`expandMz` and `expandRt`). +#' #' Adjusted retention times will be used if available. #' #' Based on the peak finding algorithm that was used to identify the -#' (chromatographic) peaks different internal functions are employed to +#' (chromatographic) peaks, different internal functions are used to #' guarantee that the integrated peak signal matches as much as possible #' the peak signal integration used during the peak detection. For peaks -#' identified with the \code{\link{matchedFilter}} method, signal -#' integration is performed on the \emph{profile matrix} generated with +#' identified with the [matchedFilter()] method, signal +#' integration is performed on the *profile matrix* generated with #' the same settings used also during peak finding (using the same -#' \code{bin} size for example). For direct injection data and peaks -#' identified with the \code{\link{MSW}} algorithm signal is integrated +#' `bin` size for example). For direct injection data and peaks +#' identified with the `MSW` algorithm signal is integrated #' only along the mz dimension. For all other methods the complete (raw) -#' signal within the area defined by \code{"mzmin"}, \code{"mzmax"}, -#' \code{"rtmin"} and \code{"rtmax"} is used. -#' +#' signal within the area is used. +#' #' @note #' -#' The reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and -#' \code{"rtmax"} for the filled peaks represents the actual MS area from +#' The reported `"mzmin"`, `"mzmax"`, `"rtmin"` and +#' `"rtmax"` for the filled peaks represents the actual MS area from #' which the signal was integrated. #' Note that no peak is filled in if no signal was present in a file/sample -#' in the respective mz-rt area. These samples will still show a \code{NA} -#' in the matrix returned by the \code{\link{featureValues}} method. This -#' is in contrast to the \code{\link{fillPeaks.chrom}} method that returned -#' an \code{"into"} and \code{"maxo"} of \code{0} for such peak areas. -#' Growing the mz-rt area using the \code{expandMz} and \code{expandRt} -#' might help to reduce the number of missing peak signals after filling. -#' -#' @param object \code{XCMSnExp} object with identified and grouped -#' chromatographic peaks. +#' in the respective mz-rt area. These samples will still show a `NA` +#' in the matrix returned by the [featureValues()] method. +#' +#' @param object `XCMSnExp` object with identified and grouped chromatographic +#' peaks. +#' +#' @param param `FillChromPeaksParam` or `ChromPeakAreaParam` object +#' defining which approach should be used (see details section). +#' +#' @param mzmin `function` to be applied to values in the `"mzmin"` column of all +#' chromatographic peaks of a feature to define the lower m/z value of the +#' area from which signal for the feature should be integrated. Defaults to +#' `mzmin = function(z) quantile(z, probs = 0.25)` hence using the 25% +#' quantile of all values. +#' +#' @param mzmax `function` to be applied to values in the `"mzmax"` column of all +#' chromatographic peaks of a feature to define the upper m/z value of the +#' area from which signal for the feature should be integrated. Defaults to +#' `mzmax = function(z) quantile(z, probs = 0.75)` hence using the 75% +#' quantile of all values. +#' +#' @param rtmin `function` to be applied to values in the `"rtmin"` column of all +#' chromatographic peaks of a feature to define the lower rt value of the +#' area from which signal for the feature should be integrated. Defaults to +#' `rtmin = function(z) quantile(z, probs = 0.25)` hence using the 25% +#' quantile of all values. +#' +#' @param rtmax `function` to be applied to values in the `"rtmax"` column of all +#' chromatographic peaks of a feature to define the upper rt value of the +#' area from which signal for the feature should be integrated. Defaults to +#' `rtmax = function(z) quantile(z, probs = 0.75)` hence using the 75% +#' quantile of all values. +#' +#' @param expandMz for `FillChromPeaksParam`: `numeric(1)` defining the value +#' by which the mz width of peaks should be expanded. Each peak is expanded +#' in mz direction by `expandMz *` their original m/z width. A value of +#' `0` means no expansion, a value of `1` grows each peak by `1 *` the m/z +#' width of the peak resulting in peaks with twice their original size in +#' m/z direction (expansion by half m/z width to both sides). +#' +#' @param expandRt for `FillChromPeaksParam`: `numeric(1)`, same as `expandMz` +#' but for the retention time width. +#' +#' @param ppm for `FillChromPeaksParam`: `numeric(1)` optionally specifying a +#' *ppm* by which the m/z width of the peak region should be expanded. For +#' peaks with an m/z width smaller than `mean(c(mzmin, mzmax)) * ppm / 1e6`, +#' the `mzmin` will be replaced by +#' `mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)` +#' `mzmax` by +#' `mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)`. +#' This is applied before eventually expanding the m/z width using the +#' `expandMz` parameter. +#' +#' @param fixedMz for `FillChromPeaksParam`: `numeric(1)` defining a constant +#' factor by which the m/z width of each feature is to be expanded. +#' The m/z width is expanded on both sides by `fixedMz` (i.e. `fixedMz` +#' is subtracted from the lower m/z and added to the upper m/z). This +#' expansion is applied *after* `expandMz` and `ppm`. +#' +#' @param fixedRt for `FillChromPeaksParam`: `numeric(1)` defining a constant +#' factor by which the retention time width of each factor is to be +#' expanded. The rt width is expanded on both sides by `fixedRt` (i.e. +#' `fixedRt` is subtracted from the lower rt and added to the upper rt). +#' This expansion is applied *after* `expandRt`. +#' +#' @param msLevel `integer(1)` defining the MS level on which peak filling +#' should be performed (defaults to `msLevel = 1L`). Only peak filling +#' on one MS level at a time is supported, to fill in peaks for MS level 1 +#' and 2 run first using `msLevel = 1` and then (on the returned +#' result object) again with `msLevel = 2`. #' -#' @param param A \code{FillChromPeaksParam} object with all settings. -#' -#' @param expandMz \code{numeric(1)} defining the value by which the mz width of -#' peaks should be expanded. Each peak is expanded in mz direction by -#' \code{expandMz *} their original mz width. A value of \code{0} means no -#' expansion, a value of \code{1} grows each peak by 1 * the mz width of -#' the peak resulting in peakswith twice their original size in mz -#' direction (expansion by half mz width to both sides). -#' -#' @param expandRt \code{numeric(1)}, same as \code{expandRt} but for the -#' retention time width. -#' -#' @param ppm \code{numeric(1)} optionally specifying a \emph{ppm} by which the -#' mz width of the peak region should be expanded. For peaks with an mz -#' width smaller than \code{mean(c(mzmin, mzmax)) * ppm / 1e6}, the -#' \code{mzmin} will be replaced by -#' \code{mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)} -#' and \code{mzmax} by -#' \code{mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}. -#' This is applied before eventually expanding the mz width using the -#' \code{expandMz} parameter. -#' -#' @param fixedMz \code{numeric(1)} defining a constant factor by which the -#' m/z width of each feature is to be expanded. The m/z width is expanded -#' on both sides by \code{fixedMz} (i.e. \code{fixedMz} is subtracted -#' from the lower m/z and added to the upper m/z). This expansion is -#' applied \emph{after} \code{expandMz} and \code{ppm}. -#' -#' @param fixedRt \code{numeric(1)} defining a constant factor by which the -#' retention time width of each factor is to be expanded. The rt width is -#' expanded on both sides by \code{fixedRt} (i.e. \code{fixedRt} is -#' subtracted from the lower rt and added to the upper rt). This -#' expansion is applied \emph{after} \code{expandRt}. -#' #' @param BPPARAM Parallel processing settings. -#' +#' #' @return #' -#' A \code{\link{XCMSnExp}} object with previously missing -#' chromatographic peaks for features filled into its \code{chromPeaks} -#' matrix. +#' A `XCMSnExp` object with previously missing chromatographic peaks for +#' features filled into its [chromPeaks()] matrix. #' #' @rdname fillChromPeaks -#' +#' #' @author Johannes Rainer -#' -#' @seealso \code{\link{groupChromPeaks}} for methods to perform the -#' correspondence. -#' \code{\link{dropFilledChromPeaks}} for the method to remove filled in peaks. +#' +#' @seealso [groupChromPeaks()] for methods to perform the correspondence. +#' +#' @seealso [featureArea] for the function to define the m/z-retention time +#' region for each feature. +#' +#' @md #' #' @examples -#' -#' ## Perform the peak detection using centWave on some of the files from the -#' ## faahKO package. Files are read using the readMSData from the MSnbase -#' ## package -#' library(faahKO) -#' library(xcms) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") -#' -#' ## Create a CentWaveParam object. Note that the noise is set to 10000 to -#' ## speed up the execution of the example - in a real use case the default -#' ## value should be used, or it should be set to a reasonable value. -#' cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 40) -#' -#' res <- findChromPeaks(raw_data, param = cwp) +#' +#' ## Load a test data set with identified chromatographic peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' res <- faahko_sub +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) #' #' ## Perform the correspondence. We assign all samples to the same group. #' res <- groupChromPeaks(res, @@ -2514,49 +2757,42 @@ setMethod("findChromPeaks", #' ## For how many features do we lack an integrated peak signal? #' sum(is.na(featureValues(res))) #' -#' ## Filling missing peak data using default settings. -#' res <- fillChromPeaks(res) +#' ## Filling missing peak data using the peak area from identified +#' ## chromatographic peaks. +#' res <- fillChromPeaks(res, param = ChromPeakAreaParam()) #' -#' ## Get the peaks that have been filled in: -#' fp <- chromPeaks(res)[chromPeaks(res)[, "is_filled"] == 1, ] -#' head(fp) -#' -#' ## Did we get a signal for all missing peaks? +#' ## How many missing values do we have after peak filling? #' sum(is.na(featureValues(res))) #' -#' ## No. +#' ## Get the peaks that have been filled in: +#' fp <- chromPeaks(res)[chromPeakData(res)$is_filled, ] +#' head(fp) #' #' ## Get the process history step along with the parameters used to perform -#' ## The peak filling: +#' ## The peak filling: #' ph <- processHistory(res, type = "Missing peak filling")[[1]] #' ph #' #' ## The parameter class: #' ph@param -#' -#' ## Drop the filled in peaks: -#' res <- dropFilledChromPeaks(res) #' -#' ## Perform the peak filling with modified settings: allow expansion of the -#' ## mz range by a specified ppm and expanding the mz range by mz width/2 -#' prm <- FillChromPeaksParam(ppm = 40, expandMz = 0.5) -#' res <- fillChromPeaks(res, param = prm) +#' ## It is also possible to remove filled-in peaks: +#' res <- dropFilledChromPeaks(res) #' -#' ## Did we get a signal for all missing peaks? #' sum(is.na(featureValues(res))) -#' -#' ## Still the same missing peaks. setMethod("fillChromPeaks", signature(object = "XCMSnExp", param = "FillChromPeaksParam"), - function(object, param, BPPARAM = bpparam()) { - if (!hasFeatures(object)) - stop("'object' does not provide feature definitions! Please ", - "run 'groupChromPeaks' first.") - ## Don't do that if we have already filled peaks? + function(object, param, msLevel = 1L, BPPARAM = bpparam()) { + if (length(msLevel) != 1) + stop("Can only perform peak filling for one MS level at a time") + if (!hasFeatures(object, msLevel = msLevel)) + stop("No feature definitions for MS level ", msLevel, + " present. Please run 'groupChromPeaks' first.") if (.hasFilledPeaks(object)) message("Filled peaks already present, adding still missing", " peaks.") - + if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) + object <- updateObject(object) startDate <- date() expandMz <- expandMz(param) expandRt <- expandRt(param) @@ -2568,20 +2804,21 @@ setMethod("fillChromPeaks", ## Define or extend the peak area from which the signal should be ## extracted. ## Original code: use the median of the min/max rt and mz per peak. - fdef <- featureDefinitions(object) + fdef <- featureDefinitions(object, msLevel = msLevel) aggFunLow <- median aggFunHigh <- median ## Note: we ensure in the downstream function that the rt range is ## within the rt range. For the mz range it doesn't matter. + tmp_pks <- chromPeaks(object)[, c("rtmin", "rtmax", "mzmin", + "mzmax")] pkArea <- do.call( rbind, lapply( fdef$peakidx, function(z) { - tmp <- chromPeaks(object)[z, c("rtmin", "rtmax", - "mzmin", "mzmax"), - drop = FALSE] - pa <- c(aggFunLow(tmp[, 1]), aggFunHigh(tmp[, 2]), - aggFunLow(tmp[, 3]), aggFunHigh(tmp[, 4])) + pa <- c(aggFunLow(tmp_pks[z, 1]), + aggFunHigh(tmp_pks[z, 2]), + aggFunLow(tmp_pks[z, 3]), + aggFunHigh(tmp_pks[z, 4])) ## Check if we have to apply ppm replacement: if (ppm != 0) { mzmean <- mean(pa[3:4]) @@ -2610,15 +2847,17 @@ setMethod("fillChromPeaks", pa[1] <- pa[1] - fixedRt pa[2] <- pa[2] + fixedRt } - return(pa) + pa } )) + rm(tmp_pks) message(".", appendLF = FALSE) colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax") ## Add mzmed column - needed for MSW peak filling. pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea, mzmed = as.numeric(fdef$mzmed)) - pkGrpVal <- featureValues(object) + pkGrpVal <- featureValues(object, value = "index", + msLevel = msLevel) message(".", appendLF = FALSE) ## Check if there is anything to fill... if (!any(is.na(rowSums(pkGrpVal)))) { @@ -2629,15 +2868,37 @@ setMethod("fillChromPeaks", ## Split the object by file and define the peaks for which objectL <- vector("list", length(fileNames(object))) pkAreaL <- objectL + ## We need "only" a list of OnDiskMSnExp, one for each file but + ## instead of filtering by file we create small objects to keep + ## memory requirement to a minimum. + req_fcol <- requiredFvarLabels("OnDiskMSnExp") + min_fdata <- .fdata(object)[, req_fcol] + rt_range <- range(pkArea[, c("rtmin", "rtmax")]) + if (hasAdjustedRtime(object)) + min_fdata$retentionTime <- adjustedRtime(object) for (i in 1:length(fileNames(object))) { - suppressMessages( - objectL[[i]] <- filterFile(object, file = i, - keepAdjustedRtime = TRUE) - ) + fd <- min_fdata[min_fdata$fileIdx == i, ] + fd$fileIdx <- 1L + objectL[[i]] <- new( + "OnDiskMSnExp", + processingData = new("MSnProcess", + files = fileNames(object)[i]), + featureData = new("AnnotatedDataFrame", fd), + phenoData = new("NAnnotatedDataFrame", + data.frame(sampleNames = "1")), + experimentData = new("MIAPE", + instrumentManufacturer = "a", + instrumentModel = "a", + ionSource = "a", + analyser = "a", + detectorType = "a")) ## Want to extract intensities only for peaks that were not ## found in a sample. pkAreaL[[i]] <- pkArea[is.na(pkGrpVal[, i]), , drop = FALSE] } + rm(pkGrpVal) + rm(pkArea) + rm(min_fdata) message(" OK\nStart integrating peak areas from original files") ## Get to know what algorithm was used for the peak detection. ## Special cases are MSWParam (no retention time) and @@ -2654,6 +2915,7 @@ setMethod("fillChromPeaks", mzCenterFun <- prm@mzCenterFun } } + cp_colnames <- colnames(chromPeaks(object)) ## Now rename that to the correct function name in xcms. mzCenterFun <- paste("mzCenter", gsub(mzCenterFun, pattern = "mzCenter.", @@ -2669,32 +2931,31 @@ setMethod("fillChromPeaks", res <- bpmapply(FUN = .getMSWPeakData, objectL, pkAreaL, as.list(1:length(objectL)), MoreArgs = list( - cn = colnames(chromPeaks(object))), + cn = cp_colnames), BPPARAM = BPPARAM, SIMPLIFY = FALSE) } else if (findPeakMethod == "matchedFilter") { res <- bpmapply(FUN = .getChromPeakData_matchedFilter, objectL, pkAreaL, as.list(1:length(objectL)), - MoreArgs = list( - cn = colnames(chromPeaks(object)), - param = prm - ), + MoreArgs = list(cn = cp_colnames, + param = prm, + msLevel = msLevel), BPPARAM = BPPARAM, SIMPLIFY = FALSE) } else { res <- bpmapply(FUN = .getChromPeakData, objectL, pkAreaL, as.list(1:length(objectL)), - MoreArgs = list( - cn = colnames(chromPeaks(object)), - mzCenterFun = mzCenterFun), + MoreArgs = list(cn = cp_colnames, + mzCenterFun = mzCenterFun, + msLevel = msLevel), BPPARAM = BPPARAM, SIMPLIFY = FALSE) } - + rm(objectL) + res <- do.call(rbind, res) - if (any(colnames(res) == "is_filled")) - res[, "is_filled"] <- 1 - else - res <- cbind(res, is_filled = 1) ## cbind the group_idx column to track the feature/peak group. - res <- cbind(res, group_idx = do.call(rbind, pkAreaL)[, "group_idx"]) + res <- cbind( + res, group_idx = unlist(lapply(pkAreaL, + function(z) z[, "group_idx"]), + use.names = FALSE)) ## Remove those without a signal res <- res[!is.na(res[, "into"]), , drop = FALSE] if (nrow(res) == 0) { @@ -2702,17 +2963,233 @@ setMethod("fillChromPeaks", "peaks! Consider increasing 'expandMz' and 'expandRt'.") return(object) } - + ## Intermediate cleanup of objects. + rm(pkAreaL) + gc() + + ## Get the msFeatureData: + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + object@msFeatureData <- new("MsFeatureData") + incr <- nrow(chromPeaks(newFd)) + for (i in unique(res[, "group_idx"])) { + fdef$peakidx[[i]] <- c(fdef$peakidx[[i]], + (which(res[, "group_idx"] == i) + incr)) + } + ## Combine feature data with those from other MS levels + fdef <- rbind( + fdef, featureDefinitions(newFd)[ + featureDefinitions(newFd)$ms_level != msLevel, , + drop = FALSE]) + if (!any(colnames(fdef) == "ms_level")) + fdef$ms_level <- 1L + else + fdef <- fdef[order(fdef$ms_level), ] + ## Define IDs for the new peaks; include fix for issue #347 + maxId <- max(as.numeric( + sub("M", "", sub("^CP", "", rownames(chromPeaks(newFd)))))) + if (maxId < 1) + stop("chromPeaks matrix lacks rownames; please update ", + "'object' with the 'updateObject' function.") + toId <- maxId + nrow(res) + rownames(res) <- sprintf( + paste0("CP", "%0", ceiling(log10(toId + 1L)), "d"), + (maxId + 1L):toId) + chromPeaks(newFd) <- rbind(chromPeaks(newFd), + res[, -ncol(res)]) + cpd <- extractROWS(chromPeakData(newFd), rep(1L, nrow(res))) + cpd[,] <- NA + cpd$ms_level <- as.integer(msLevel) + cpd$is_filled <- TRUE + if (!any(colnames(chromPeakData(newFd)) == "is_filled")) + chromPeakData(newFd)$is_filled <- FALSE + chromPeakData(newFd) <- rbind(chromPeakData(newFd), cpd) + rownames(chromPeakData(newFd)) <- rownames(chromPeaks(newFd)) + featureDefinitions(newFd) <- fdef + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + ## Add a process history step + ph <- XProcessHistory(param = param, + date. = startDate, + type. = .PROCSTEP.PEAK.FILLING, + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) + object <- addProcessHistory(object, ph) ## this also validates object. + object + }) + + +#' @rdname fillChromPeaks +setMethod("fillChromPeaks", + signature(object = "XCMSnExp", param = "ChromPeakAreaParam"), + function(object, param, msLevel = 1L, BPPARAM = bpparam()) { + if (length(msLevel) != 1) + stop("Can only perform peak filling for one MS level at a time") + if (!hasFeatures(object, msLevel = msLevel)) + stop("No feature definitions for MS level ", msLevel, + " present. Please run 'groupChromPeaks' first.") + if (.hasFilledPeaks(object)) + message("Filled peaks already present, adding still missing", + " peaks.") + if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) + object <- updateObject(object) + startDate <- date() + message("Defining peak areas for filling-in .", + appendLF = FALSE) + fts_region <- .features_ms_region( + object, mzmin = param@mzmin, mzmax = param@mzmax, + rtmin = param@rtmin, rtmax = param@rtmax, msLevel = msLevel) + fts_region <- cbind(group_idx = seq_len(nrow(fts_region)), + fts_region, + mzmed = featureDefinitions(object)$mzmed) + message(".", appendLF = FALSE) + pk_idx <- featureValues(object, value = "index", + msLevel = msLevel) + message(".", appendLF = FALSE) + ## Check if there is anything to fill... + if (!any(is.na(rowSums(pk_idx)))) { + message("No missing peaks present.") + return(object) + } + ## Split the object by file and define the peaks for which + objectL <- vector("list", length(fileNames(object))) + pkAreaL <- objectL + ## We need "only" a list of OnDiskMSnExp, one for each file but + ## instead of filtering by file we create small objects to keep + ## memory requirement to a minimum. + req_fcol <- requiredFvarLabels("OnDiskMSnExp") + min_fdata <- .fdata(object)[, req_fcol] + if (hasAdjustedRtime(object)) + min_fdata$retentionTime <- adjustedRtime(object) + for (i in 1:length(fileNames(object))) { + fd <- min_fdata[min_fdata$fileIdx == i, ] + fd$fileIdx <- 1L + objectL[[i]] <- new( + "OnDiskMSnExp", + processingData = new("MSnProcess", + files = fileNames(object)[i]), + featureData = new("AnnotatedDataFrame", fd), + phenoData = new("NAnnotatedDataFrame", + data.frame(sampleNames = "1")), + experimentData = new("MIAPE", + instrumentManufacturer = "a", + instrumentModel = "a", + ionSource = "a", + analyser = "a", + detectorType = "a")) + ## Want to extract intensities only for peaks that were not + ## found in a sample. + pkAreaL[[i]] <- fts_region[is.na(pk_idx[, i]), , drop = FALSE] + } + rm(pk_idx) + rm(fts_region) + rm(min_fdata) + message(" OK\nStart integrating peak areas from original files") + ## Get to know what algorithm was used for the peak detection. + ## Special cases are MSWParam (no retention time) and + ## MatchedFilterParam (integrate from profile matrix). + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + findPeakMethod <- "unknown" + mzCenterFun <- "wMean" + if (length(ph)) { + if (is(ph[[1]], "XProcessHistory")) { + prm <- ph[[1]]@param + findPeakMethod <- .param2string(prm) + ## Check if the param class has a mzCenterFun slot + if (.hasSlot(prm, "mzCenterFun")) + mzCenterFun <- prm@mzCenterFun + } + } + cp_colnames <- colnames(chromPeaks(object)) + ## Now rename that to the correct function name in xcms. + mzCenterFun <- paste("mzCenter", + gsub(mzCenterFun, pattern = "mzCenter.", + replacement = "", fixed = TRUE), sep=".") + if (findPeakMethod == "MSW") { + rts <- rtime(object, bySample = TRUE) + ## Ensure that we REALLY have direct injection data. + if (any(lengths(rts) > 1)) + stop("The data is supposed to be direct injection data, ", + "but I got files with more than one spectrum/", + "retention time!") + ## That's not working, because integration uses the rt. + res <- bpmapply(FUN = .getMSWPeakData, objectL, + pkAreaL, as.list(1:length(objectL)), + MoreArgs = list( + cn = cp_colnames), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } else if (findPeakMethod == "matchedFilter") { + res <- bpmapply(FUN = .getChromPeakData_matchedFilter, + objectL, pkAreaL, as.list(1:length(objectL)), + MoreArgs = list(cn = cp_colnames, + param = prm, + msLevel = msLevel), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } else { + res <- bpmapply(FUN = .getChromPeakData, objectL, + pkAreaL, as.list(1:length(objectL)), + MoreArgs = list(cn = cp_colnames, + mzCenterFun = mzCenterFun, + msLevel = msLevel), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } + rm(objectL) + + res <- do.call(rbind, res) + ## cbind the group_idx column to track the feature/peak group. + res <- cbind( + res, group_idx = unlist(lapply(pkAreaL, + function(z) z[, "group_idx"]), + use.names = FALSE)) + ## Remove those without a signal + res <- res[!is.na(res[, "into"]), , drop = FALSE] + if (nrow(res) == 0) { + warning("Could not integrate any signal for the missing ", + "peaks!") + return(object) + } + ## Intermediate cleanup of objects. + rm(pkAreaL) + ## Get the msFeatureData: newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) - incr <- nrow(chromPeaks(object)) + object@msFeatureData <- new("MsFeatureData") + incr <- nrow(chromPeaks(newFd)) + fdef <- featureDefinitions(newFd, msLevel = msLevel) for (i in unique(res[, "group_idx"])) { fdef$peakidx[[i]] <- c(fdef$peakidx[[i]], (which(res[, "group_idx"] == i) + incr)) } - - chromPeaks(newFd) <- rbind(chromPeaks(object), res[, -ncol(res)]) + ## Combine feature data with those from other MS levels + fdef <- rbind(fdef, + extractROWS( + featureDefinitions(newFd), + which(featureDefinitions(newFd)$ms_level != msLevel))) + if (!any(colnames(fdef) == "ms_level")) + fdef$ms_level <- 1L + else + fdef <- extractROWS(fdef, order(fdef$ms_level)) + ## Define IDs for the new peaks; include fix for issue #347 + maxId <- max(as.numeric( + sub("M", "", sub("^CP", "", rownames(chromPeaks(newFd)))))) + if (maxId < 1) + stop("chromPeaks matrix lacks rownames; please update ", + "'object' with the 'updateObject' function.") + toId <- maxId + nrow(res) + rownames(res) <- sprintf( + paste0("CP", "%0", ceiling(log10(toId + 1L)), "d"), + (maxId + 1L):toId) + chromPeaks(newFd) <- rbind(chromPeaks(newFd), + res[, -ncol(res)]) + cpd <- extractROWS(chromPeakData(newFd), rep(1L, nrow(res))) + cpd[,] <- NA + cpd$ms_level <- as.integer(msLevel) + cpd$is_filled <- TRUE + if (!any(colnames(chromPeakData(newFd)) == "is_filled")) + chromPeakData(newFd)$is_filled <- FALSE + chromPeakData(newFd) <- rbind(chromPeakData(newFd), cpd) + rownames(chromPeakData(newFd)) <- rownames(chromPeaks(newFd)) featureDefinitions(newFd) <- fdef lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd @@ -2720,20 +3197,24 @@ setMethod("fillChromPeaks", ph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.PEAK.FILLING, - fileIndex = 1:length(fileNames(object))) + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) object <- addProcessHistory(object, ph) ## this also validates object. - return(object) + object }) + + #' @rdname fillChromPeaks setMethod( "fillChromPeaks", signature(object = "XCMSnExp", param = "missing"), function(object, param, - BPPARAM = bpparam()) { + BPPARAM = bpparam(), + msLevel = 1L) { fillChromPeaks(object, param = FillChromPeaksParam(), - BPPARAM = BPPARAM) + BPPARAM = BPPARAM, msLevel = msLevel) }) #' @aliases dropFilledChromPeaks @@ -2745,36 +3226,23 @@ setMethod( #' related process history steps. #' #' @rdname XCMSnExp-class -#' +#' #' @seealso \code{\link{fillChromPeaks}} for the method to fill-in eventually #' missing chromatographic peaks for a feature in some samples. setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { if (!.hasFilledPeaks(object)) return(object) - keep_pks <- which(chromPeaks(object)[, "is_filled"] == 0) - newFd <- new("MsFeatureData") - newFd@.xData <- .copy_env(object@msFeatureData) - ## Update index in featureDefinitions - fd <- featureDefinitions(newFd) - fd <- split(fd, 1:nrow(fd)) - fdL <- lapply(fd, function(z) { - z$peakidx <- list(z$peakidx[[1]][z$peakidx[[1]] %in% keep_pks]) - return(z) - }) - featureDefinitions(newFd) <- do.call(rbind, fdL) - ## Remove peaks - chromPeaks(newFd) <- chromPeaks(newFd)[keep_pks, , drop = FALSE] - ## newFd <- .filterChromPeaks(object@msFeatureData, idx = keep_pks) - object@msFeatureData <- newFd + keep_pks <- which(!chromPeakData(object)$is_filled) + object@msFeatureData <- .filterChromPeaks(object@msFeatureData, keep_pks) object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) - if (validObject(object)) - return(object) + validObject(object) + object }) #' @aliases extractMsData #' #' @title DEPRECATED: Extract a `data.frame` containing MS data -#' +#' #' @description #' #' **UPDATE**: the `extractMsData` and `plotMsData` functions are deprecated @@ -2784,7 +3252,7 @@ setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { #' adjusted retention times. In such cases it is advisable to use the #' [applyAdjustedRtime()] function prior to filtering. #' -#' +#' #' Extract a `data.frame` of retention time, mz and intensity #' values from each file/sample in the provided rt-mz range (or for the full #' data range if `rt` and `mz` are not defined). @@ -2799,7 +3267,7 @@ setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { #' @param msLevel `integer` defining the MS level(s) to which the data #' should be sub-setted prior to extraction; defaults to #' `msLevel = 1L`. -#' +#' #' @param adjustedRtime (for `extractMsData,XCMSnExp`): `logical(1)` #' specifying if adjusted or raw retention times should be reported. #' Defaults to adjusted retention times, if these are present in @@ -2820,29 +3288,23 @@ setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { #' @author Johannes Rainer #' #' @md -#' +#' #' @examples -#' ## Read some files from the test data package. -#' library(faahKO) -#' library(xcms) -#' library(magrittr) -#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -#' full.names = TRUE) -#' raw_data <- readMSData(fls[1:2], mode = "onDisk") -#' -#' ## Extract the full data as a data.frame -#' ms_all <- as(raw_data, "data.frame") -#' head(ms_all) -#' nrow(ms_all) #' -#' ## Read the full MS data for a defined mz-rt region. -#' res <- raw_data %>% -#' filterRt(rt = c(2700, 2900)) %>% -#' filterMz(mz = c(300, 320)) %>% -#' as("data.frame") +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") #' -#' head(res) -#' nrow(res) +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Extract the full MS data for a certain retention time range +#' ## as a data.frame +#' tmp <- filterRt(faahko_sub, rt = c(2800, 2900)) +#' ms_all <- as(tmp, "data.frame") +#' head(ms_all) +#' nrow(ms_all) setMethod("extractMsData", "XCMSnExp", function(object, rt, mz, msLevel = 1L, adjustedRtime = hasAdjustedRtime(object)){ @@ -2863,15 +3325,15 @@ setMethod("extractMsData", "XCMSnExp", #' @rdname calibrate-calibrant-mass #' #' @param object An [XCMSnExp] object. -#' +#' #' @param param The `CalibrantMassParam` object with the calibration settings. -#' +#' #' @return #' #' The `calibrate` method returns an [XCMSnExp] object with the #' chromatographic peaks being calibrated. Note that **only** the detected #' peaks are calibrated, but not the individual mz values in each spectrum. -#' +#' #' @md setMethod("calibrate", "XCMSnExp", function(object, param) { if (missing(param)) { @@ -2915,7 +3377,7 @@ setMethod("calibrate", "XCMSnExp", function(object, param) { method <- "shift" } } - + prms <- estimate(close_pks, method) adj_models[[i]] <- prms a <- prms[1] # slope @@ -2940,8 +3402,8 @@ setMethod("calibrate", "XCMSnExp", function(object, param) { type. = .PROCSTEP.CALIBRATION, fileIndex = 1:n_samps) object <- addProcessHistory(object, xph) - if (validObject(object)) - object + validObject(object) + object }) @@ -2954,7 +3416,7 @@ setMethod("calibrate", "XCMSnExp", function(object, param) { #' #' @param FUN For \code{spectrapply}: a function that should be applied to each #' spectrum in the object. -#' +#' #' @rdname XCMSnExp-class setMethod("spectrapply", "XCMSnExp", function(object, FUN = NULL, BPPARAM = bpparam(), ...) { @@ -2964,18 +3426,6 @@ setMethod("spectrapply", "XCMSnExp", function(object, FUN = NULL, callNextMethod() }) -#' @description -#' -#' \code{split} splits an \code{XCMSnExp} object into a \code{list} -#' of \code{XCMSnExp} objects based on the provided parameter \code{f}. -#' Note that by default all pre-processing results are removed by the -#' splitting, except adjusted retention times, if the optional argument -#' \code{keepAdjustedRtime = TRUE} is provided. -#' -#' @param f For \code{split} a vector of length equal to the length of x -#' defining how \code{x} will be splitted. It is converted internally to -#' a \code{factor}. -#' #' @rdname XCMSnExp-filter-methods setMethod("split", "XCMSnExp", function(x, f, drop = FALSE, ...) { @@ -3005,7 +3455,7 @@ setMethod("split", "XCMSnExp", function(x, f, #' \code{XCMSnExp} objects can be combined with the \code{c} function. This #' combines identified chromatographic peaks and the objects' pheno data but #' discards alignment results or feature definitions. -#' +#' #' @rdname XCMSnExp-class c.XCMSnExp <- function(...) { .concatenate_XCMSnExp(...) @@ -3039,7 +3489,7 @@ c.XCMSnExp <- function(...) { #' @seealso [XCMSnExp]. #' #' @md -#' +#' #' @rdname groupnames-XCMSnExp setMethod("groupnames", "XCMSnExp", function(object, mzdec = 0, rtdec = 0, template = NULL) { @@ -3098,7 +3548,7 @@ setMethod("groupnames", "XCMSnExp", function(object, mzdec = 0, rtdec = 0, #' output format (`"mzml"` or `"mzxml"`) or `copy` to specify whether #' general information from the original MS data files (such as data #' processing, software etc) should be copied to the new files. -#' +#' #' @author Johannes Rainer #' #' @md @@ -3120,3 +3570,604 @@ setMethod("writeMSData", signature(object = "XCMSnExp", file = "character"), outformat = outformat, copy = copy, software_processing = software_processing, ...) }) + +#' @title Plot chromatographic peak density along the retention time axis +#' +#' @aliases plotChromPeakDensity +#' +#' @description +#' +#' Plot the density of chromatographic peaks along the retention +#' time axis and indicate which peaks would be (or were) grouped into the +#' same feature based using the *peak density* correspondence method. +#' Settings for the *peak density* method can be passed with an +#' [PeakDensityParam] object to parameter `param`. If the `object` contains +#' correspondence results and the correspondence was performed with the +#' *peak groups* method, the results from that correspondence can be +#' visualized setting `simulate = FALSE`. +#' +#' @details +#' +#' The `plotChromPeakDensity` function allows to evaluate +#' different settings for the *peak density* on an mz slice of +#' interest (e.g. containing chromatographic peaks corresponding to a known +#' metabolite). +#' The plot shows the individual peaks that were detected within the +#' specified `mz` slice at their retention time (x-axis) and sample in +#' which they were detected (y-axis). The density function is plotted as a +#' black line. Parameters for the `density` function are taken from the +#' `param` object. Grey rectangles indicate which chromatographic peaks +#' would be grouped into a feature by the `peak density` correspondence +#' method. Parameters for the algorithm are also taken from `param`. +#' See [groupChromPeaks-density()] for more information about the +#' algorithm and its supported settings. +#' +#' @param object A [XCMSnExp] object with identified +#' chromatographic peaks. +#' +#' @param mz `numeric(2)` defining an mz range for which the peak density +#' should be plotted. +#' +#' @param rt `numeric(2)` defining an optional rt range for which the +#' peak density should be plotted. Defaults to the absolute retention time +#' range of `object`. +#' +#' @param param [PeakDensityParam] from which parameters for the +#' *peak density* correspondence algorithm can be extracted. If not provided +#' and if `object` contains feature definitions with the correspondence/ +#' peak grouping being performed by the *peak density* method, the +#' corresponding parameter class stored in `object` is used. +#' +#' @param simulate `logical(1)` defining whether correspondence should be +#' simulated within the specified m/z / rt region or (with +#' `simulate = FALSE`) whether the results from an already performed +#' correspondence should be shown. +#' +#' @param col Color to be used for the individual samples. Length has to be 1 +#' or equal to the number of samples in `object`. +#' +#' @param xlab `character(1)` with the label for the x-axis. +#' +#' @param ylab `character(1)` with the label for the y-axis. +#' +#' @param xlim `numeric(2)` representing the limits for the x-axis. +#' Defaults to the range of the `rt` parameter. +#' +#' @param main `character(1)` defining the title of the plot. By default +#' (for `main = NULL`) the mz-range is used. +#' +#' @param type `character(1)` specifying how peaks are called to be located +#' within the region defined by `mz` and `rt`. Can be one of `"any"`, +#' `"within"`, and `"apex_within"` for all peaks that are even partially +#' overlapping the region, peaks that are completely within the region, and +#' peaks for which the apex is within the region. This parameter is passed +#' to the [chromPeaks] function. See related documentation for more +#' information and examples. +#' +#' @param ... Additional parameters to be passed to the `plot` function. Data +#' point specific parameters such as `bg` or `pch` have to be of length 1 +#' or equal to the number of samples. +#' +#' @return The function is called for its side effect, i.e. to create a plot. +#' +#' @author Johannes Rainer +#' +#' @seealso [groupChromPeaks-density()] for details on the +#' *peak density* correspondence method and supported settings. +#' +#' @md +#' +#' @rdname plotChromPeakDensity +#' +#' @examples +#' +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Plot the chromatographic peak density for a specific mz range to evaluate +#' ## different peak density correspondence settings. +#' mzr <- c(305.05, 305.15) +#' +#' plotChromPeakDensity(faahko_sub, mz = mzr, pch = 16, +#' param = PeakDensityParam(sampleGroups = rep(1, length(fileNames(faahko_sub))))) +#' +setMethod("plotChromPeakDensity", "XCMSnExp", .plotChromPeakDensity) + +setMethod("updateObject", "XCMSnExp", function(object) { + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + if (hasChromPeaks(newFd)) { + if (is.null(rownames(chromPeaks(newFd)))) + rownames(chromPeaks(newFd)) <- + .featureIDs(nrow(chromPeaks(newFd)), "CP") + if (!.has_chrom_peak_data(newFd)) { + newFd$chromPeakData <- DataFrame( + ms_level = rep(1L, nrow(chromPeaks(newFd))), + row.names = rownames(chromPeaks(newFd))) + if (any(colnames(chromPeaks(newFd)) == "is_filled")) { + newFd$chromPeakData$is_filled <- as.logical( + chromPeaks(newFd)[, "is_filled"]) + newFd$chromPeaks <- + newFd$chromPeaks[, colnames(newFd$chromPeaks) != "is_filled"] + } else + newFd$chromPeakData$is_filled <- FALSE + } + if (hasFeatures(newFd) && + !any(colnames(featureDefinitions(newFd)) == "ms_level")) + newFd$featureDefinitions$ms_level <- 1L + } + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + if (!length(object@.processHistory)) + object@.processHistory <- list() + validObject(object) + object +}) + +#' @rdname XCMSnExp-class +setMethod("chromPeakData", "XCMSnExp", function(object) { + chromPeakData(object@msFeatureData) +}) +#' @rdname XCMSnExp-class +setReplaceMethod("chromPeakData", "XCMSnExp", function(object, value) { + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + chromPeakData(newFd) <- value + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + validObject(object) + object +}) + +#' @description +#' +#' \code{plot} plots the spectrum data (see \code{\link{plot}} for +#' \code{\link{MSnExp}} objects in the \code{MSnbase} package for more details. +#' For \code{type = "XIC"}, identified chromatographic peaks will be indicated +#' as rectangles with border color \code{peakCol}. +#' +#' @param x For \code{plot}: \code{XCMSnExp} object. +#' +#' @param y For \code{plot}: not used. +#' +#' @param peakCol For \code{plot}: the color that should be used to indicate +#' identified chromatographic peaks (only in combination with +#' \code{type = "XIC"} and if chromatographic peaks are present). +#' +#' @rdname XCMSnExp-class +setMethod("plot", c("XCMSnExp", "missing"), + function(x, y, type = c("spectra", "XIC"), + peakCol = "#ff000060", ...) { + type <- match.arg(type) + if (type == "spectra" || !hasChromPeaks(x)) + callNextMethod(x = x, type = type, ...) + else .plot_XIC(x, peakCol = peakCol, ...) + }) + +#' @title Remove chromatographic peaks with too large rt width +#' +#' @aliases refineChromPeaks CleanPeaksParam-class show,CleanPeaksParam-method +#' +#' @description +#' +#' Remove chromatographic peaks with a retention time range larger than the +#' provided maximal acceptable width (`maxPeakwidth`). +#' +#' @note +#' +#' `refineChromPeaks` methods will always remove feature definitions, because +#' a call to this method can change or remove identified chromatographic peaks, +#' which may be part of features. +#' +#' @param maxPeakwidth for `CleanPeaksParam`: `numeric(1)` defining the maximal +#' allowed peak width (in retention time). +#' +#' @param msLevel `integer` defining for which MS level(s) the chromatographic +#' peaks should be cleaned. +#' +#' @param object [XCMSnExp] object with identified chromatographic peaks. +#' +#' @param param `CleanPeaksParam` object defining the settings for the method. +#' +#' @return `XCMSnExp` object with chromatographic peaks exceeding the specified +#' maximal retention time width being removed. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @family chromatographic peak refinement methods +#' +#' @rdname refineChromPeaks-clean +#' +#' @examples +#' +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Distribution of chromatographic peak widths +#' quantile(chromPeaks(faahko_sub)[, "rtmax"] - chromPeaks(faahko_sub)[, "rtmin"]) +#' +#' ## Remove all chromatographic peaks with a width larger 60 seconds +#' data <- refineChromPeaks(faahko_sub, param = CleanPeaksParam(60)) +#' +#' quantile(chromPeaks(data)[, "rtmax"] - chromPeaks(data)[, "rtmin"]) +setMethod("refineChromPeaks", c(object = "XCMSnExp", param = "CleanPeaksParam"), + function(object, param = CleanPeaksParam(), + msLevel = 1L) { + if (!hasChromPeaks(object, msLevel = msLevel)) { + warning("No chromatographic peaks present in for MS level ", + msLevel, ". Please run 'findChromPeaks' first.") + return(object) + } + if (hasFeatures(object)) { + message("Removing feature definitions.") + object <- dropFeatureDefinitions(object) + } + validObject(param) + rtwidths <- chromPeaks(object)[, "rtmax"] - + chromPeaks(object)[, "rtmin"] + sel_ms <- chromPeakData(object)$ms_level %in% msLevel + sel_rt <- rtwidths < param@maxPeakwidth & sel_ms + keep <- which(sel_rt | !sel_ms) + message("Removed ", nrow(chromPeaks(object)) - length(keep), + " of ", nrow(chromPeaks(object)), + " chromatographic peaks.") + msf <- new("MsFeatureData") + msf@.xData <- .copy_env(object@msFeatureData) + chromPeaks(msf) <- chromPeaks(object)[keep, , drop = FALSE] + chromPeakData(msf) <- extractROWS(chromPeakData(object), keep) + object@msFeatureData <- msf + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + xph <- XProcessHistory(param = param, date. = date(), + type. = .PROCSTEP.PEAK.REFINEMENT, + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) + object <- addProcessHistory(object, xph) + validObject(object) + object + }) + +#' @title Merge neighboring and overlapping chromatographic peaks +#' +#' @aliases MergeNeighboringPeaksParam-class show,MergeNeighboringPeaksParam-method +#' +#' @description +#' +#' Peak detection sometimes fails to identify a chromatographic peak correctly, +#' especially for broad peaks and if the peak shape is irregular (mostly for +#' HILIC data). In such cases several smaller peaks are reported. Also, peak +#' detection can result in partially or completely overlapping peaks. To reduce +#' such peak detection artifacts, this function merges chromatographic peaks +#' which are overlapping or close in rt and m/z dimension considering also the +#' measured signal intensities in the region between them. +#' +#' Chromatographic peaks are first expanded in m/z and retention time dimension +#' (based on parameters `expandMz`, `ppm` and `expandRt`) and subsequently +#' grouped into sets of merge candidates if they are (after expansion) +#' overlapping in both m/z and rt (within the same sample). +#' Candidate peaks are merged if the average intensity of the 3 data +#' points in the middle position between them (i.e. at half the distance between +#' `"rtmax"` of the first and `"rtmin"` of the second peak) is larger than a +#' certain proportion (`minProp`) of the smaller maximal intensity (`"maxo"`) +#' of both peaks. In cases in which this calculated mid point is **not** +#' located between the apexes of the two peaks (e.g. if the peaks are largely +#' overlapping) the average signal intensity at half way between the apexes is +#' used instead. Candidate peaks are not joined if all 3 data points between +#' them have `NA` intensities. +#' The joined peaks get the `"mz"`, `"rt"`, `"sn"` and `"maxo"` values from +#' the peak with the largest signal (`"maxo"`) as well as its row in the +#' metadata data frame of the peak (`chromPeakData`). The `"rtmin"`, `"rtmax"` +#' of the merged peaks are updated and `"into"` is recalculated based on all +#' the signal between `"rtmin"` and `"rtmax"` of the new merged peak. See +#' details for information on the `"mzmin"` and `"mzmax"` values of the merged +#' peak. +#' +#' @note +#' +#' Note that **each** peak gets expanded by `expandMz` and `expandRt`, thus +#' peaks differing by `2 * expandMz` (or `expandRt`) will be identified as +#' *overlapping*. As an example: m/z max of one peak is 12.2, m/z min of +#' another one is 12.4, if `expandMz = 0.1` the m/z max of the first peak +#' will be 12.3 and the m/z min of the second one 12.3, thus both are +#' considered overlapping. +#' +#' `refineChromPeaks` methods will always remove feature definitions, because +#' a call to this method can change or remove identified chromatographic peaks, +#' which may be part of features. +#' +#' Merging of chromatographic peaks is performed along the retention time axis, +#' i.e. candidate peaks are first ordered by their `"rtmin"` value. The signals +#' at half way between the first and the second candidate peak are then compared +#' to the smallest `"maxo"` of both and the two peaks are then merged if the +#' average signal between the peaks is larger `minProp`. For merging any +#' additional peak in a candidate peak list the `"maxo"` of that peak and the +#' newly merged peak are considered. +#' +#' @details +#' +#' For each set of candidate peaks an ion chromatogram is +#' extracted using the range of retention times and m/z values of these peaks. +#' The m/z range for the extracted ion chromatogram is expanded by `expandMz` +#' and `ppm` (on both sides) to reduce the possibility of missing signal +#' intensities between candidate peaks (variance of measured m/z values for +#' lower intensities is larger than for higher intensities and thus data points +#' not being part of identified chromatographic peaks tend to have m/z values +#' outside of the m/z range of the candidate peaks - especially for ToF +#' instruments). This also ensures that all data points from the same ion are +#' considered for the peak integration of merged peaks. The smallest and largest +#' m/z value of all data points used in the peak integration of the merged peak +#' are used as the merged peak's m/z range (i.e. columns `"mzmin"` and `"mzmax"`). +#' +#' @param expandRt `numeric(1)` defining by how many seconds the retention time +#' window is expanded on both sides to check for overlapping peaks. +#' +#' @param expandMz `numeric(1)` constant value by which the m/z range of each +#' chromatographic peak is expanded (on both sides!) to check for +#' overlapping peaks. +#' +#' @param ppm `numeric(1)` defining a m/z relative value (in parts per million) +#' by which the m/z range of each chromatographic peak is expanded +#' to check for overlapping peaks. +#' +#' @param minProp `numeric(1)` between `0` and `1` representing the proporion +#' of intensity to be required for peaks to be joined. See description for +#' more details. The default (`minProp = 0.75`) means that peaks are only +#' joined if the signal half way between then is larger 75% of the smallest +#' of the two peak's `"maxo"` (maximal intensity at peak apex). +#' +#' @param msLevel `integer` defining for which MS level(s) the chromatographic +#' peaks should be merged. +#' +#' @param object [XCMSnExp] object with identified chromatographic peaks. +#' +#' @param param `MergeNeighboringPeaksParam` object defining the settings for +#' the method. +#' +#' @param BPPARAM parameter object to set up parallel processing. Uses the +#' default parallel processing setup returned by `bpparam()`. See +#' [bpparam()] for details and examples. +#' +#' @return `XCMSnExp` object with chromatographic peaks matching the defined +#' conditions being merged. +#' +#' @author Johannes Rainer, Mar Garcia-Aloy +#' +#' @md +#' +#' @family chromatographic peak refinement methods +#' +#' @rdname refineChromPeaks-merge +#' +#' @examples +#' +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Subset to a single file +#' xd <- filterFile(faahko_sub, file = 1) +#' +#' ## Example of a split peak that will be merged +#' mzr <- 305.1 + c(-0.01, 0.01) +#' chr <- chromatogram(xd, mz = mzr, rt = c(2700, 3700)) +#' plot(chr) +#' +#' ## Combine the peaks +#' res <- refineChromPeaks(xd, param = MergeNeighboringPeaksParam(expandRt = 4)) +#' chr_res <- chromatogram(res, mz = mzr, rt = c(2700, 3700)) +#' plot(chr_res) +#' +#' ## Example of a peak that was not merged, because the signal between them +#' ## is lower than the cut-off minProp +#' mzr <- 496.2 + c(-0.01, 0.01) +#' chr <- chromatogram(xd, mz = mzr, rt = c(3200, 3500)) +#' plot(chr) +#' chr_res <- chromatogram(res, mz = mzr, rt = c(3200, 3500)) +#' plot(chr_res) +setMethod("refineChromPeaks", c(object = "XCMSnExp", + param = "MergeNeighboringPeaksParam"), + function(object, param = MergeNeighboringPeaksParam(), + msLevel = 1L, BPPARAM = bpparam()) { + if (!hasChromPeaks(object, msLevel = msLevel)) { + warning("No chromatographic peaks present in for MS level ", + msLevel, ". Please run 'findChromPeaks' first.") + return(object) + } + if (hasFeatures(object)) { + message("Removing feature definitions.") + object <- dropFeatureDefinitions(object) + } + validObject(param) + peak_count <- nrow(chromPeaks(object)) + res <- bplapply(.split_by_file2(object, msLevel. = msLevel, + to_class = "XCMSnExp", + subsetFeatureData = TRUE, + keep_sample_idx = TRUE), + FUN = .merge_neighboring_peaks, + expandRt = param@expandRt, + expandMz = param@expandMz, ppm = param@ppm, + minProp = param@minProp, + BPPARAM = BPPARAM) + pks <- do.call(rbind, lapply(res, "[[", 1)) + pkd <- do.call(rbind, lapply(res, "[[", 2)) + ## Add also peaks for other MS levels! + other_msl <- !(chromPeakData(object)$ms_level %in% msLevel) + if (any(other_msl)) { + pks <- rbind(pks, chromPeaks(object)[other_msl, , drop = FALSE]) + pkd <- rbind(pkd, extractROWS(chromPeakData(object), which(other_msl))) + } + which_new <- is.na(rownames(pks)) + pkd$merged <- which_new + max_id <- max(as.numeric(sub("CP", "", rownames(pks))), + na.rm = TRUE) + if (!is.finite(max_id)) + max_id <- 0 + rownames(pks)[which_new] <- .featureIDs(sum(which_new), + prefix = "CPM", + from = max_id + 1) + rownames(pkd) <- rownames(pks) + message("Merging reduced ", peak_count, " chromPeaks to ", + nrow(pks), ".") + msf <- new("MsFeatureData") + msf@.xData <- .copy_env(object@msFeatureData) + chromPeaks(msf) <- pks + chromPeakData(msf) <- pkd + object@msFeatureData <- msf + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + xph <- XProcessHistory(param = param, date. = date(), + type. = .PROCSTEP.PEAK.REFINEMENT, + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) + object <- addProcessHistory(object, xph) + validObject(object) + object + }) + +#' @title Remove chromatographic peaks based on intensity +#' +#' @aliases FilterIntensityParam-class show,FilterIntensityParam-method +#' +#' @description +#' +#' Remove chromatographic peaks with intensities below the specified threshold. +#' By default, with `nValues = 1`, all peaks with an intensity +#' `>= threshold` are retained. Parameter `value` allows to specify the column of +#' the [chromPeaks()] matrix that should be used for the filtering (defaults to +#' `value = "maxo"` and thus evaluating the maximal intensity for each peak). +#' With `nValues > 1` it is possible to keep only peaks that have `nValues` +#' intensities `>= threshold`. Note that this requires data import from the +#' original MS files and run time of the call can thus be significantly larger. +#' Also, for `nValues > 1` parameter `value` is ignored. +#' +#' @param threshold `numeric(1)` defining the minimal required intensity for +#' a peak to be retained. Defaults to `threshold = 0`. +#' +#' @param nValues `integer(1)` defining the number of data points (per +#' chromatographic peak) that have to be `>= threshold`. Defaults to +#' `nValues = 1`. +#' +#' @param value `character(1)` specifying the column in [chromPeaks()] that +#' should be used for the comparison. This is ignored for `nValues > 1`. +#' +#' @param msLevel `integer(1)` defining the MS level in which peaks should be +#' filtered. +#' +#' @param object [XCMSnExp] object with identified chromatographic peaks. +#' +#' @param param `FilterIntensityParam` object defining the settings for +#' the method. +#' +#' @param BPPARAM parameter object to set up parallel processing. Uses the +#' default parallel processing setup returned by `bpparam()`. See +#' [bpparam()] for details and examples. +#' +#' @return `XCMSnExp` object with filtererd chromatographic peaks. +#' +#' @author Johannes Rainer, Mar Garcia-Aloy +#' +#' @md +#' +#' @family chromatographic peak refinement methods +#' +#' @rdname refineChromPeaks-filter-intensity +#' +#' @examples +#' +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Remove all peaks with a maximal intensity below 50000 +#' res <- refineChromPeaks(faahko_sub, param = FilterIntensityParam(threshold = 50000)) +#' +#' nrow(chromPeaks(faahko_sub)) +#' nrow(chromPeaks(res)) +#' +#' all(chromPeaks(res)[, "maxo"] > 50000) +#' +#' ## Keep only chromatographic peaks that have 3 signals above 20000; we +#' ## perform this on the data of a single file. +#' xdata <- filterFile(faahko_sub) +#' +#' res <- refineChromPeaks(xdata, FilterIntensityParam(threshold = 20000, nValues = 3)) +#' nrow(chromPeaks(xdata)) +#' nrow(chromPeaks(res)) +setMethod("refineChromPeaks", c(object = "XCMSnExp", + param = "FilterIntensityParam"), + function(object, param = FilterIntensityParam(), + msLevel = 1L, BPPARAM = bpparam()) { + if (!hasChromPeaks(object, msLevel = msLevel)) { + warning("No chromatographic peaks present in for MS level ", + msLevel, ". Please run 'findChromPeaks' first.") + return(object) + } + if (hasFeatures(object)) { + message("Removing feature definitions.") + object <- dropFeatureDefinitions(object) + } + validObject(param) + peak_count <- nrow(chromPeaks(object)) + if (param@nValues == 1) { + ## Simple subsetting of the chromPeaks matrix. + if (!any(colnames(chromPeaks(object)) %in% param@value)) + stop("Column '", value, "' not found in chromPeaks matrix") + keep <- chromPeaks(object)[, param@value] >= param@threshold | + !chromPeakData(object)$ms_level %in% msLevel + } else { + res <- bplapply(.split_by_file2(object, to_class = "XCMSnExp", + msLevel. = 1:10), + FUN = .chrom_peaks_above_threshold, + nValues = param@nValues, + threshold = param@threshold, + msLevel = msLevel, BPPARAM = BPPARAM) + keep <- unlist(res, use.names = FALSE) + if (length(keep) != nrow(chromPeaks(object))) + stop("Length of variable 'keep' does not match number ", + "of peaks. Please contact developers.") + } + msfd <- .filterChromPeaks(object, idx = which(keep)) + object@msFeatureData <- msfd + message("Removed ", peak_count - nrow(chromPeaks(object)), + " chromatographic peaks.") + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + xph <- XProcessHistory(param = param, date. = date(), + type. = .PROCSTEP.PEAK.REFINEMENT, + fileIndex = 1:length(fileNames(object)), + msLevel = msLevel) + object <- addProcessHistory(object, xph) + validObject(object) + object + }) + +#' @rdname XCMSnExp-filter-methods +setMethod("filterChromPeaks", "XCMSnExp", + function(object, keep = rep(TRUE, nrow(chromPeaks(object))), + method = "keep", ...) { + method <- match.arg(method) + object <- switch( + method, + keep = { + idx <- .i2index(keep, ids = rownames(chromPeaks(object)), + name = "keep") + object@msFeatureData <- .filterChromPeaks(object, idx) + object + }) + validObject(object) + object + }) diff --git a/R/methods-XChromatogram.R b/R/methods-XChromatogram.R new file mode 100644 index 000000000..a345623be --- /dev/null +++ b/R/methods-XChromatogram.R @@ -0,0 +1,503 @@ +#' @rdname XChromatogram +#' +#' @md +setMethod("show", "XChromatogram", function(object) { + callNextMethod() + cat("Identified chromatographic peaks (", nrow(object@chromPeaks),"):\n", + sep = "") + cat(" rt\trtmin\trtmax\tinto\tmaxo\tsn ") + nc <- ncol(object@chromPeaks) + if (nc > 6) + cat("(", nc - 6, " more column(s))", sep = "") + cat("\n") + for (i in seq_len(nrow(object@chromPeaks))) + cat(" ", paste(object@chromPeaks[i, .CHROMPEAKS_REQ_NAMES], + collapse = "\t"), "\n", sep = "") +}) + +#' @rdname XChromatogram +#' +#' @aliases filterChromPeaks +#' +#' @section Accessing data: +#' +#' See also help of [Chromatogram] in the `MSnbase` package for general +#' information and data access. The methods listed here are specific for +#' `XChromatogram` and `XChromatograms` objects. +#' +#' - `chromPeaks`, `chromPeaks<-`: extract or set the matrix with the +#' chromatographic peak definitions. Parameter `rt` allows to specify a +#' retention time range for which peaks should be returned along with +#' parameter `type` that defines how *overlapping* is defined (parameter +#' description for details). For `XChromatogram` objects the function returns +#' a `matrix` with columns `"rt"` (retention time of the peak apex), +#' `"rtmin"` (the lower peak boundary), `"rtmax"` (the upper peak boundary), +#' `"into"` (the ingegrated peak signal/area of the peak), `"maxo"` (the +#' maximum instensity of the peak and `"sn"` (the signal to noise ratio). +#' Note that, depending on the peak detection algorithm, the matrix may +#' contain additional columns. +#' For `XChromatograms` objects the `matrix` contains also columns `"row"` +#' and `"column"` specifying in which chromatogram of `object` the peak was +#' identified. Chromatographic peaks are ordered by row. +#' +#' - `chromPeakData`, `chromPeakData<-`: extract or set the [DataFrame()] with +#' optional chromatographic peak annotations. +#' +#' - `hasChromPeaks`: infer whether a `XChromatogram` (or `XChromatograms`) +#' has chromatographic peaks. For `XChromatogram`: returns a `logical(1)`, +#' for `XChromatograms`: returns a `matrix`, same dimensions than `object` +#' with either `TRUE` or `FALSE` if chromatographic peaks are available in +#' the chromatogram at the respective position. +#' +#' - `hasFilledChromPeaks`: whether a `XChromatogram` (or a `XChromatogram` in +#' a `XChromatograms`) has filled-in chromatographic peaks. +#' For `XChromatogram`: returns a `logical(1)`, +#' for `XChromatograms`: returns a `matrix`, same dimensions than `object` +#' with either `TRUE` or `FALSE` if chromatographic peaks are available in +#' the chromatogram at the respective position. +#' +#' - `dropFilledChromPeaks`: removes filled-in chromatographic peaks. See +#' [dropFilledChromPeaks()] help for [XCMSnExp()] objects for more +#' information. +#' +#' - `hasFeatures`: for `XChromatograms` objects only: if correspondence +#' analysis has been performed and m/z-rt feature definitions are present. +#' Returns a `logical(1)`. +#' +#' - `dropFeatureDefinitions`: for `XChrmomatograms` objects only: delete any +#' correspondence analysis results (and related process history). +#' +#' - `featureDefinitions`: for `XChromatograms` objects only. Extract the +#' results from the correspondence analysis (performed with +#' `groupChromPeaks`). Returns a `DataFrame` with the properties of the +#' defined m/z-rt features: their m/z and retention time range. Columns +#' `peakidx` and `row` contain the index of the chromatographic peaks in the +#' `chromPeaks` matrix associated with the feature and the row in the +#' `XChromatograms` object in which the feature was defined. Similar to the +#' `chromPeaks` method it is possible to filter the returned feature matrix +#' with the `mz`, `rt` and `ppm` parameters. +#' +#' - `featureValues`: for `XChromatograms` objects only. Extract the abundance +#' estimates for the individuals features. Note that by default (with +#' parameter `value = "index"` a `matrix` of indices of the peaks in the +#' `chromPeaks` matrix associated to the feature is returned. To extract the +#' integrated peak area use `value = "into"`. The function returns a `matrix` +#' with one row per feature (in `featureDefinitions`) and each column being +#' a sample (i.e. column of `object`). For features without a peak associated +#' in a certain sample `NA` is returned. This can be changed with the +#' `missing` argument of the function. +#' +#' - `filterChromPeaks`: *filters* chromatographic peaks in `object` depending +#' on parameter `method` and method-specific parameters passed as additional +#' arguments with `...`. Available methods are: +#' - `method = "keepTop"`: keep top `n` (default `n = 1L`) peaks in each +#' chromatogram ordered by column `order` (defaults to `order = "maxo"`). +#' Parameter `decreasing` (default `decreasing = TRUE`) can be used to +#' order peaks in descending (`decreasing = TRUE`) or ascending ( +#' `decreasing = FALSE`) order to keep the top `n` peaks with largest or +#' smallest values, respectively. +#' +#' - `processHistory`: returns a `list` of [ProcessHistory] objects representing +#' the individual performed processing steps. Optional parameters `type` and +#' `fileIndex` allow to further specify which processing steps to return. +#' +#' @section Manipulating data: +#' +#' - `transformIntensity`: transforms the intensity values of the chromatograms +#' with provided function `FUN`. See [transformIntensity()] in the `MSnbase` +#' package for details. For `XChromatogram` and `XChromatograms` in addition +#' to the intensity values also columns `"into"` and `"maxo"` in the object's +#' `chromPeaks` matrix are transformed by the same function. +#' +#' @param i For `[`: `integer` with the row indices to subset the +#' `XChromatograms` object. +#' +#' @param j For `[`: `integer` with the column indices to subset the +#' `XChromatograms` object. +#' +#' @param drop For `[`: `logical(1)` whether the dimensionality should be +#' dropped (if possible). Defaults to `drop = TRUE`, thus, if length of `i` +#' and `j` is 1 a `XChromatogram` is returned. Note that `drop` is ignored +#' if length of `i` or `j` is larger than 1, thus a `XChromatograms` is +#' returned. +#' +#' @param FUN For `transformIntensity`: a function to transform the intensity +#' values of `object`. +#' +#' @param method For `featureValues`: `character(1)` specifying the method to +#' resolve multi-peak mappings within the sample sample, i.e. to select +#' the *representative* peak for a feature for which more than one peak +#' was assigned in one sample. Options are `"medret"` (default): select the +#' peak closest to the median retention time of the feature, `"maxint"`: +#' select the peak with the largest signal and `"sum"`: sum the values +#' of all peaks (only if `value` is `"into"` or `"maxo"`). +#' For `filterChromPeaks`: `character(1)` defining the method that should +#' be used to filter chromatographic peaks. See help on `filterChromPeaks` +#' below for details. +#' +#' @param missing For `featureValues`: how missing values should be reported. +#' Allowed values are `NA` (default), a `numeric(1)` to replace `NA`s with +#' that value or `missing = "rowmin_half"` to replace `NA`s with half +#' of the row's minimal (non-missing) value. +#' +#' @param rt For `chromPeaks` and `featureDefinitions`: `numeric(2)` defining +#' the retention time range for which chromatographic peaks or features +#' should be returned. +#' For `filterRt`: `numeric(2)` defining the retention time range to +#' reduce `object` to. +#' +#' @param ppm For `chromPeaks` and `featureDefinitions`: `numeric(1)` defining +#' a ppm to expand the provided m/z range. +#' +#' @param type For `chromPeaks` and `featureDefinitions`: `character(1)` +#' defining which peaks or features to return if `rt` or `mz` is provided: +#' `"any"` (default) return all peaks that are even +#' partially overlapping with `rt`, `"within"` return peaks that are +#' completely within `rt` and `"apex_within"` return peaks which apex +#' is within `rt`. +#' +#' For `plot`: what type of plot should be used for the +#' chromatogram (such as `"l"` for lines, `"p"` for points etc), see help +#' of [plot()] in the `graphics` package for more details. +#' For `processHistory`: restrict returned processing steps to specific +#' types. Use [processHistoryTypes()] to list all supported values. +#' +#' @param value For `chromPeaks<-`: a numeric `matrix` with required columns +#' `"rt"`, `"rtmin"`, `"rtmax"`, `"into"` and `"maxo"`. +#' +#' For `featureValues`: `character(1)` specifying the name of the column in +#' `chromPeaks(object)` that should be returned or `"index"` (default) to +#' return the index of the peak associated with the feature in each sample. +#' To return the integrated peak area instead of the index use +#' `value = "into"`. +#' +#' @md +#' +#' @seealso +#' +#' [findChromPeaks-centWave][findChromPeaks-Chromatogram-CentWaveParam] for peak +#' detection on [MChromatograms()] objects. +#' +#' @examples +#' +#' ## Extract the chromatographic peaks +#' chromPeaks(xchr) +setMethod("chromPeaks", "XChromatogram", function(object, rt = numeric(), + mz = numeric(), ppm = 0, + type = c("any", "within", + "apex_within"), + msLevel) { + type <- match.arg(type) + pks <- object@chromPeaks + if (!missing(msLevel)) + pks <- pks[chromPeakData(object)$ms_level %in% msLevel, , drop = FALSE] + if (length(rt) && nrow(pks)) { + rt <- range(rt) + pks <- switch(type, + any = pks[which(pks[, "rtmin"] <= rt[2] & + pks[, "rtmax"] >= rt[1]), , drop = FALSE], + within = pks[which(pks[, "rtmin"] >= rt[1] & + pks[, "rtmax"] <= rt[2]), , + drop = FALSE], + apex_within = pks[which(pks[, "rt"] >= rt[1] & + pks[, "rt"] <= rt[2]), , + drop = FALSE] + ) + } + if (length(mz) && nrow(pks) & all(c("mz", "mzmin", "mzmax") + %in% colnames(pks))) { + mz <- .ppm_range(mz, ppm = ppm) + pks <- switch(type, + any = pks[which(pks[, "mzmin"] <= mz[2] & + pks[, "mzmax"] >= mz[1]), , drop = FALSE], + within = pks[which(pks[, "mzmin"] >= mz[1] & + pks[, "mzmax"] <= mz[2]), , + drop = FALSE], + apex_within = pks[which(pks[, "mz"] >= mz[1] & + pks[, "mz"] <= mz[2]), , + drop = FALSE] + ) + } + pks +}) + +#' @rdname XChromatogram +setReplaceMethod("chromPeaks", "XChromatogram", function(object, value) { + if (!is.matrix(value)) + stop("'value' should be a numeric matrix") + object@chromPeaks <- value + object@chromPeakData <- DataFrame(ms_level = rep(1L, nrow(value)), + is_filled = rep(FALSE, nrow(value)), + row.names = rownames(value)) + validObject(object) + object +}) + +#' @rdname XChromatogram +#' +#' @section Plotting and visualizing: +#' +#' - `plot` draws the chromatogram and highlights in addition any +#' chromatographic peaks present in the `XChromatogram` or `XChromatograms` +#' (unless `peakType = "none"` was specified). To draw peaks in different +#' colors a vector of color definitions with length equal to +#' `nrow(chromPeaks(x))` has to be submitted with `peakCol` and/or `peakBg` +#' defining one color for each peak (in the order as peaks are in +#' `chromPeaks(x))`. For base peak chromatograms or total ion chromatograms +#' it might be better to set `peakType = "none"` to avoid generating busy +#' plots. +#' +#' - `plotChromPeakDensity`: visualize *peak density*-based correspondence +#' analysis results. See section *Correspondence analysis* for more details. +#' +#' @note +#' +#' Highlighting the peak area(s) in an `XChromatogram` or `XChromatograms` +#' object (`plot` with `peakType = "polygon"`) draws a polygon representing +#' the displayed chromatogram from the peak's minimal retention time to the +#' maximal retention time. If the `XChromatograms` was extracted from an +#' [XCMSnExp()] object with the [chromatogram()] function this might not +#' represent the actual identified peak area if the m/z range that was +#' used to extract the chromatogram was larger than the peak's m/z. +#' +#' @param x For `plot`: an `XChromatogram` or `XChromatograms` object. +#' +#' @param col For `plot`: the color to be used to draw the chromatogram. +#' +#' @param lty For `plot` and `plotChromPeakDensity`: the line type. +#' +#' @param xlab For `plot` and `plotChromPeakDensity`: the x axis label. +#' +#' @param ylab For `plot`: the y axis label. +#' +#' @param main For `plot` and `plotChromPeakDensity`: an optional title for +#' the plot. +#' +#' @param peakType For `plot` and `plotChromPeakDensity`: +#' `character(1)` defining how (and if) identified chromatographic peak +#' within the chromatogram should be plotted. Options +#' are `"polygon"` (default): draw the peak borders with the `peakCol` color +#' and fill the peak area with the `peakBg` color, `"point"`: indicate the +#' peak's apex with a point, `"rectangle"`: draw a rectangle around the +#' identified peak and `"none"`: don't draw peaks. +#' +#' @param peakCol For `plot` and `plotChromPeakDensity`: the foreground color +#' for the peaks. For `peakType = "polygon"` and `peakType = "rectangle"` +#' this is the color for the border. Use `NA` to not use a foreground +#' color. This should either be a single color or a vector of colors with +#' the same length than `chromPeaks(x)` has rows. +#' +#' @param peakBg For `plot` and `plotChromPeakDensity`: the background color +#' for the peaks. For `peakType = "polygon"` and `peakType = "rectangle"` +#' the peak are or rectangle will be filled with this color. Use `NA` to +#' skip. This should be either a single color or a vector of colors with +#' the same length than `chromPeaks(x)` has rows. +#' +#' @param peakPch For `plot` and `plotChromPeakDensity`: the point character +#' to be used for `peakType = "point"`. See [plot()] in the `graphics` +#' package for more details. +#' +#' @param param For `groupChromPeaks` and `plotChromPeakDensity`: a +#' [PeakDensityParam()] object with the settings for the *peak density* +#' correspondence analysis algorithm. +#' +#' @param simulate For `plotChromPeakDensity`: `logical(1)` whether a +#' correspondence analysis should be *simulated* based on the available +#' data and the provided [PeakDensityParam()] `param` argument. See +#' section *Correspondence analysis* for details. +#' +#' @param ... For `filterChromPeaks`: additional parameters defining how to +#' filter chromatographic peaks. See function description below for details. +#' +#' @md +#' +#' @examples +#' +#' ## Plotting of a single XChromatogram object +#' ## o Don't highlight chromatographic peaks +#' plot(xchr, peakType = "none") +#' +#' ## o Indicate peaks with a polygon +#' plot(xchr) +#' +#' ## Add a second peak to the data. +#' pks <- rbind(chromPeaks(xchr), c(7, 7, 10, NA, 15, NA)) +#' chromPeaks(xchr) <- pks +#' +#' ## Plot the peaks in different colors +#' plot(xchr, peakCol = c("#ff000080", "#0000ff80"), +#' peakBg = c("#ff000020", "#0000ff20")) +#' +#' ## Indicate the peaks as rectangles +#' plot(xchr, peakCol = c("#ff000060", "#0000ff60"), peakBg = NA, +#' peakType = "rectangle") +setMethod("plot", "XChromatogram", function(x, col = "#00000060", lty = 1, + type = "l", + xlab = "retention time", + ylab = "intensity", + main = NULL, + peakType = c("polygon", + "point", + "rectangle", + "none"), + peakCol = "#00000060", + peakBg = "#00000020", + peakPch = 1, ...) { + peakType <- match.arg(peakType) + callNextMethod(x = x, col = col, lty = lty, type = type, xlab = xlab, + ylab = ylab, main = main, ...) + pks <- chromPeaks(x) + nr <- nrow(pks) + if (nr && peakType != "none") { + if (length(peakCol) != nr) + peakCol <- rep(peakCol[1], nr) + if (length(peakBg) != nr) + peakBg <- rep(peakBg[1], nr) + if (length(peakPch) != nr) + peakPch <- rep(peakPch[1], nr) + suppressWarnings(.add_chromatogram_peaks(x, pks, col = peakCol, + bg = peakBg, type = peakType, + pch = peakPch, ...)) + } +}) + +#' @rdname XChromatogram +#' +#' @section Filtering and subsetting: +#' +#' - `[` allows to subset a `XChromatograms` object by row (`i`) and column +#' (`j`), with `i` and `j` being of type `integer`. The `featureDefinitions` +#' will also be subsetted accordingly and the `peakidx` column updated. +#' +#' - `filterMz` filters the chromatographic peaks within an `XChromatogram` or +#' `XChromatograms`, if a column `"mz"` is present in the `chromPeaks` matrix. +#' This would be the case if the `XChromatogram` was extracted from an +#' [XCMSnExp()] object with the [chromatogram()] function. All +#' chromatographic peaks with their m/z within the m/z range defined by `mz` +#' will be retained. Also feature definitions (if present) will be subset +#' accordingly. The function returns a filtered `XChromatogram` or +#' `XChromatograms` object. +#' +#' - `filterRt` filters chromatogram(s) by the provided retention time range. +#' All eventually present chromatographic peaks with their apex within the +#' retention time range specified with `rt` will be retained. Also feature +#' definitions, if present, will be filtered accordingly. The function +#' returns a filtered `XChromatogram` or `XChromatograms` object. +#' +#' @md +setMethod("filterMz", "XChromatogram", function(object, mz, ...) { + if (missing(mz) || length(mz) == 0) + return(object) + pks <- chromPeaks(object) + if (nrow(pks) && any(colnames(pks) == "mz")) { + mz <- range(mz) + keep <- which(pks[, "mz"] >= mz[1] & pks[, "mz"] <= mz[2]) + object@chromPeaks <- pks[keep, , drop = FALSE] + object@chromPeakData <- extractROWS(object@chromPeakData, keep) + validObject(object) + } + object +}) + +#' @rdname XChromatogram +#' +#' @md +#' +#' @examples +#' +#' ## Filter the XChromatogram by retention time +#' xchr_sub <- filterRt(xchr, rt = c(4, 6)) +#' xchr_sub +#' plot(xchr_sub) +setMethod("filterRt", "XChromatogram", function(object, rt, ...) { + if (missing(rt) || length(rt) == 0) return(object) + pks <- chromPeaks(object) + object <- callNextMethod() + if (nrow(pks)) { + rt <- range(rt) + keep <- which(pks[, "rt"] >= rt[1] & pks[, "rt"] <= rt[2]) + object@chromPeaks <- pks[keep, , drop = FALSE] + object@chromPeakData <- extractROWS(object@chromPeakData, keep) + validObject(object) + } + object +}) + +#' @rdname XChromatogram +#' +#' @md +setMethod("hasChromPeaks", "XChromatogram", function(object) { + as.logical(nrow(object@chromPeaks)) +}) + +#' @rdname XChromatogram +#' +#' @md +setMethod("dropFilledChromPeaks", "XChromatogram", function(object) { + if (!.hasFilledPeaks(object)) + return(object) + not_fld <- which(!object@chromPeakData$is_filled) + object@chromPeaks <- object@chromPeaks[not_fld, , drop = FALSE] + object@chromPeakData <- extractROWS(object@chromPeakData, not_fld) + validObject(object) + object +}) + +#' @rdname XChromatogram +setMethod("chromPeakData", "XChromatogram", function(object) { + object@chromPeakData +}) +#' @rdname XChromatogram +setReplaceMethod("chromPeakData", "XChromatogram", function(object, value) { + object@chromPeakData <- value + validObject(object) + object +}) + +#' @rdname XChromatogram +setMethod("refineChromPeaks", c(object = "XChromatogram", + param = "MergeNeighboringPeaksParam"), + function(object, param = MergeNeighboringPeaksParam()) { + object <- .xchrom_merge_neighboring_peaks( + object, minProp = param@minProp, diffRt = 2 * param@expandRt) + validObject(object) + object + }) + +#' @rdname removeIntensity-Chromatogram +setMethod("removeIntensity", "XChromatogram", + function(object, which = c("below_threshold", "outside_chromPeak"), + threshold = 0) { + which <- match.arg(which) + if (which == "outside_chromPeak") { + cps <- chromPeaks(object) + if (nrow(cps)) { + keep <- rep(FALSE, length(object@rtime)) + for (i in seq_len(nrow(cps))) + keep[which(object@rtime >= cps[i, "rtmin"] & + object@rtime <= cps[i, "rtmax"])] <- TRUE + object@intensity[!keep] <- NA_real_ + } else + warning("No chromatographic peaks present. ", + "Returning data as is") + return(object) + } else callNextMethod(object, which = which, threshold = threshold) + }) + +#' @rdname XChromatogram +setMethod("filterChromPeaks", "XChromatogram", + function(object, method = c("keepTop"), ...) { + method <- match.arg(method) + switch(method, + keepTop = .filter_chrom_peaks_keep_top(object, ...)) + }) + +#' @rdname XChromatogram +setMethod("transformIntensity", "XChromatogram", function(object, + FUN = identity) { + object <- callNextMethod() + object@chromPeaks[, "into"] <- FUN(object@chromPeaks[, "into"]) + object@chromPeaks[, "maxo"] <- FUN(object@chromPeaks[, "maxo"]) + validObject(object) + object +}) diff --git a/R/methods-XChromatograms.R b/R/methods-XChromatograms.R new file mode 100644 index 000000000..88477baea --- /dev/null +++ b/R/methods-XChromatograms.R @@ -0,0 +1,678 @@ +#' @include methods-MChromatograms.R + +setAs("MChromatograms", "XChromatograms", function(from) { + res <- new("XChromatograms") + res@.Data <- matrix(lapply(from, function(z) { + if (is(z, "Chromatogram")) + as(z, "XChromatogram") + else z + }), nrow = nrow(from), ncol = ncol(from), dimnames = dimnames(from)) + res@phenoData <- from@phenoData + res@featureData <- from@featureData + if (validObject(resetClass)) res +}) + +#' @rdname XChromatogram +setMethod("show", "XChromatograms", function(object) { + nr <- nrow(object) + nc <- ncol(object) + cat(class(object), " with ", + nr, ifelse(nr == 1, " row and ", " rows and "), + nc, ifelse(nc == 1, " column\n", " columns\n"), + sep = "") + sumFun <- function(z) { + paste0("peaks: ", nrow(z[[1]]@chromPeaks)) + } + if (nr > 0 && nc > 0) { + if (nr <= 4) { + out <- apply(object, MARGIN = c(1, 2), sumFun) + rownames(out) <- paste0("[", 1:nrow(out), ",]") + } + else { + out <- rbind( + apply(object[c(1, 2), , drop = FALSE], MARGIN = c(1, 2), sumFun), + rep(" ... ", ncol(object)), + apply(object[nrow(object) - c(1, 0), , drop = FALSE], + MARGIN = c(1, 2), sumFun) + ) + rownames(out) <- c("[1,]", "[2,]", "...", + paste0("[", c(nrow(object) - c(1, 0)), ",]")) + } + rn <- rownames(out) + out <- rbind(rep("", ncol(out)), out) + rownames(out) <- c("", rn) + print(out, quote = FALSE, right = TRUE) + } + cat("phenoData with", length(varLabels(object@phenoData)), "variables\n") + cat("featureData with", length(fvarLabels(object)), "variables\n") + cat("- - - xcms preprocessing - - -\n") + if (any(hasChromPeaks(object))) { + cat("Chromatographic peak detection:\n") + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + if (length(ph)) + cat(" method:", .param2string(ph[[1]]@param), "\n") + else cat(" unknown method.\n") + } + if (hasFeatures(object)) { + cat("Correspondence:\n") + ph <- processHistory(object, type = .PROCSTEP.PEAK.GROUPING) + if (length(ph)) + cat(" method:", .param2string(ph[[1]]@param), "\n") + else cat(" unknown method.\n") + cat(" ", nrow(object@featureDefinitions), " feature(s) identified.\n", + sep = "") + ## if (.hasFilledPeaks(object)) { + ## totF <- chromPeaks(object)[, "is_filled"] == 1 + ## fp <- chromPeaks(object)[totF, , drop = FALSE] + ## cat("", sum(totF), "filled peaks (on average", + ## mean(table(fp[, "sample"])), "per sample).\n") + ## } + } +}) + +#' @rdname XChromatogram +setMethod("hasChromPeaks", "XChromatograms", function(object) { + matrix(vapply(object, hasChromPeaks, logical(1)), ncol = ncol(object), + dimnames = dimnames(object)) +}) + +#' @rdname XChromatogram +setMethod("hasFilledChromPeaks", "XChromatograms", function(object) { + matrix(vapply(object, .hasFilledPeaks, logical(1)), ncol = ncol(object), + dimnames = dimnames(object)) +}) + +#' @rdname XChromatogram +setMethod("chromPeaks", "XChromatograms", function(object, rt = numeric(), + mz = numeric(), ppm = 0, + type = c("any", "within", + "apex_within"), + msLevel) { + type <- match.arg(type) + res <- lapply(object, chromPeaks, rt = rt, mz = mz, ppm = ppm, type = type, + msLevel = msLevel) + nrs <- vapply(res, nrow, integer(1)) + row_idx <- rep(seq_len(nrow(object)), ncol(object)) + col_idx <- rep(seq_len(ncol(object)), each = nrow(object)) + res <- do.call(rbind, res) + res <- cbind(res, row = rep(row_idx, nrs), column = rep(col_idx, nrs)) + res[order(res[, "row"]), , drop = FALSE] +}) + +#' @rdname XChromatogram +setMethod("chromPeakData", "XChromatograms", function(object) { + res <- lapply(object, chromPeakData) + nrs <- vapply(res, nrow, integer(1)) + row_idx <- rep(seq_len(nrow(object)), ncol(object)) + col_idx <- rep(seq_len(ncol(object)), each = nrow(object)) + res <- do.call(rbind, res) + res$row <- rep(row_idx, nrs) + res$column <- rep(col_idx, nrs) + extractROWS(res, order(res[, "row"])) +}) + +#' @rdname XChromatogram +setMethod("filterMz", "XChromatograms", function(object, mz, ...) { + if (missing(mz) || length(object) == 0) + return(object) + pks_orig <- chromPeaks(object) + object@.Data <- matrix(lapply(object, filterMz, mz = mz, ...), + nrow = nrow(object), dimnames = dimnames(object)) + pks_sub <- chromPeaks(object) + if (hasFeatures(object)) { + fts <- .subset_features_on_chrom_peaks( + object@featureDefinitions, pks_orig, pks_sub) + fts$row <- vapply(fts$peakidx, function(z) { + as.integer(pks_sub[z, "row"][1]) + }, integer(1)) + object@featureDefinitions <- fts + } + validObject(object) + object +}) + +#' @rdname XChromatogram +setMethod("filterRt", "XChromatograms", function(object, rt, ...) { + if (missing(rt) || length(object) == 0) + return(object) + pks_orig <- chromPeaks(object) + object@.Data <- matrix(lapply(object, filterRt, rt = rt, ...), + nrow = nrow(object), dimnames = dimnames(object)) + pks_sub <- chromPeaks(object) + if (hasFeatures(object)) { + fts <- .subset_features_on_chrom_peaks( + object@featureDefinitions, pks_orig, pks_sub) + fts$row <- vapply(fts$peakidx, function(z) { + as.integer(pks_sub[z, "row"][1]) + }, integer(1)) + object@featureDefinitions <- fts + } + validObject(object) + object +}) + +setMethod("addProcessHistory", "XChromatograms", function(object, ph) { + if (!inherits(ph, "ProcessHistory")) + stop("Argument 'ph' has to be of type 'ProcessHistory' or a class ", + "extending it!") + object@.processHistory[[(length(object@.processHistory) + 1)]] <- ph + object +}) + +#' @rdname XChromatogram +setMethod("plot", "XChromatograms", function(x, col = "#00000060", lty = 1, + type = "l", + xlab = "retention time", + ylab = "intensity", + main = NULL, + peakType = c("polygon", + "point", + "rectangle", + "none"), + peakCol = "#00000060", + peakBg = "#00000020", + peakPch = 1, ...) { + peakType <- match.arg(peakType) + nr <- nrow(x) + if (nr > 1) + par(mfrow = c(round(sqrt(nr)), ceiling(sqrt(nr)))) + pks_all <- chromPeaks(x) + pks_nr <- nrow(pks_all) + if (length(peakCol) != pks_nr) + peakCol <- rep(peakCol[1], pks_nr) + if (length(peakBg) != pks_nr) + peakBg <- rep(peakBg[1], pks_nr) + if (length(peakPch) != pks_nr) + peakPch <- rep(peakPch[1], pks_nr) + for (i in seq_len(nr)) { + x_sub <- x[i, , drop = FALSE] + plot(as(x_sub, ifelse(is(x_sub, "XChromatograms"), + "MChromatograms", "Chromatogram")), + col = col, lty = lty, type = type, + xlab = xlab, ylab = ylab, main = main, ...) + idx <- which(pks_all[, "row"] == i) + if (length(idx) && peakType != "none") { + pks <- chromPeaks(x_sub) + .add_chromatogram_peaks(x_sub, pks, col = peakCol[idx], + bg = peakBg[idx], type = peakType, + pch = peakPch[idx], ...) + } + } +}) + +#' @rdname XChromatogram +#' +#' @param fileIndex For `processHistory`: optional `integer` specifying the +#' index of the files/samples for which the [ProcessHistory] objects should +#' be returned. +#' +#' @md +setMethod("processHistory", "XChromatograms", function(object, fileIndex, + type) { + ph <- object@.processHistory + if (length(ph)) { + if (!missing(fileIndex)) { + if (!all(fileIndex %in% seq_len(ncol(object)))) + stop("'fileIndex' has to be within 1 and the number of samples!") + gotIt <- vapply(ph, function(z) any(z@fileIndex %in% fileIndex), + logical(1)) + ph <- ph[gotIt] + } + if (!missing(type) && length(ph)) { + gotIt <- vapply(ph, function(z) any(type == processType(z)), + logical(1)) + ph <- ph[gotIt] + } + return(ph) + } + list() +}) + +#' @rdname XChromatogram +#' +#' @md +setMethod("hasFeatures", "XChromatograms", function(object, ...) { + nrow(object@featureDefinitions) > 0 +}) + +#' @rdname XChromatogram +#' +#' @md +setMethod("dropFeatureDefinitions", "XChromatograms", function(object, ...) { + if (!hasFeatures(object)) + return(object) + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.GROUPING, 1) + object@featureDefinitions <- DataFrame() + validObject(object) + object +}) + +#' @rdname XChromatogram +#' +#' @section Chromatographic peak detection: +#' +#' See [findChromPeaks-Chromatogram-CentWaveParam] for information. +#' +#' After chromatographic peak detection it is also possible to *refine* +#' identified chromatographic peaks with the `refineChromPeaks` method (e.g. to +#' reduce peak detection artifacts). Currently, only peak refinement using the +#' *merge neighboring peaks* method is available (see +#' [MergeNeighboringPeaksParam()] for a detailed description of the approach. +#' +#' @section Correspondence analysis: +#' +#' Identified chromatographic peaks in an `XChromatograms` object can be grouped +#' into *features* with the `groupChromPeaks` function. Currently, such a +#' correspondence analysis can be performed with the *peak density* method +#' (see [groupChromPeaks] for more details) specifying the algorithm settings +#' with a [PeakDensityParam()] object. A correspondence analysis is performed +#' separately for each row in the `XChromatograms` object grouping +#' chromatographic peaks across samples (columns). +#' +#' The analysis results are stored in the returned `XChromatograms` object +#' and can be accessed with the `featureDefinitions` method which returns a +#' `DataFrame` with one row for each feature. Column `"row"` specifies in +#' which row of the `XChromatograms` object the feature was identified. +#' +#' The `plotChromPeakDensity` method can be used to visualize *peak density* +#' correspondence results, or to *simulate* a peak density correspondence +#' analysis on chromatographic data. The resulting plot consists of two panels, +#' the upper panel showing the chromatographic data as well as the identified +#' chromatographic peaks, the lower panel the distribution of peaks (the peak +#' density) along the retention time axis. This plot shows each peak as a point +#' with it's peak's retention time on the x-axis, and the sample in which it +#' was found on the y-axis. The distribution of peaks along the retention time +#' axis is visualized with a density estimate. Grouped chromatographic peaks +#' are indicated with grey shaded rectangles. Parameter `simulate` allows to +#' define whether the correspondence analysis should be simulated ( +#' `simulate=TRUE`, based on the available data and the provided +#' [PeakDensityParam()] parameter class) or not (`simulate=FALSE`). For the +#' latter it is assumed that a correspondence analysis has been performed with +#' the *peak density* method on the `object`. +#' See examples below. +#' +#' Abundance estimates for each feature can be extracted with the +#' `featureValues` function using parameter `value = "into"` to extract the +#' integrated peak area for each feature. The result is a `matrix`, columns +#' being samples and rows features. +#' +#' @md +setMethod("groupChromPeaks", + signature(object = "XChromatograms", param = "PeakDensityParam"), + function(object, param) { + if (!any(hasChromPeaks(object))) + stop("No chromatographic peak detection results in 'object'! ", + "Please perform first a peak detection using the ", + "'findChromPeaks' method.") + if (hasFeatures(object)) + object <- dropFeatureDefinitions(object) + ## Check if we've got any sample groups: + if (length(sampleGroups(param)) == 0) { + sampleGroups(param) <- rep(1, ncol(object)) + message("Empty 'sampleGroups' in 'param', assuming all ", + "samples to be in the same group.") + } else { + ## Check that the sampleGroups are OK + if (length(sampleGroups(param)) != ncol(object)) + stop("The 'sampleGroups' value in the provided 'param' ", + "class does not match the number of available files/", + "samples!") + } + startDate <- date() + cpks <- chromPeaks(object) + cpks <- cbind(cpks, index = seq_len(nrow(cpks))) + if (!any(colnames(cpks) == "sample")) + colnames(cpks)[colnames(cpks) == "column"] <- "sample" + if (!any(colnames(cpks) == "mz")) + cpks <- cbind(cpks, mz = NA) + nr <- nrow(object) + res <- vector("list", nr) + bw <- bw(param) + sgrps <- sampleGroups(param) + sgrps_tbl <- table(sgrps) + minfr <- minFraction(param) + minsam <- minSamples(param) + maxf <- maxFeatures(param) + for (i in seq_len(nr)) { + cur_pks <- cpks[cpks[, "row"] == i, , drop = FALSE] + if (nrow(cur_pks) == 0) + next + rtr <- range(lapply(object[i, ], rtime), na.rm = TRUE) + densFrom <- rtr[1] - 3 * bw + densTo <- rtr[2] + 3 * bw + densN <- max(512, 2 * 2^(ceiling(log2(diff(rtr) / (bw / 2))))) + tmp <- .group_peaks_density(cur_pks, bw = bw, + densFrom = densFrom, + densTo = densTo, + densN = densN, + sampleGroups = sgrps, + sampleGroupTable = sgrps_tbl, + minFraction = minfr, + minSamples = minsam, + maxFeatures = maxf) + tmp$row <- rep(i, nrow(tmp)) + res[[i]] <- tmp + } + res <- DataFrame(do.call(rbind, res)) + xph <- XProcessHistory(param = param, date. = startDate, + type. = .PROCSTEP.PEAK.GROUPING, + fileIndex = 1:ncol(object)) + object <- addProcessHistory(object, xph) + ## Add the results. + if (nrow(res) == 0) + warning("Unable to group any chromatographic peaks. ", + "You might have to adapt your settings.") + if (nrow(res) > 0) + rownames(res) <- .featureIDs(nrow(res)) + object@featureDefinitions <- res + validObject(object) + object + }) + +#' @rdname XChromatogram +setMethod("featureDefinitions", "XChromatograms", + function(object, mz = numeric(), rt = numeric(), ppm = 0, + type = c("any", "within", "apex_within")) { + if (!hasFeatures(object)) + return(DataFrame()) + feat_def <- object@featureDefinitions + type <- match.arg(type) + ## Select features within rt range. + if (length(rt) && nrow(feat_def)) { + rt <- range(rt) + keep <- switch( + type, + any = which(feat_def$rtmin <= rt[2] & + feat_def$rtmax >= rt[1]), + within = which(feat_def$rtmin >= rt[1] & + feat_def$rtmax <= rt[2]), + apex_within = which(feat_def$rtmed >= rt[1] & + feat_def$rtmed <= rt[2]) + ) + feat_def <- extractROWS(feat_def, keep) + } + if (length(mz) && nrow(feat_def)) { + mz <- range(mz) + ## Increase mz by ppm. + if (is.finite(mz[1])) + mz[1] <- mz[1] - mz[1] * ppm / 1e6 + if (is.finite(mz[2])) + mz[2] <- mz[2] + mz[2] * ppm / 1e6 + keep <- switch( + type, + any = which(feat_def$mzmin <= mz[2] & + feat_def$mzmax >= mz[1]), + within = which(feat_def$mzmin >= mz[1] & + feat_def$mzmax <= mz[2]), + apex_within = which(feat_def$mzmed >= mz[1] & + feat_def$mzmed <= mz[2]) + ) + feat_def <- extractROWS(feat_def, keep) + } + feat_def + }) + +#' @rdname XChromatogram +setMethod("[", "XChromatograms", function(x, i, j, drop = TRUE) { + if (missing(i) && missing(j)) + return(x) + if (missing(i)) + i <- seq_len(nrow(x)) + if (missing(j)) + j <- seq_len(ncol(x)) + if (is.logical(i)) + i <- which(i) + if (is.logical(j)) + j <- which(j) + if (length(i) > 1 || length(j) > 1) + drop <- FALSE + if (length(i) == 1 && length(j) == 1 && drop) + return(x@.Data[i, j, drop = TRUE][[1]]) + cpeaks_orig <- chromPeaks(x) + fts_orig <- featureDefinitions(x) + ## The following code replicates the [,MChromatograms + ph <- x@.processHistory + pd <- x@phenoData + fd <- x@featureData + xclass <- class(x) + x <- as(x@.Data[i = i, j = j, drop = FALSE], xclass) + pd <- pd[j, ] + pData(pd) <- droplevels(pData(pd)) + x@phenoData <- pd + fd <- fd[i, ] + pData(fd) <- droplevels(pData(fd)) + x@featureData <- fd + if (nrow(fts_orig)) { + cpeaks_sub <- chromPeaks(x) + ## re-order and duplicate fts based on i. + fts <- vector("list", length(i)) + for (el in seq_along(i)) { + fts_row <- fts_orig[fts_orig$row == i[el], , drop = FALSE] + if (nrow(fts_row)) { + fts_row$row <- el + fts_row <- .subset_features_on_chrom_peaks( + fts_row, cpeaks_orig, cpeaks_sub) + fts[[el]] <- fts_row + } else fts[[el]] <- DataFrame() + } + x@featureDefinitions <- do.call(rbind, fts) + } + x@.processHistory <- .process_history_subset_samples(ph, j = j) + validObject(x) + x +}) + +#' @rdname XChromatogram +setMethod("featureValues", "XChromatograms", + function(object, method = c("medret", "maxint", "sum"), + value = "into", intensity = "into", missing = NA, ...) { + if (!any(hasChromPeaks(object))) + stop("No chromatographic peaks present! Please use ", + "'findChromPeaks' first.") + if (!hasFeatures(object)) + stop("No features (grouped peaks) present! Please use ", + "'groupChromPeaks' first.") + method = match.arg(method) + if (method == "sum" & !(value %in% c("into", "maxo"))) + stop("method 'sum' is only allowed if value is set to 'into'", + " or 'maxo'") + if (is.character(missing)) { + if (!(missing %in% c("rowmin_half"))) + stop("if 'missing' is not 'NA' or a numeric it should", + " be one of: \"rowmin_half\".") + } else { + if (!is.numeric(missing) & !is.na(missing)) + stop("'missing' should be either 'NA', a numeric or one", + " of: \"rowmin_half\".") + } + cnames <- colnames(object) + pks <- chromPeaks(object) + if (any(colnames(pks) == "sample")) + pks[, "sample"] <- pks[, "column"] + else + pks <- cbind(pks, sample = pks[, "column"]) + .feature_values(pks = pks, fts = featureDefinitions(object), + method = method, value = value, + intensity = intensity, colnames = cnames, + missing = missing) +}) + +#' @rdname XChromatogram +setMethod("plotChromPeakDensity", "XChromatograms", + function(object, param, col = "#00000060", xlab = "retention time", + main = NULL, peakType = c("polygon", "point", "rectangle", + "none"), peakCol = "#00000060", + peakBg = "#00000020", peakPch = 1, simulate = TRUE, ...) { + peakType <- match.arg(peakType) + if (!any(hasChromPeaks(object))) + stop("No chromatographic peaks present. Please run ", + "'findChromPeaks' first.", call. = FALSE) + if (nrow(object) > 1) + stop("Currently only plotting of a single chromatogram in ", + "multiple samples is supported. Please subset 'object' ", + "to one row.", call. = FALSE) + if (missing(param)) { + param <- NULL + if (hasFeatures(object)) { + ph <- processHistory(object, + type = .PROCSTEP.PEAK.GROUPING) + if (length(ph)) { + ph <- ph[[length(ph)]] + if (is(ph, "XProcessHistory") && + is(ph@param, "PeakDensityParam")) + param <- ph@param + } + } + } + if (!length(param)) + stop("Object 'param' is missing", call. = FALSE) + fts <- NULL + if (!simulate && hasFeatures(object)) + fts <- featureDefinitions(object) + mr <- par("mar") + mr_1 <- mr[1] + mr_3 <- mr[3] + mr[1] <- 0 + xl <- range(lapply(object, function(z) range(rtime(z)))) + par(mfrow = c(2, 1), mar = mr) + plot(object, col = col, xlab = NA, xaxt = "n", main = main, + peakType = peakType, peakCol = peakCol, peakBg = peakBg, + peakPch = peakPch, xlim = xl, ...) + mr[1] <- mr_1 + mr[3] <- 0 + par(mar = mr) + .plot_chrom_peak_density(chromPeaks(object), fts = fts, col = col, + param = param, xlab = xlab, xlim = xl, + peakCol = peakCol, peakBg = peakBg, + peakPch = peakPch, simulate = simulate, + ylim = c(1, ncol(object)), + ...) + mr[1] <- mr_1 + mr[3] <- mr_3 + par(mar = mr) +}) + +#' @rdname XChromatogram +setMethod("dropFilledChromPeaks", "XChromatograms", function(object) { + pks_orig <- chromPeaks(object) + object@.Data <- matrix(lapply(object, dropFilledChromPeaks), + nrow = nrow(object), dimnames = dimnames(object)) + pks_sub <- chromPeaks(object) + if (hasFeatures(object)) { + fts <- .subset_features_on_chrom_peaks( + object@featureDefinitions, pks_orig, pks_sub) + ## fts$row <- vapply(fts$peakidx, function(z) { + ## as.integer(pks_sub[z, "row"][1]) + ## }, integer(1)) + object@featureDefinitions <- fts + } + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) + validObject(object) + object +}) + +#' @rdname XChromatogram +setMethod("refineChromPeaks", c(object = "XChromatograms", + param = "MergeNeighboringPeaksParam"), + function(object, param = MergeNeighboringPeaksParam()) { + object@.Data <- matrix( + lapply(object, .xchrom_merge_neighboring_peaks, + diffRt = 2 * param@expandRt, + minProp = param@minProp), + ncol = ncol(object), nrow = nrow(object), + dimnames = dimnames(object)) + xph <- XProcessHistory(param = param, date. = date(), + type. = .PROCSTEP.PEAK.REFINEMENT, + fileIndex = 1:ncol(object)) + object <- addProcessHistory(object, xph) + validObject(object) + object + }) + +#' @rdname filter-MChromatograms +setMethod("filterColumnsIntensityAbove", "XChromatograms", + function(object, threshold = 0, + value = c("bpi", "tic", "maxo", "into"), + which = c("any", "all")) { + value <- match.arg(value) + which <- match.arg(which) + if (length(threshold) > 1 || !is.numeric(threshold)) + stop("'threshold' should be a 'numeric' of length 1") + if (value %in% c("maxo", "into")) { + nc <- ncol(object) + rws <- seq_len(nrow(object)) + cps <- chromPeaks(object) + keep <- rep(FALSE, nc) + for (i in seq_len(nc)) { + vals <- cps[cps[, "column"] == i & + cps[, value] > threshold, "row"] + if (length(vals)) { + if (which == "any") + keep[i] <- TRUE + else keep[i] <- all(rws %in% vals) + } + } + object[, keep] + } else + callNextMethod(object, threshold = threshold, value = value, + which = which) + }) + +#' @rdname filter-MChromatograms +setMethod("filterColumnsKeepTop", "XChromatograms", + function(object, n = 1L, sortBy = c("bpi", "tic", "maxo", "into"), + aggregationFun = sum) { + sortBy <- match.arg(sortBy) + if (length(n) > 1 || !is.numeric(n)) + stop("'n' should be an 'integer' of length 1") + if (sortBy %in% c("maxo", "into")) { + n <- ceiling(n) + nc <- ncol(object) + if (n > nc) + stop("'n' should be smaller or equal than the number of ", + "columns (", nc, ")") + colval <- numeric(nc) + cps <- chromPeaks(object) + for (i in seq_len(nc)) { + vals <- cps[cps[, "column"] == i, c("row", sortBy), + drop = FALSE] + if (nrow(vals)) { + vals <- sapply(split(vals[, sortBy], vals[, "row"]), + max, na.rm = TRUE) + colval[i] <- aggregationFun(vals, na.rm = TRUE) + } + } + idx <- order(colval, decreasing = TRUE)[seq_len(n)] + object[, sort(idx)] + } else + callNextMethod(object, n = n, sortBy = sortBy, + aggregationFun = aggregationFun) + }) + +#' @rdname XChromatogram +setMethod("filterChromPeaks", "XChromatograms", + function(object, method = c("keepTop"), ...) { + method <- match.arg(method) + pks_orig <- chromPeaks(object) + object@.Data <- matrix(lapply(object, filterChromPeaks, + method = method, ...), + nrow = nrow(object), + dimnames = dimnames(object)) + pks_sub <- chromPeaks(object) + if (hasFeatures(object)) { + fts <- .subset_features_on_chrom_peaks( + object@featureDefinitions, pks_orig, chromPeaks(object)) + object@featureDefinitions <- fts + } + validObject(object) + object + }) + +#' @rdname XChromatogram +setMethod("transformIntensity", "XChromatograms", function(object, + FUN = identity) { + object@.Data <- matrix(lapply(object, FUN = transformIntensity, FUN), + nrow = nrow(object), + dimnames = dimnames(object)) + object +}) diff --git a/R/methods-group-features.R b/R/methods-group-features.R new file mode 100644 index 000000000..1619c6b4c --- /dev/null +++ b/R/methods-group-features.R @@ -0,0 +1,1056 @@ +#' @title Compounding of LC-MS features +#' +#' @name feature-grouping +#' +#' @description +#' +#' Feature *compounding* aims at identifying and grouping LC-MS features +#' representing different ions or adducts (including isotopes) of the same +#' originating compound. +#' The [MsFeatures](https://bioconductor.org/packages/MsFeatures) package +#' provides a general framework and functionality to group features based on +#' different properties. The `groupFeatures` methods for [XCMSnExp-class] +#' objects implemented in `xcms` extend these to enable the *compounding* of +#' LC-MS data. Note that these functions simply define feature groups but don't +#' actually *aggregate* or combine the features. +#' +#' See [MsFeatures::groupFeatures()] for an overview on the general feature +#' grouping concept as well as details on the individual settings and +#' parameters. +#' +#' The available options for `groupFeatures` on `xcms` preprocessing results +#' (i.e. on `XCMSnExp` objects after correspondence analysis with +#' [groupChromPeaks()]) are: +#' +#' - Grouping by similar retention times: [groupFeatures-similar-rtime()]. +#' +#' - Grouping by similar feature values across samples: +#' [AbundanceSimilarityParam()]. +#' +#' - Grouping by similar peak shape of extracted ion chromatograms: +#' [EicSimilarityParam()]. +#' +#' An ideal workflow grouping features should sequentially perform the above +#' methods (in the listed order). +#' +#' Compounded feature groups can be accessed with the `featureGroups` function. +#' +#' @param object an [XCMSnExp()] object. +#' +#' @param value for `featureGroups<-`: replacement for the feature groups in +#' `object`. Has to be of length 1 or length equal to the number of features +#' in `object`. +#' +#' @author Johannes Rainer, Mar Garcia-Aloy, Vinicius Veri Hernandes +#' +#' @seealso [plotFeatureGroups()] for visualization of grouped features. +#' +#' @md +NULL + +#' @rdname feature-grouping +#' +#' @export +setMethod("featureGroups", "XCMSnExp", function(object) { + if (!hasFeatures(object)) + stop("No feature definitions present. Please run 'groupChromPeak'", + call. = FALSE) + if (any(colnames(featureDefinitions(object)) == "feature_group")) + as.character(featureDefinitions(object)$feature_group) + else rep(NA_character_, nrow(featureDefinitions(object))) +}) + +#' @rdname feature-grouping +#' +#' @export +setReplaceMethod("featureGroups", "XCMSnExp", function(object, value) { + if (!hasFeatures(object)) + stop("No feature definitions present. Please run 'groupChromPeak'", + call. = FALSE) + if (!(length(value) == 1 | + length(value) == nrow(featureDefinitions(object)))) + stop("'value' has to be either of length 1 or equal to the number ", + "of features in object") + featureDefinitions(object)$feature_group <- as.character(value) + object +}) + + +#' @title Compounding/feature grouping based on similar retention times +#' +#' @name groupFeatures-similar-rtime +#' +#' @description +#' +#' Group features based on similar retention time. This method is supposed to be +#' used as an initial *crude* grouping of features based on the median retention +#' time of all their chromatographic peaks. All features with a difference in +#' their retention time which is `<=` parameter `diffRt` of the parameter object +#' are grouped together. If a column `"feature_group"` is found in +#' [xcms::featureDefinitions()] this is further sub-grouped by this method. +#' +#' See [MsFeatures::SimilarRtimeParam()] in `MsFeatures` for more details. +#' +#' @param msLevel `integer(1)` defining the MS level on which the features +#' should be grouped. +#' +#' @param object [XCMSnExp()] object containing also correspondence results. +#' +#' @param param `SimilarRtimeParam` object with the settings for the method. See +#' [MsFeatures::SimilarRtimeParam()] for details and options. +#' +#' @param ... passed to the `groupFeatures` function on numeric values. +#' +#' @return input `XCMSnExp` with feature groups added (i.e. in column +#' `"feature_group"` of its `featureDefinitions` data frame. +#' +#' @family feature grouping methods +#' +#' @rdname groupFeatures-similar-rtime +#' +#' @importClassesFrom ProtGenerics Param +#' +#' @importFrom MsFeatures SimilarRtimeParam AbundanceSimilarityParam +#' +#' @importClassesFrom MsFeatures SimilarRtimeParam AbundanceSimilarityParam +#' +#' @importMethodsFrom MsFeatures groupFeatures featureGroups featureGroups<- +#' +#' @md +#' +#' @author Johannes Rainer +#' +#' @examples +#' +#' library(MsFeatures) +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Group chromatographic peaks across samples +#' xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3))) +#' +#' ## Group features based on similar retention time (i.e. difference <= 2 seconds) +#' xodg_grp <- groupFeatures(xodg, param = SimilarRtimeParam(diffRt = 2)) +#' +#' ## Feature grouping get added to the featureDefinitions in column "feature_group" +#' head(featureDefinitions(xodg_grp)$feature_group) +#' +#' table(featureDefinitions(xodg_grp)$feature_group) +#' length(unique(featureDefinitions(xodg_grp)$feature_group)) +#' +#' ## Using an alternative groupiing method that creates larger groups +#' xodg_grp <- groupFeatures(xodg, +#' param = SimilarRtimeParam(diffRt = 2, groupFun = MsCoreUtils::group)) +#' +#' length(unique(featureDefinitions(xodg_grp)$feature_group)) +NULL + +#' @rdname groupFeatures-similar-rtime +#' +#' @importMethodsFrom xcms hasFeatures featureDefinitions featureDefinitions<- +#' +#' @importFrom MsCoreUtils group +#' +#' @exportMethod groupFeatures +#' +#' @importClassesFrom xcms XCMSnExp XProcessHistory +setMethod( + "groupFeatures", + signature(object = "XCMSnExp", param = "SimilarRtimeParam"), + function(object, param, msLevel = 1L, ...) { + fgs <- featureGroups(object) + if (length(msLevel) > 1) + stop("Currently only grouping of features from a single MS level", + " is supported.", call. = FALSE) + if (any(colnames(featureDefinitions(object)) == "ms_level")) + is_msLevel <- featureDefinitions(object)$ms_level == msLevel + else is_msLevel <- rep(TRUE, nrow(featureDefinitions(object))) + if (all(is.na(fgs))) + fgs <- rep("FG", length(fgs)) + fgs[!is_msLevel] <- NA + nas <- is.na(fgs) + fgs <- factor(fgs, levels = unique(fgs)) + rtl <- split(featureDefinitions(object)$rtmed, fgs) + res <- lapply( + rtl, function(z, param) + MsFeatures:::.format_id(groupFeatures(z, param = param, ...)), + param = param, ...) + res <- paste(fgs, unsplit(res, f = fgs), sep = ".") + if (any(nas)) + res[nas] <- NA_character_ + featureGroups(object) <- res + xph <- new("XProcessHistory", param = param, date = date(), + type = .PROCSTEP.FEATURE.GROUPING, + fileIndex = 1:length(fileNames(object)), + msLevel = as.integer(msLevel)) + object@.processHistory[[(length(object@.processHistory) + 1)]] <- xph + validObject(object) + object + }) + +#' @title Compounding/feature grouping based on similarity of abundances across samples +#' +#' @name groupFeatures-abundance-correlation +#' +#' @description +#' +#' Features from the same originating compound are expected to have similar +#' intensities across samples. This method this groups features based on +#' similarity of abundances (i.e. *feature values*) across samples. +#' See also [AbundanceSimilarityParam()] for additional information and details. +#' +#' This help page lists parameters specific for `xcms` result objects (i.e. the +#' [XCMSnExp()] object). Documentation of the parameters for the similarity +#' calculation is available in the [AbundanceSimilarityParam()] help page in +#' the `MsFeatures` package. +#' +#' @param filled `logical(1)` whether filled-in values should be included in +#' the correlation analysis. Defaults to `filled = TRUE`. +#' +#' @param intensity `character(1)` passed to the `featureValues` call. See +#' [featureValues()] for details. Defaults to `intensity = "into"`. +#' +#' @param method `character(1)` passed to the `featureValues` call. See +#' [featureValues()] for details. Defaults to `method = "medret"`. +#' +#' @param msLevel `integer(1)` defining the MS level on which the features +#' should be grouped. +#' +#' @param object [XCMSnExp()] object containing also correspondence results. +#' +#' @param param `AbudanceSimilarityParam` object with the settings for the +#' method. See [AbundanceSimilarityParam()] for details on the grouping +#' method and its parameters. +#' +#' @param value `character(1)` passed to the `featureValues` call. See +#' [featureValues()] for details. Defaults to `value = "into"`. +#' +#' @param ... additional parameters passed to the `groupFeatures` method for +#' `matrix`. +#' +#' @return input `XCMSnExp` with feature group definitions added to a column +#' `"feature_group"` in its `featureDefinitions` data frame. +#' +#' @family feature grouping methods +#' +#' @rdname groupFeatures-abundance-correlation +#' +#' @importClassesFrom MsFeatures AbundanceSimilarityParam +#' +#' @importFrom MsFeatures AbundanceSimilarityParam +#' +#' @author Johannes Rainer +#' +#' @seealso feature-grouping for a general overview. +#' +#' @md +#' +#' @examples +#' +#' library(MsFeatures) +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Group chromatographic peaks across samples +#' xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3))) +#' +#' ## Group features based on correlation of feature values (integrated +#' ## peak area) across samples. Note that there are many missing values +#' ## in the feature value which influence grouping of features in the present +#' ## data set. +#' xodg_grp <- groupFeatures(xodg, +#' param = AbundanceSimilarityParam(threshold = 0.8)) +#' table(featureDefinitions(xodg_grp)$feature_group) +#' +#' ## Group based on the maximal peak intensity per feature +#' xodg_grp <- groupFeatures(xodg, +#' param = AbundanceSimilarityParam(threshold = 0.8, value = "maxo")) +#' table(featureDefinitions(xodg_grp)$feature_group) +NULL + +#' @rdname groupFeatures-abundance-correlation +#' +setMethod( + "groupFeatures", + signature(object = "XCMSnExp", param = "AbundanceSimilarityParam"), + function(object, param, msLevel = 1L, method = c("medret", "maxint", "sum"), + value = "into", intensity = "into", filled = TRUE, ...) { + if (!hasFeatures(object)) + stop("No feature definitions present. Please run ", + "first 'groupChromPeaks'") + if (length(msLevel) > 1) + stop("Currently only grouping of features from a single MS level", + " is supported.") + fgs <- featureGroups(object) + fgs_orig <- fgs + if (any(colnames(featureDefinitions(object)) == "ms_level")) + is_msLevel <- featureDefinitions(object)$ms_level == msLevel + else is_msLevel <- rep(TRUE, nrow(featureDefinitions(object))) + if (all(is.na(fgs))) + fgs <- rep("FG", length(fgs)) + fgs[!is_msLevel] <- NA + nas <- is.na(fgs) + fgs <- factor(fgs, levels = unique(fgs)) + l <- split.data.frame( + featureValues(object, method = method, value = value, + intensity = intensity, filled = filled), fgs) + res <- lapply( + l, function(z, param) + MsFeatures:::.format_id(groupFeatures(z, param = param, ...)), + param = param, ...) + res <- paste(fgs, unsplit(res, f = fgs), sep = ".") + if (any(nas)) + res[nas] <- fgs_orig[nas] + featureGroups(object) <- res + xph <- new("XProcessHistory", param = param, date = date(), + type = .PROCSTEP.FEATURE.GROUPING, + fileIndex = 1:length(fileNames(object)), + msLevel = as.integer(msLevel)) + object@.processHistory[[(length(object@.processHistory) + 1)]] <- xph + validObject(object) + object + }) + + +#' @title Plot feature groups in the m/z-retention time space +#' +#' @description +#' +#' `plotFeatureGroups` visualizes defined feature groups in the m/z by +#' retention time space. Features are indicated by points with features from +#' the same feature group being connected by a line. See [featureGroups()] +#' for details on and options for feature grouping. +#' +#' @param x [XCMSnExp()] object with grouped features (i.e. after calling +#' [groupFeatures()]. +#' +#' @param xlim `numeric(2)` with the lower and upper limit for the x-axis. +#' +#' @param ylim `numeric(2)` with the lower and upper limit for the y-axis. +#' +#' @param xlab `character(1)` with the label for the x-axis. +#' +#' @param ylab `character(1)` with the label for the y-axis. +#' +#' @param pch the plotting character. Defaults to `pch = 4` i.e. plotting +#' features as crosses. See [par()] for more information. +#' +#' @param col color to be used to draw the features. At present only a single +#' color is supported. +#' +#' @param type plotting type (see [par()]). Defaults to `type = "o"` which +#' draws each feature as a point and connecting the features of the same +#' feature group with a line. +#' +#' @param main `character(1)` with the title of the plot. +#' +#' @param featureGroups optional `character` of feature group IDs to draw only +#' specified feature group(s). If not provided, all feature groups are +#' drawn. +#' +#' @importFrom graphics lines +#' +#' @md +#' +#' @export +#' +#' @author Johannes Rainer +plotFeatureGroups <- function(x, xlim = numeric(), ylim = numeric(), + xlab = "retention time", ylab = "m/z", + pch = 4, col = "#00000060", type = "o", + main = "Feature groups", + featureGroups = character()) { + if (!inherits(x, "XCMSnExp")) + stop("'x' is supposed to be an xcms result object ('XCMSnExp')") + if (!length(featureGroups(x))) + stop("No feature groups present. Please run 'groupFeatures' first") + fts <- factor(featureGroups(x)) + if (!length(featureGroups)) + featureGroups <- levels(fts) + fts <- fts[fts %in% featureGroups] + fts <- droplevels(fts) + if (!length(fts)) + stop("None of the specified feature groups found") + fdef <- featureDefinitions(x)[featureGroups(x) %in% fts, ] + rts <- split(fdef$rtmed, fts) + mzs <- split(fdef$mzmed, fts) + xy <- cbind( + x = unlist(lapply(rts, function(z) c(z, NA)), use.names = FALSE), + y = unlist(lapply(mzs, function(z) c(z, NA)), use.names = FALSE)) + if (length(xlim) != 2) + xlim <- range(unlist(rts, use.names = FALSE)) + if (length(ylim) != 2) + ylim <- range(unlist(mzs, use.names = FALSE)) + plot(3, 3, pch = NA, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab) + lines(xy, type = type, col = col, pch = pch) +} + +## #' @title Extract spectra for feature groups +## #' +## #' @description +## #' +## #' `featureGroupSpectra` allows to extract a `Spectrum` object for each feature +## #' group in `x`. Based on the specified function `FUN` different *types* of +## #' spectra can be returned: +## #' +## #' - `featureGroupPseudoSpectrum` creates a *pseudo* spectrum based on the +## #' feature values (defined by `value`) of all features within a feature group +## #' (i.e. each feature is represented as a mass peak in the resulting +## #' spectrum). The reported m/z values will be the `"mzmed"` of the respective +## #' feature from the [featureDefinitions()] data frame. The associated +## #' intensity is calculated from the values of the features from the feature +## #' group: by default, for each feature, the median intensity across all +## #' samples part of `subset` is reported. Parameters `value` and `filled` are +## #' passed to the internal call to [featureValues()] that returns the features' +## #' values which are used in these calculations. Parameter `n` allows to +## #' further restrict the samples being considered in the calculations: for each +## #' feature group samples are first ordered by the sum of signal of the +## #' features of the group and then only the *top n* samples are used in the +## #' calculations. +## #' +## #' Examples: +## #' To report the mean intensity of each feature in the 10 samples with the +## #' highest signal for the feature group use `n = 10` and +## #' `intensityFun = mean`. The m/z values reported in the `Spectrum` object +## #' of a feature group will be the `"mzmed"` of the features, the intensity +## #' values the mean intensity (`value = "maxo"`) across the 10 samples with +## #' the highest signal for that feature group. +## #' +## #' To report the maximal intensity (`value = "maxo"` of each feature in +## #' samples 1, 4, 8 and 10 use `subset = c(1, 4, 8, 10)` and +## #' `intensityFun = max`. More examples in the examples section. +## #' +## #' - `featureGroupFullScan`: reports the full MS1 spectrum (full scan) in the +## #' sample with the highest total signal (defined by `value`) for the feature +## #' group at the retention time closest to the median `"rtmed"` across all +## #' features of the feature group. +## #' +## #' @param x [XCMSnExp()] object with available `featureGroups()`. +## #' +## #' @param featureGroup `character` with the IDs of the feature group(s) for +## #' which the spectra should be returned. Defaults to all feature groups +## #' defined in `x`. Only `featureGroupSpectra` supports +## #' `length(featureGroup)` to be of length > 1. +## #' +## #' @param filled for `featureGroupPseudoSpectra`: `logical(1)` whether +## #' filled-in values should also be considered. See [featureValues()] for +## #' details. +## #' +## #' @param FUN `function` to be used to define the spectrum for each feature +## #' group. Can be `featureGroupPseudoSpectrum`, `featureGroupFullScan` or +## #' any function taking parameters `featureGroup`, `x`, `fvals`. +## #' +## #' @param fvals for `featureGroupPseudoSpectra` and `featureGroupFullScan`: +## #' `matrix` with feature values (rows being features and columns samples) +## #' such as returned by [featureValues()]. +## #' +## #' @param intensityFun for `featureGroupPseudoSpectra`: `function` that should +## #' be applied across samples (defined by `subset`) of the feature value +## #' matrix to calculate the intensity for each mass peak of the returned +## #' pseudo spectrum. By default (`intensityFun = median`) the median +## #' intensity of a feature across all samples (defined by `subset` and `n`) +## #' is used. See description section for examples. +## #' +## #' @param n for `featureGroupPseudoSpectra`: `integer(1)` defining the *top n* +## #' samples (in `subset`) on which spectra should be defined. Samples are +## #' ordered based on the sum of signal (defined by parameter `value`) from +## #' the features of each feature group. See description section for more +## #' details. +## #' +## #' @param subset `integer` with indices of specific samples if spectra should +## #' only be defined on a subset of samples. See description section for +## #' details. +## #' +## #' @param value `character(1)` specifying the column in `chromPeaks` matrix to +## #' be used as *feature values* for each feature. This parameter is passed +## #' to the [featureValues()] call. +## #' +## #' @param ... additional parameters passed down to the function specifyed with +## #' `FUN`. +## #' +## #' @return for `featureGroupSpectra`: `MSpectra` object of length equal to the +## #' number of feature groups in `x` and each element being one spectrum. +## #' For all other functions: a `Spectrum` object. +## #' +## #' @author Johannes Rainer +## #' +## #' @importMethodsFrom xcms filterFile hasAdjustedRtime hasFeatures rtime +## #' +## #' @importFrom xcms applyAdjustedRtime +## #' +## #' @importFrom S4Vectors DataFrame +## #' +## #' @importFrom IRanges CharacterList +## #' +## #' @importFrom MSnbase MSpectra +## #' +## #' @importFrom stats median +## #' +## #' @export +## #' @md +## #' @examples +## #' +## #' ## Load test data set from xcms +## #' library(xcms) +## #' data(faahko_sub) +## #' ## Update the path to the files for the local system +## #' dirname(faahko_sub) <- system.file("cdf/KO/", package = "faahKO") +## #' +## #' ## Group chromatographic peaks across samples +## #' xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3))) +## #' ## Perform correspondence analysis +## #' xdata <- groupChromPeaks(faahko_sub, +## #' param = PeakDensityParam(sampleGroup = rep(1, 3))) +## #' +## #' ## Group features +## #' xdata <- groupFeatures(xdata, param = SimilarRtimeParam(4)) +## #' xdata <- groupFeatures(xdata, +## #' param = AbundanceSimilarityParam(threshold = 0.3)) +## #' +## #' sort(table(featureGroups(xdata))) +## #' +## #' ################ +## #' ## featureGroupSpectra +## #' ## +## #' +## #' ## Get a pseudo spectrum for each feature group +## #' res <- featureGroupSpectra(xdata) +## #' res +## #' +## #' ## Get a full scan spectrum for a subset of the feature groups +## #' ## considering only the subset of the last two samples +## #' res <- featureGroupSpectra(xdata, +## #' featureGroup = unique(featureGroups(xdata))[1:4], +## #' FUN = featureGroupFullScan, subset = 2:3) +## #' res +## #' +## #' ################ +## #' ## Pseudo Spectrum +## #' ## +## #' +## #' ## Get the pseudo spectrum for one feature group reporting the per-feature +## #' ## maximal "maxo" value across samples as the spectrum's intensities +## #' res <- featureGroupPseudoSpectrum(featureGroup = "FG.010.001", xdata, +## #' fvals = featureValues(xdata, value = "maxo"), intensityFun = max) +## #' +## #' intensity(res) +## #' mz(res) +## #' +## #' ## Get the pseudo spectrum using the values in the one sample with the +## #' ## highest total sum of signal ("maxo") for the feature group. +## #' res <- featureGroupPseudoSpectrum(featureGroup = "FG.010.001", xdata, +## #' fvals = featureValues(xdata, value = "maxo"), n = 1) +## #' +## #' intensity(res) +## #' mz(res) +## #' +## #' +## #' ################ +## #' ## Full Scan Spectrum +## #' ## +## #' +## #' ## Get the full MS1 spectrum from the sample with the highest total signal +## #' ## of one specific feature group +## #' res <- featureGroupFullScan(featureGroup = "FG.010.001", xdata, +## #' fvals = featureValues(xdata, value = "maxo")) +## #' +## #' plot(mz(res), intensity(res), type = "h", xlab = "m/z", ylab = "intensity") +## #' ## Highlight the peaks for the features of the group. +## #' idx <- which(featureGroups(xdata) == "FG.001.001") +## #' points(x = featureDefinitions(xdata)$mzmed[idx], +## #' y = rep(0, length(idx)), pch = 4, col = "red") +## featureGroupSpectra <- function(x, featureGroup = featureGroups(x), +## FUN = featureGroupPseudoSpectrum, +## value = "maxo", filled = TRUE, +## subset = seq_along(fileNames(x)), +## ...) { +## if (!all(subset %in% seq_along(fileNames(x)))) +## stop("'subset' is expected to be an integer vector with values ", +## "between 1 and ", length(fileNames(x))) +## subset <- unique(subset) +## if (!hasFeatures(x)) +## stop("No feature definitions present. Please run 'groupChromPeaks' first") +## featureGroup <- unique(featureGroup) +## featureGroup <- featureGroup[!is.na(featureGroup)] +## if (!length(featureGroup)) +## stop("No feature groups present. Please run 'groupFeatures' first") +## if (!all(featureGroup %in% featureGroups(x))) +## stop("Not all feature groups defined with parameter 'featureGroup' ", +## "found in 'featureGroups(x)'") +## if (length(subset) < length(fileNames(x))) +## x <- filterFile(x, subset, keepFeatures = TRUE) +## fvals <- featureValues(x, method = "maxint", intensity = value, +## value = value, filled = filled) +## res <- lapply(featureGroup, FUN, x = x, fvals = fvals, ...) +## fids <- split(rownames(featureDefinitions(x)), featureGroups(x)) +## MSnbase::MSpectra(res, elementMetadata = DataFrame( +## feature_group = featureGroup, +## feature_id = CharacterList(fids[featureGroup], +## compress = FALSE))) +## } + +## #' @rdname featureGroupSpectra +## #' +## #' @importClassesFrom MSnbase Spectrum Spectrum1 Spectrum2 MSpectra +## #' +## #' @importMethodsFrom MSnbase polarity +## #' +## #' @export +## featureGroupPseudoSpectrum <- function(featureGroup = character(), x, +## fvals = featureValues(x), +## n = ncol(fvals), +## intensityFun = median, ...) { +## if (n < 1 || n > ncol(fvals)) +## stop("'n' has to be an integer between 1 and ", ncol(fvals)) +## ft_idx <- which(featureGroups(x) == featureGroup) +## ft_fvals <- fvals[ft_idx, , drop = FALSE] +## ordr <- order(colSums(ft_fvals, na.rm = TRUE), decreasing = TRUE) +## ft_fvals <- ft_fvals[, ordr, drop = FALSE][, 1:n, drop = FALSE] +## ft_fdef <- extractROWS(featureDefinitions(x), ft_idx) +## if (any(colnames(ft_fdef) == "ms_level") && all(ft_fdef$ms_level == 1)) +## cls <- "Spectrum1" +## else cls <- "Spectrum2" +## sp <- new(cls) +## sp@rt <- median(ft_fdef$rtmed) +## sp@mz <- ft_fdef$mzmed +## sp@intensity <- apply(ft_fvals, MARGIN = 1, FUN = intensityFun, na.rm = TRUE) +## sp@peaksCount <- length(ft_idx) +## sp@centroided <- TRUE +## sp@polarity <- polarity(x)[1] +## sp +## } + +## #' @rdname featureGroupSpectra +## #' +## #' @importFrom S4Vectors extractROWS +## #' +## #' @export +## featureGroupFullScan <- function(featureGroup = character(), x, +## fvals = featureValues(x), ...) { +## ft_idx <- which(featureGroups(x) == featureGroup) +## ft_fvals <- fvals[ft_idx, , drop = FALSE] +## samp_idx <- which.max(colSums(ft_fvals, na.rm = TRUE)) +## ft_fdef <- extractROWS(featureDefinitions(x), ft_idx) +## if (hasAdjustedRtime(x)) +## x <- applyAdjustedRtime(x) +## x <- filterFile(as(x, "OnDiskMSnExp"), samp_idx) +## rtmed <- median(ft_fdef$rtmed) +## sp <- x[[which.min(abs(rtime(x) - rtmed))]] +## sp@fromFile <- samp_idx +## sp +## } + +#' @title Compounding/feature grouping based on similarity of extracted ion chromatograms +#' +#' @aliases EicSimilarityParam-class +#' +#' @name groupFeatures-eic-similarity +#' +#' @description +#' +#' Features from the same originating compound are expected to share their +#' elution pattern (i.e. chromatographic peak shape) with it. +#' Thus, this methods allows to group features based on similarity of their +#' extracted ion chromatograms (EICs). The similarity calculation is performed +#' separately for each sample with the similarity score being aggregated across +#' samples for the final generation of the similarity matrix on which the +#' grouping (considering parameter `threshold`) will be performed. +#' +#' The [compareChromatograms()] function is used for similarity calculation +#' which by default calculates the Pearson's correlation coefficient. The +#' settings for `compareChromatograms` can be specified with parameters +#' `ALIGNFUN`, `ALIGNFUNARGS`, `FUN` and `FUNARGS`. `ALIGNFUN` defaults to +#' [alignRt()] and is the function used to *align* the chromatograms before +#' comparison. `ALIGNFUNARGS` allows to specify additional arguments for the +#' `ALIGNFUN` function. It defaults to +#' `ALIGNFUNARGS = list(tolerance = 0, method = "closest")` which ensures that +#' data points from the same spectrum (scan, i.e. with the same retention time) +#' are compared between the EICs from the same sample. Parameter `FUN` defines +#' the function to calculate the similarity score and defaults to `FUN = cor` +#' and `FUNARGS` allows to pass additional arguments to this function (defaults +#' to `FUNARGS = list(use = "pairwise.complete.obs")`. See also +#' [compareChromatograms()] for more information. +#' +#' The grouping of features based on the EIC similarity matrix is performed +#' with the function specified with parameter `groupFun` which defaults to +#' `groupFun = groupSimilarityMatrix` which groups all rows (features) in the +#' similarity matrix with a similarity score larger than `threshold` into the +#' same cluster. This creates clusters of features in which **all** features +#' have a similarity score `>= threshold` with **any** other feature in that +#' cluster. See [groupSimilarityMatrix()] for details. Additional parameters to +#' that function can be passed with the `...` argument. +#' +#' This feature grouping should be called **after** an initial feature +#' grouping by retention time (see [SimilarRtimeParam()]). The feature groups +#' defined in columns `"feature_group"` of `featureDefinitions(object)` (for +#' features matching `msLevel`) will be used and refined by this method. +#' Features with a value of `NA` in `featureDefinitions(object)$feature_group` +#' will be skipped/not considered for feature grouping. +#' +#' @note +#' +#' While being possible to be performed on the full data set without prior +#' feature grouping, this is not suggested for the following reasons: I) the +#' selection of the top `n` samples with the highest signal for the +#' *feature group* will be biased by very abundant compounds as this is +#' performed on the full data set (i.e. the samples with the highest overall +#' intensities are used for correlation of all features) and II) it is +#' computationally much more expensive because a pairwise correlation between +#' all features has to be performed. +#' +#' It is also suggested to perform the correlation on a subset of samples +#' per feature with the highest intensities of the peaks (for that feature) +#' although it would also be possible to run the correlation on all samples by +#' setting `n` equal to the total number of samples in the data set. EIC +#' correlation should however be performed ideally on samples in which the +#' original compound is highly abundant to avoid correlation of missing values +#' or noisy peak shapes as much as possible. +#' +#' By default also the signal which is outside identified chromatographic peaks +#' is excluded from the correlation. +#' +#' @param ALIGNFUN `function` defining the function to be used to *align* +#' chromatograms prior similarity calculation. Defaults to +#' `ALIGNFUN = alignRt`. See [alignRt()] and [compareChromatograms()] for +#' more information. +#' +#' @param ALIGNFUNARGS **named** `list` with arguments for `ALIGNFUN`. +#' Defaults to `ALIGNFUNARGS = list(tolerance = 0, method = "closest")`. +#' +#' @param FUN `function` defining the function to be used to calculate a +#' similarity between (aligned) chromatograms. Defaults to `FUN = cor`. +#' See [cor()] and [compareChromatograms()] for more information. +#' +#' @param FUNARGS **named** `list` with arguments for `FUN`. Defaults to +#' `FUN = list(use = "pairwise.complete.obs")`. +#' +#' @param groupFun `function` defining the function to be used to group rows +#' based on a pairwise similarity matrix. Defaults to +#' [groupSimilarityMatrix()]. +#' +#' @param msLevel `integer(1)` defining the MS level on which the features +#' should be grouped. +#' +#' @param n `numeric(1)` defining the total number of samples per feature group +#' on which this similarity calculation should be performed. This value is +#' rounded up to the next larger integer value. +#' +#' @param object [XCMSnExp()] object containing also correspondence results. +#' +#' @param onlyPeak `logical(1)` whether the correlation should be performed only +#' on the signals within the identified chromatographic peaks +#' (`onlyPeak = TRUE`, default) or all the signal from the extracted ion +#' chromatogram. +#' +#' @param param `EicSimilarityParam` object with the settings for the method. +#' +#' @param threshold `numeric(1)` with the minimal required similarity score to +#' group featues. This is passed to the `groupFun` function. +#' +#' @param value `character(1)` defining whether samples should be grouped based +#' on the sum of the maximal peak intensity (`value = "maxo"`, the default) +#' or the integrated peak area (`value = "into"`) for a feature. +#' +#' @param ... for `EicSimilarityParam`: additional arguments to be passed to +#' `groupFun` and `featureChromatograms` (such as `expandRt` to expand the +#' retention time range of each feature). +#' +#' @return input `XCMSnExp` with feature groups added (i.e. in column +#' `"feature_group"` of its `featureDefinitions` data frame. +#' +#' @family feature grouping methods +#' +#' @seealso feature-grouping for a general overview. +#' +#' @rdname groupFeatures-eic-similarity +#' +#' @exportClass EicSimilarityParam +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @examples +#' +#' library(MsFeatures) +#' ## Load a test data set with detected peaks +#' data(faahko_sub) +#' ## Update the path to the files for the local system +#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +#' +#' ## Disable parallel processing for this example +#' register(SerialParam()) +#' +#' ## Group chromatographic peaks across samples +#' xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3))) +#' +#' ## Performing a feature grouping based on EIC similarities on a single +#' ## sample +#' xodg_grp <- groupFeatures(xodg, param = EicSimilarityParam(n = 1)) +#' +#' table(featureDefinitions(xodg_grp)$feature_group) +#' +#' ## Usually it is better to perform this correlation on pre-grouped features +#' ## e.g. based on similar retention time. +#' xodg_grp <- groupFeatures(xodg, param = SimilarRtimeParam(diffRt = 4)) +#' xodg_grp <- groupFeatures(xodg_grp, param = EicSimilarityParam(n = 1)) +#' +#' table(featureDefinitions(xodg_grp)$feature_group) +NULL + +setClass( + "EicSimilarityParam", + slots = c(threshold = "numeric", + n = "numeric", + onlyPeak = "logical", + value = "character", + groupFun = "function", + ALIGNFUN = "function", + FUN = "function", + ALIGNFUNARGS = "list", + FUNARGS = "list", + dots = "list"), + contains = "Param", + prototype = prototype( + threshold = 0.9, + n = 1, + onlyPeak = TRUE, + value = "maxo", + groupFun = groupSimilarityMatrix, + ALIGNFUN = alignRt, + ALIGNFUNARGS = list(tolerance = 0, method = "closest"), + FUN = cor, + FUNARGS = list(use = "pairwise.complete.obs"), + dots = list() + ), + validity = function(object) { + msg <- NULL + if (length(object@threshold) != 1 || object@threshold < 0) + msg <- "'threshold' has to be a positive numeric of length 1" + if (length(object@n) != 1 || object@n < 0) + msg <- c(msg, "'n' has to be a positive numeric of length 1") + if (length(object@onlyPeak) != 1) + msg <- c(msg, "'onlyPeak' has to a logical of length 1") + if (length(object@value) != 1 && !(object@value %in% + c("maxo", "into"))) + msg <- c(msg, "'value' has to be either \"maxo\" or \"into\"") + msg + }) + +#' @rdname groupFeatures-eic-similarity +#' +#' @export +EicSimilarityParam <- function(threshold = 0.9, n = 1, onlyPeak = TRUE, + value = c("maxo", "into"), + groupFun = groupSimilarityMatrix, + ALIGNFUN = alignRt, + ALIGNFUNARGS = list(tolerance = 0, + method = "closest"), + FUN = cor, + FUNARGS = list(use = "pairwise.complete.obs"), + ...) { + value <- match.arg(value) + groupFun <- match.fun(groupFun) + ALIGNFUN <- match.fun(ALIGNFUN) + FUN <- match.fun(FUN) + new("EicSimilarityParam", threshold = threshold, n = ceiling(n), + onlyPeak = onlyPeak, value = value, groupFun = groupFun, + ALIGNFUN = ALIGNFUN, ALIGNFUNARGS = ALIGNFUNARGS, FUN = FUN, + FUNARGS = FUNARGS, dots = list(...)) +} + +#' @rdname groupFeatures-eic-similarity +setMethod( + "groupFeatures", + signature(object = "XCMSnExp", param = "EicSimilarityParam"), + function(object, param, msLevel = 1L) { + if (!requireNamespace("progress", quietly = TRUE)) + stop("Package 'progress' is required. Please install with ", + "'BiocManager::install(\"progress\")'") + if (!hasFeatures(object)) + stop("No feature definitions present. Please run ", + "first 'groupChromPeaks'") + if (length(msLevel) > 1) + stop("Currently only grouping of features from a single MS level", + " is supported.") + n <- ceiling(param@n) + nc <- length(fileNames(object)) + if (n > nc) + stop("'n' should be smaller than or equal to the number of ", + "samples (", nc, ")") + if (any(colnames(featureDefinitions(object)) == "ms_level")) + is_msLevel <- featureDefinitions(object)$ms_level == msLevel + else is_msLevel <- rep(TRUE, nrow(featureDefinitions(object))) + if (any(colnames(featureDefinitions(object)) == "feature_group")) { + f <- featureDefinitions(object)$feature_group + f_new <- as.character(f) + } else { + f <- rep("FG", nrow(featureDefinitions(object))) + f_new <- rep(NA_character_, length(f)) + } + f[!is_msLevel] <- NA + if (is.factor(f)) { + f <- droplevels(f) + fgroups <- levels(f) + } else { + fgroups <- unique(f) + f <- factor(f, levels = fgroups) + } + fvals <- featureValues(object, method = "maxint", value = param@value) + ffun <- function(z, na.rm = TRUE) + quantile(z, probs = 0.75, na.rm = na.rm) + pb <- progress::progress_bar$new(format = paste0("[:bar] :current/:", + "total (:percent) in ", + ":elapsed"), + total = length(fgroups), + clear = FALSE, force = TRUE) + pb$tick(0) + for (fg in fgroups) { + idx <- which(f == fg) + idxl <- length(idx) + if (idxl > 1) { + vals <- apply(fvals[idx, ], MARGIN = 2, sum, na.rm = TRUE) + sample_idx <- order(vals, decreasing = TRUE)[seq_len(n)] + obj_sub <- .filter_file_XCMSnExp(object, sample_idx, + keepFeatures = TRUE) + ## Can happen that some of the features are not present in the + ## subsetted object. Will put them into their own individual + ## groups later. + idx_miss <- which(!rownames(fvals)[idx] %in% + rownames(featureDefinitions(obj_sub))) + if (length(idx_miss)) { + tmp <- idx[idx_miss] + idx <- idx[-idx_miss] + idx_miss <- tmp + } + if (length(idx) > 1) { + eics <- do.call( + featureChromatograms, + args = c(list(obj_sub, features = rownames(fvals)[idx], + filled = TRUE), param@dots)) + if (param@onlyPeak) + eics <- removeIntensity(eics, + which = "outside_chromPeak") + res <- do.call( + .group_eic_similarity, + args = c(list(as(eics, "MChromatograms"), + aggregationFun = ffun, + threshold = param@threshold, + ALIGNFUN = param@ALIGNFUN, + ALIGNFUNARGS = param@ALIGNFUNARGS, + FUN = param@FUN, + FUNARGS = param@FUNARGS, + groupFun = param@groupFun), param@dots)) + } else res <- factor(1) + f_new[idx] <- paste0(fg, ".", MsFeatures:::.format_id(res)) + if (length(idx_miss)) + f_new[idx_miss] <- paste0( + fg, ".", MsFeatures:::.format_id( + seq((length(levels(res)) + 1), + length.out = length( + idx_miss)))) + } else + f_new[idx] <- paste0(fg, ".001") + pb$tick(1) + } + featureDefinitions(object)$feature_group <- f_new + xph <- new("XProcessHistory", param = param, date = date(), + type = .PROCSTEP.FEATURE.GROUPING, + fileIndex = seq_along(fileNames(object)), + msLevel = as.integer(msLevel)) + object@.processHistory[[(length(object@.processHistory) + 1)]] <- xph + validObject(object) + object + }) + +#' @title Group EICs based on their correlation +#' +#' @description +#' +#' `groupEicCorrelation` groups (extracted ion) chromatograms (EICs) based on +#' their similarity with each other. If this correlation is `>=` than the +#' provided `threshold` they are grouped. +#' +#' If `x` is a [MChromatograms()] object with more than one column (sample), +#' pairwise similarity is calculated between EICs first for each column +#' (sample) of `x` separately and subsequently aggregated across samples using +#' `aggregationFun`. If `x` is a `MChromatograms` with 4 rows (EICs) and 3 +#' columns (samples), pairwise correlations are first calculated between all +#' 4 EICs in each of the 3 columns resulting in 3 correlation matrices (of +#' dimension 4x4). These correlation matrices are combined into a single matrix +#' by combining the 3 correlation values per comparison with +#' `aggregationFun`. By default the mean of the correlation value between e.g. +#' EIC 1 and EIC 2 in each of the 3 columns is used as the final correlation +#' value. Similar to the one-column case EICs are grouped if their (aggregated) +#' correlation coefficient is larger than `threshold`. +#' +#' @param x [MChromatograms()] object. +#' +#' @param aggregationFun `function` to combine the correlation values between +#' pairs of EICs across samples (columns). See description for details. +#' +#' @inheritParams groupFeatures-eic-similarity +#' +#' @return `factor` same length as `nrow(x)` (if `x` is a `MChromatograms` +#' object) or `length(x)` (if `x` is a `list`) with the group each EIC +#' is assigned to. +#' +#' @family grouping operations +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +#' +#' @examples +#' +#' library(MSnbase) +#' set.seed(123) +#' chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), +#' intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) +#' chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), +#' intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) +#' chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), +#' intensity = c(53, 80, 130, 15, 5, 3, 2)) +#' chrs <- MChromatograms(list(chr1, chr2, chr3)) +#' +#' groupEicCorrelation(chrs) +#' +#' ## With a MChromatograms with two columns, use the maximal correlation +#' ## coefficient found in each of the columns +#' chrs <- MChromatograms(list(chr1, chr2, chr3, chr1, chr2, chr3), ncol = 2) +#' groupEicCorrelation(chrs, aggregationFun = max) +.group_eic_similarity <- function(x, aggregationFun = mean, + threshold = 0.8, ALIGNFUN = alignRt, + ALIGNFUNARGS = list(tolerance = 0), FUN = cor, + FUNARGS = list(use = "pairwise.complete.obs"), + groupFun = groupSimilarityMatrix, + ...) { + nr <- nrow(x) + nc <- ncol(x) + res <- array(NA_real_, dim = c(nr, nr, nc)) + for (i in seq_len(nc)) + res[, , i] <- compareChromatograms(x[, i], + ALIGNFUN = ALIGNFUN, + ALIGNFUNARGS = ALIGNFUNARGS, + FUN = FUN, FUNARGS = FUNARGS) + suppressWarnings( + res <- apply(res, c(1, 2), aggregationFun, na.rm = TRUE) + ) + ## Ensure diagonal is always TRUE to not drop any features! + res[cbind(1:nr, 1:nr)] <- 1 + as.factor(do.call(groupFun, + args = c(list(res, threshold = threshold), + list(...)))) +} diff --git a/R/methods-xcmsRaw.R b/R/methods-xcmsRaw.R index 421100289..7dbfd055a 100755 --- a/R/methods-xcmsRaw.R +++ b/R/methods-xcmsRaw.R @@ -312,27 +312,27 @@ setMethod("findPeaks.matchedFilter_orig", "xcmsRaw", #' @title Peak detection in the chromatographic time domain #' #' @aliases findPeaks.matchedFilter -#' +#' #' @description Find peaks in the chromatographic time domain of the #' profile matrix. For more details see #' \code{\link{do_findChromPeaks_matchedFilter}}. -#' +#' #' @param object The \code{\linkS4class{xcmsRaw}} object on which peak detection #' should be performed. -#' +#' #' @inheritParams findChromPeaks-matchedFilter -#' +#' #' @param step numeric(1) specifying the width of the bins/slices in m/z #' dimension. -#' +#' #' @param sleep (DEPRECATED). The use of this parameter is highly discouraged, #' as it could cause problems in parallel processing mode. -#' +#' #' @param scanrange Numeric vector defining the range of scans to which the #' original \code{object} should be sub-setted before peak detection. -#' +#' #' @author Colin A. Smith -#' +#' #' @return A matrix, each row representing an intentified chromatographic peak, #' with columns: #' \describe{ @@ -350,14 +350,14 @@ setMethod("findPeaks.matchedFilter_orig", "xcmsRaw", #' \item{i}{Rank of peak in merged EIC (\code{<= max}).} #' \item{sn}{Signal to noise ratio of the peak.} #' } -#' +#' #' @references #' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and #' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite #' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" #' \emph{Anal. Chem.} 2006, 78:779-787. #' @family Old peak detection methods -#' +#' #' @seealso \code{\link{matchedFilter}} for the new user interface. #' \code{\linkS4class{xcmsRaw}}, #' \code{\link{do_findChromPeaks_matchedFilter}} for the core function @@ -431,7 +431,7 @@ setMethod("findPeaks.matchedFilter", "xcmsRaw", distance <- floor(0.075 / bin_size) } ## o baseValue - if (length(profp$baseleve) > 0) { + if (length(profp$baselevel) > 0) { if (!is.numeric(profp$baselevel)) stop("Profile parameter 'baselevel' has to be numeric!") baseValue <- profp$baselevel[1] @@ -569,10 +569,10 @@ setMethod("findPeaks.centWaveWithPredictedIsotopeROIs", "xcmsRaw", firstBaselineCheck = firstBaselineCheck, roiScales = roiScales, snthreshIsoROIs = snthreshIsoROIs, - maxCharge = maxcharge, - maxIso = maxiso, - mzIntervalExtension = mzIntervalExtension - ) + maxCharge = maxcharge, + maxIso = maxiso, + mzIntervalExtension = mzIntervalExtension + ) invisible(new("xcmsPeaks", res)) }) @@ -585,8 +585,8 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", xcmsPeaks, snthresh = 6.25, maxcharge = 3, maxiso = 5, mzIntervalExtension = TRUE) { if (!isCentroided(object)) - warning("It looks like this file is in profile mode. centWave", - " can process only centroid mode data !\n") + warning("It looks like this file is in profile mode. ", + "centWave works best for centroided data") ## Sub-set the xcmsRaw based on scanrange if (length(scanrange) < 2) { @@ -601,28 +601,20 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", } object <- object[scanrange[1]:scanrange[2]] if(class(xcmsPeaks) != "xcmsPeaks") - stop("Pparameter >xcmsPeaks< is not of class 'xcmsPeaks'!\n") + stop("Parameter 'xcmsPeaks' is not of class 'xcmsPeaks'") vps <- diff(c(object@scanindex, length(object@env$mz))) - res <- do_findChromPeaks_addPredIsoROIs(mz = object@env$mz, - int = object@env$intensity, - scantime = object@scantime, - valsPerSpect = vps, - ppm = ppm, - peakwidth = peakwidth, - snthresh = snthresh, - prefilter = prefilter, - mzCenterFun = mzCenterFun, - integrate = integrate, - mzdiff = mzdiff, - fitgauss = fitgauss, - noise = noise, - verboseColumns = verbose.columns, - peaks. = xcmsPeaks@.Data, - maxCharge = maxcharge, - maxIso = maxiso, - mzIntervalExtension = mzIntervalExtension - ) + res <- do_findChromPeaks_addPredIsoROIs( + mz = object@env$mz, int = object@env$intensity, + scantime = object@scantime, valsPerSpect = vps, + ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, prefilter = prefilter, + mzCenterFun = mzCenterFun, integrate = integrate, + mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, + verboseColumns = verbose.columns, peaks. = xcmsPeaks@.Data, + maxCharge = maxcharge, maxIso = maxiso, + mzIntervalExtension = mzIntervalExtension + ) invisible(new("xcmsPeaks", res)) }) @@ -630,7 +622,7 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", ############################################################ ## findPeaks.MSW #' @title Peak detection for single-spectrum non-chromatography MS data -#' +#' #' @aliases findPeaks.MSW #' #' @description This method performs peak detection in mass spectrometry @@ -642,12 +634,12 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", #' \code{\link{tuneInPeakInfo}} functions. #' #' @inheritParams findPeaks-MSW -#' +#' #' @inheritParams findChromPeaks-centWave -#' +#' #' @param object The \code{\linkS4class{xcmsRaw}} object on which peak #' detection should be performed. -#' +#' #' @param verbose.columns Logical whether additional peak meta data columns #' should be returned. #' @@ -666,7 +658,7 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", #' \item{maxf}{Maximum MSW-filter response of the peak.} #' \item{sn}{Signal to noise ratio.} #' } -#' +#' #' @seealso \code{\link{MSW}} for the new user interface, #' \code{\link{do_findPeaks_MSW}} for the downstream analysis #' function or \code{\link{peakDetectionCWT}} from the @@ -1119,8 +1111,8 @@ setMethod("rawEIC", "xcmsRaw", function(object, if (length(rtrange) >= 2) { rtrange <- range(rtrange) scanidx <- (object@scantime >= rtrange[1]) & (object@scantime <= rtrange[2]) - scanrange <- c(match(TRUE, scanidx), length(scanidx) - - match(TRUE, rev(scanidx))) + scanrange <- c(match(TRUE, scanidx), + length(scanidx) - match(TRUE, rev(scanidx)) + 1) } else if (length(scanrange) < 2) scanrange <- c(1, length(object@scantime)) else @@ -1748,17 +1740,17 @@ setMethod("plotSurf", "xcmsRaw", function(object, log = FALSE, colorlut <- terrain.colors(256) col <- colorlut[y/aspect[3]*255+1] - rgl.clear("shapes") - rgl.clear("bbox") - rgl.surface(x, z, y, color = col, shininess = 128) - rgl.points(0, 0, 0, alpha = 0) + clear3d("shapes") + clear3d("bbox") + surface3d(x, z, y, color = col, shininess = 128) + points3d(0, 0, 0, alpha = 0) mztics <- pretty(sel$mzrange, n = 5*aspect[1]) rttics <- pretty(sel$rtrange, n = 5*aspect[2]) inttics <- pretty(c(0,ylim), n = 10*aspect[3]) inttics <- inttics[inttics > 0] - rgl.bbox(xat = (mztics - sel$mzrange[1])/diff(sel$mzrange)*aspect[1], + bbox3d(xat = (mztics - sel$mzrange[1])/diff(sel$mzrange)*aspect[1], xlab = as.character(mztics), yat = inttics/ylim[2]*aspect[3], ylab = as.character(inttics), @@ -1790,7 +1782,7 @@ setMethod("getMsnScan", "xcmsRaw", function(object, scan, mzrange = numeric()) { ############################################################ ## AutoLockMass setMethod("AutoLockMass", "xcmsRaw", function(object) { - if(length(grep("xml|mzData|mzXML|mzML", object@filepath, ignore.case=TRUE)) >= 1){ + if(length(grep("xml|mzXML|mzML", object@filepath, ignore.case=TRUE)) >= 1){ tempFreq<-diff(which(diff(object@scantime) == 0))-1 idx <- which(tempFreq != floor(mean(tempFreq))) ## only needed for newer lockmass signal if(is.nan(mean(tempFreq)) ){ @@ -1887,7 +1879,7 @@ setMethod("makeacqNum", "xcmsRaw", function(object, freq, start=1) { ############################################################ ## stitch setMethod("stitch", "xcmsRaw", function(object, lockMass) { - if(length(grep("xml|mzData", object@filepath, ignore.case=TRUE)) >= 1){ + if(length(grep("xml", object@filepath, ignore.case=TRUE)) >= 1){ type<-stitch.xml } else if(length(grep("cdf", object@filepath, ignore.case=TRUE)) >= 1){ ## lets check to see if lockMass is one scan or two @@ -1932,7 +1924,7 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { ## Remove the last lock mass if it is too close by the end if ((lockMass[length(lockMass)] + 2) > length(ob@scanindex)) lockMass <- lockMass[1:(length(lockMass) - 1)] - + ## If the number of lockMass values is not even splitting them into a ## two-column matrix is not OK (causes also the first lockMass spectrum to ## be overwritten twice. That's to get rid of the warning in issue #173. @@ -2136,7 +2128,7 @@ setMethod("stitch.netCDF.new", "xcmsRaw", function(object, lockMass) { ## [ ## Subset by scan. #' @title Subset an xcmsRaw object by scans -#' +#' #' @aliases subset-xcmsRaw #' #' @description Subset an \code{\linkS4class{xcmsRaw}} object by scans. The @@ -2149,26 +2141,26 @@ setMethod("stitch.netCDF.new", "xcmsRaw", function(object, lockMass) { #' vector are supported. If not ordered, argument \code{i} is sorted #' automatically. Indices which are larger than the total number of scans #' are discarded. -#' +#' #' @param x The \code{\linkS4class{xcmsRaw}} object that should be sub-setted. -#' +#' #' @param i Integer or logical vector specifying the scans/spectra to which #' \code{x} should be sub-setted. -#' +#' #' @param j Not supported. -#' +#' #' @param drop Not supported. -#' +#' #' @return The sub-setted \code{\linkS4class{xcmsRaw}} object. -#' +#' #' @author Johannes Rainer -#' +#' #' @seealso \code{\link{split.xcmsRaw}} -#' +#' #' @examples #' ## Load a test file #' file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -#' xraw <- xcmsRaw(file) +#' xraw <- xcmsRaw(file, profstep = 0) #' ## The number of scans/spectra: #' length(xraw@scantime) #' @@ -2337,7 +2329,7 @@ setMethod("[", signature(x = "xcmsRaw", #' all.equal(profmat, profmat_2) #' #' @rdname profMat-xcmsSet -#' +#' #' @name profMat-xcmsSet setMethod("profMat", signature(object = "xcmsRaw"), function(object, method, step, diff --git a/R/methods-xcmsSet.R b/R/methods-xcmsSet.R index c4d806ddd..91b5a3c51 100644 --- a/R/methods-xcmsSet.R +++ b/R/methods-xcmsSet.R @@ -5,16 +5,19 @@ ## show setMethod("show", "xcmsSet", function(object) { cat("An \"xcmsSet\" object with", nrow(object@phenoData), "samples\n\n") - cat("Time range: ", paste(round(range(object@peaks[,"rt"]), 1), - collapse = "-"), - " seconds (", paste(round(range(object@peaks[,"rt"])/60, 1), - collapse = "-"), - " minutes)\n", sep = "") - cat("Mass range:", paste(round(range(object@peaks[,"mz"], na.rm = TRUE), 4), - collapse = "-"), "m/z\n") - cat("Peaks:", nrow(object@peaks), "(about", - round(nrow(object@peaks)/nrow(object@phenoData)), "per sample)\n") - cat("Peak Groups:", nrow(object@groups), "\n") + if (nrow(object@peaks)) { + cat("Time range: ", paste(round(range(object@peaks[,"rt"]), 1), + collapse = "-"), + " seconds (", paste(round(range(object@peaks[,"rt"])/60, 1), + collapse = "-"), + " minutes)\n", sep = "") + cat("Mass range:", paste(round(range(object@peaks[,"mz"], na.rm = TRUE), 4), + collapse = "-"), "m/z\n") + cat("Peaks:", nrow(object@peaks), "(about", + round(nrow(object@peaks)/nrow(object@phenoData)), "per sample)\n") + cat("Peak Groups:", nrow(object@groups), "\n") + } else + cat("Peaks: 0\n") cat("Sample classes:", paste(levels(sampclass(object)), collapse = ", "), "\n\n") ## Processing info. @@ -56,18 +59,18 @@ setMethod("show", "xcmsSet", function(object) { #' @description This method updates an \emph{old} \code{\linkS4class{xcmsSet}} #' object to the latest definition. -#' +#' #' @title Update an \code{\linkS4class{xcmsSet}} object -#' +#' #' @param object The \code{\linkS4class{xcmsSet}} object to update. -#' +#' #' @param ... Optional additional arguments. Currently ignored. -#' +#' #' @param verbose Currently ignored. -#' +#' #' @return An updated \code{\linkS4class{xcmsSet}} containing all data from #' the input object. -#' +#' #' @author Johannes Rainer setMethod("updateObject", "xcmsSet", function(object, ..., verbose = FALSE) { ## Create a new empty xcmsSet and start filling it with the slot @@ -373,9 +376,8 @@ setMethod("group.density", "xcmsSet", function(object, bw = 30, minfrac = 0.5, binSize = mzwid, maxFeatures = max, sleep = sleep) - - groups(object) <- res$featureDefinitions - groupidx(object) <- res$peakIndex + groups(object) <- as.matrix(res[, -match("peakidx", colnames(res))]) + groupidx(object) <- res$peakidx object }) @@ -501,7 +503,7 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = family = c("gaussian", "symmetric"), plottype = c("none", "deviation", "mdevden"), col = NULL, ty = NULL) { - + peakmat <- peaks(object) groupmat <- groups(object) if (length(groupmat) == 0) @@ -550,10 +552,10 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = if (plottype %in% c("deviation", "mdevden")) { ## Need also the 'rt' matrix: - rt <- .getPeakGroupsRtMatrix(peakmat, object@groupidx, n, + rt <- .getPeakGroupsRtMatrix(peakmat, object@groupidx, seq_len(n), missing, extra) rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) - + ## define the colors and line types and returns a list of ## mypal, col and ty. Uses the original code if no colors are ## submitted. Supports manually selected colors (e.g. in hex) @@ -622,7 +624,7 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = family = c("gaussian", "symmetric"), plottype = c("none", "deviation", "mdevden"), col = NULL, ty = NULL) { - + peakmat <- peaks(object) groupmat <- groups(object) if (length(groupmat) == 0) @@ -645,7 +647,7 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = } object@rt <- list(raw = rtcor, corrected = rtcor) } - + nsamp <- rowSums(groupmat[,match("npeaks", colnames(groupmat))+unique(classlabel),drop=FALSE]) idx <- which(nsamp >= n-missing & groupmat[,"npeaks"] <= nsamp + extra) @@ -659,7 +661,7 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = rt <- groupval(object, "maxint", "rt")[idx,, drop=FALSE] ## And now order them by median retention time: issue #110 rt <- rt[order(rowMedians(rt, na.rm = TRUE)), , drop = FALSE] - + cat("Retention Time Correction Groups:", nrow(rt), "\n") rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) @@ -877,7 +879,7 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de ## Subset the object if scanrange not NULL if (!is.null(scanrange)) obj1 <- obj1[scanrange[1]:scanrange[2]] - + ## ## added t automatically find the correct scan range from the xcmsSet object ## if(length(obj1@scantime) != length(object@rt$raw[[center]])){ ## ## This is in case the xcmsSet was read using a scanrange, i.e. if @@ -889,7 +891,7 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de ## ##figure out the scan time range ## scantime.start <-object@rt$raw[[center]][1] ## scantime.end <-object@rt$raw[[center]][length(object@rt$raw[[center]])] - + ## scanrange.start <-which.min(abs(obj1@scantime - scantime.start)) ## scanrange.end <-which.min(abs(obj1@scantime - scantime.end)) ## scanrange<-c(scanrange.start, scanrange.end) @@ -898,11 +900,11 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de ## } else{ ## scanrange<-NULL ## } - + for (si in 1:length(idx)) { s <- idx[si] cat(samples[s], " ") - + ## ## Might be better to just get the profile matrix from the center object ## outside of the for loop and then modifying a internal variable within @@ -916,7 +918,7 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de scanrange = scanrange) } profStepPad(obj2) <- profStep ## generate profile matrix - + mzmin <- min(obj1@mzrange[1], obj2@mzrange[1]) mzmax <- max(obj1@mzrange[2], obj2@mzrange[2]) @@ -1043,7 +1045,7 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de ## Why are we rounding here, but NOT in the retcor.peakgroups? ## -> issue #122 ## The point is we're using the un-rounded adjusted rt for the rt, BUT - ## use the rounded values for the adjustment of the peak rts. + ## use the rounded values for the adjustment of the peak rts. rtdevsmo[[s]] <- round(rtcor[[s]]-object@rt$corrected[[s]],2) rm(obj2) @@ -1189,7 +1191,9 @@ setMethod("fillPeaks.chrom", "xcmsSet", function(object, nSlaves = 0, rtcor <- object@rt$corrected ## Remove groups that overlap with more "well-behaved" groups - numsamp <- rowSums(groupmat[,(match("npeaks", colnames(groupmat))+1):ncol(groupmat),drop=FALSE]) + sampclasscols <- seq(match("npeaks", colnames(groupmat))+1, + length.out=length(levels(sampclass(object)))) + numsamp <- rowSums(groupmat[ , sampclasscols, drop=FALSE]) uorder <- order(-numsamp, groupmat[,"npeaks"]) uindex <- rectUnique(groupmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE], uorder) @@ -1558,7 +1562,9 @@ setMethod("diffreport", "xcmsSet", function(object, if (!is.numeric(w) || !is.numeric(h)) stop("'h' and 'w' have to be numeric") - ## require(multtest) || stop("Couldn't load multtest") + if (!requireNamespace("multtest", quietly = TRUE)) + stop("The use of 'diffreport' requires package 'multtest'. Please ", + "install with 'Biobase::install(\"multtest\")'") value <- match.arg(value) groupmat <- groups(object) @@ -1595,7 +1601,7 @@ setMethod("diffreport", "xcmsSet", function(object, warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", "-in missing peak values. Note however that this will also ", "insert intensities of 0 for peaks that can not be filled in.") - + mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) @@ -1611,7 +1617,7 @@ setMethod("diffreport", "xcmsSet", function(object, testclab <- c(rep(0,length(c1)),rep(1,length(c2))) if (min(length(c1), length(c2)) >= 2) { - tstat <- mt.teststat(testval, testclab, ...) + tstat <- multtest::mt.teststat(testval, testclab, ...) pvalue <- pval(testval, testclab, tstat) } else { message("Too few samples per class, skipping t-test.") @@ -1967,28 +1973,38 @@ setMethod("[", "xcmsSet", function(x, i, j, ..., drop = FALSE) { x <- updateObject(x) ## don't allow i, but allow j to be: numeric or logical. If ## it's a character vector <- has to fit to sampnames(x) - if(!missing(i)) + if (!missing(i)) stop("Subsetting to rows is not supported!") - if(missing(j)) + if (missing(j)) { j <- 1:length(sampnames(x)) - if(class(j)=="character"){ - ## check if these match to the sampnames. - matches <- match(j, sampnames(x)) - if(length(matches)!=length(j)) - stop("All provided sample names have to match the", - " sample names in the xcmsSet!") - j <- matches - } - if(class(j)=="logical"){ - if(length(j) != length(sampnames(x))) - stop("If j is a logical its length has to match", - " the number of samples in the xcmsSet!") - j <- which(j) - } - if(class(j)=="numeric") + } else { + if (is.character(j)) { + ## check if these match to the sampnames. + matches <- match(j, sampnames(x)) + if(length(matches)!=length(j)) + stop("All provided sample names have to match the", + " sample names in the xcmsSet!") + j <- matches + } + if (is.logical(j)) { + if (length(j) != length(sampnames(x))) + stop("If j is a logical its length has to match", + " the number of samples in the xcmsSet!") + j <- which(j) + } + if (!length(j)) { + tmp <- new("xcmsSet") + cn <- c("rtmin", "rtmax", "rt", "mzmin", "mzmax", "mz", + "into", "maxo", "sample") + pks <- matrix(ncol = length(cn), nrow = 0, dimnames = list(c(), cn)) + tmp@peaks <- pks + return(tmp) + } + } + if (class(j) == "numeric") j <- as.integer(j) - if(class(j)!="integer") - stop("j has to be a numeric vector specifying the", + if(!is.integer(j)) + stop("j has to be an integer vector specifying the", " index of the samples for which the data has to be extracted") ## check if j is within the range of 1:length(sampnames) if(any(!j %in% (1:length(sampnames(x))))) @@ -2164,7 +2180,7 @@ setMethod("specDist", signature(object="xcmsSet"), ############################################################ ## showError #' @title Extract processing errors -#' +#' #' @aliases showError #' #' @description If peak detection is performed with \code{\link{findPeaks}} @@ -2174,15 +2190,15 @@ setMethod("specDist", signature(object="xcmsSet"), #' accessed with the \code{showError} method. #' #' @param object An \code{\linkS4class{xcmsSet}} object. -#' +#' #' @param message. Logical indicating whether only the error message, or the #' error itself should be returned. -#' +#' #' @param ... Additional arguments. #' #' @return A list of error messages (if \code{message. = TRUE}) or errors or an #' empty list if no errors are present. -#' +#' #' @author Johannes Rainer setMethod("showError", signature(object = "xcmsSet"), function(object, message. = TRUE, ...) { diff --git a/R/models.R b/R/models.R index da8d5a09c..211942bbe 100644 --- a/R/models.R +++ b/R/models.R @@ -1,4 +1,4 @@ -SSgauss <- selfStart(~ h*exp(-(x-mu)^2/(2*sigma^2)), function(mCall, data, LHS) { +SSgauss <- selfStart(~ h*exp(-(x-mu)^2/(2*sigma^2)), function(mCall, data, LHS, ...) { xy <- sortedXyData(mCall[["x"]], LHS, data) diff --git a/R/mzClust.R b/R/mzClust.R index 571b93f8c..303d5e7a8 100644 --- a/R/mzClust.R +++ b/R/mzClust.R @@ -26,9 +26,10 @@ mzClustGeneric <- function(p,sampclass=NULL, bin <- pord[pos] pos <- pos+1 basepeak <- p[bin[1],1] - error_range <- c(basepeak, basepeak*error_window+basepeak+2*mzabs) - while(pos < numpeaks && p[pord[pos],1] <= error_range[2]) { - bin <- c(bin,pord[pos]) + error_range <- c(basepeak, + basepeak * error_window + basepeak + 2 * mzabs) + while(pos < numpeaks && p[pord[pos], 1] <= error_range[2]) { + bin <- c(bin, pord[pos]) pos <- pos + 1 } @@ -74,8 +75,8 @@ mzClustGeneric <- function(p,sampclass=NULL, lst <- list(stat=groupvec,members=grp_members) lst } - ppm_error <- mzppm/1000000 - error_window <- 2*ppm_error + ppm_error <- mzppm / 1000000 + error_window <- 2 * ppm_error ## numeric version of classlabel if(is.null(sampclass)) { diff --git a/R/netCDF.R b/R/netCDF.R deleted file mode 100644 index 45559c10d..000000000 --- a/R/netCDF.R +++ /dev/null @@ -1,170 +0,0 @@ -netCDFStrError <- function(ncerr) { - - buflen <- 255 - - .C("NetCDFStrError", - as.integer(ncerr), - as.integer(buflen), - out = paste(rep(" ", buflen), collapse = ""), - PACKAGE = "xcms")$out -} - -netCDFIsFile <- function(filename) { - - ncid <- netCDFOpen(filename) - if (!is.null(attr(ncid, "errortext"))) - return(FALSE) - netCDFClose(ncid) - - return(TRUE) -} - -netCDFOpen <- function(filename) { - - result <- .C("NetCDFOpen", - as.character(filename), - ncid = integer(1), - status = integer(1), - PACKAGE = "xcms") - - if (result$status) - return(structure(result$status, - errortext = netCDFStrError(result$status))) - - return(result$ncid) -} - -netCDFClose <- function(ncid) { - - result <- .C("NetCDFClose", - as.integer(ncid), - status = integer(1), - PACKAGE = "xcms") - - if (result$status) - return(structure(result$status, - errortext = netCDFStrError(result$status))) - - result$status -} - -netCDFVarID <- function(ncid, var) { - - result <- .C("NetCDFVarID", - as.integer(ncid), - as.character(var), - id = integer(1), - status = integer(1), - PACKAGE = "xcms") - - if (result$status) - return(structure(result$status, - errortext = netCDFStrError(result$status))) - - return(result$id) -} - -netCDFVarLen <- function(ncid, var) { - - if (is.character(var)) - var <- netCDFVarID(ncid, var) - - result <- .C("NetCDFVarLen", - as.integer(ncid), - as.integer(var), - len = integer(1), - status = integer(1), - PACKAGE = "xcms") - - if (result$status) - return(structure(result$status, - errortext = netCDFStrError(result$status))) - - return(result$len) -} - -netCDFVarDouble <- function(ncid, var) { - - if (is.character(var)) - var <- netCDFVarID(ncid, var) - - if (!is.null(attr(var, "errortext"))) - return(var) - - len <- netCDFVarLen(ncid, var) - if (!is.null(attr(len, "errortext"))) - return(len) - - .C("NetCDFVarDouble", - as.integer(ncid), - as.integer(var), - data = double(len), - status = integer(1), - PACKAGE = "xcms")$data -} - -netCDFVarInt <- function(ncid, var) { - - if (is.character(var)) - var <- netCDFVarID(ncid, var) - - if (!is.null(attr(var, "errortext"))) - return(var) - - len <- netCDFVarLen(ncid, var) - if (!is.null(attr(len, "errortext"))) - return(len) - - .C("NetCDFVarInt", - as.integer(ncid), - as.integer(var), - data = integer(len), - status = integer(1), - PACKAGE = "xcms")$data -} - -netCDFMSPoints <- function(ncid, scanIndex) { - - if (!is.integer(scanIndex)) scanIndex <- as.integer(scanIndex) - - var <- netCDFVarID(ncid, "mass_values") - if (!is.null(attr(var, "errortext"))) - return(var) - - len <- netCDFVarLen(ncid, var) - if (!is.null(attr(len, "errortext"))) - return(len) - - .C("NetCDFMSPoints", - as.integer(ncid), - as.integer(length(scanIndex)), - scanIndex, - as.integer(len), - massValues = double(len), - intensityValues = double(len), - status = integer(1), - PACKAGE = "xcms")[c("massValues", "intensityValues")] -} - -netCDFRawData <- function(ncid) { - - rt <- netCDFVarDouble(ncid, "scan_acquisition_time") - if (!is.null(attr(rt, "errortext"))) - stop("Could not read scan times") - - tic <- netCDFVarDouble(ncid, "total_intensity") - if (!is.null(attr(tic, "errortext"))) - stop("Could not read total ion current") - - scanindex <- netCDFVarInt(ncid, "scan_index") - if (!is.null(attr(scanindex, "errortext"))) - stop("Could not read scan indecies") - - pointValues <- netCDFMSPoints(ncid, scanindex) - if (!is.null(attr(pointValues, "errortext"))) - stop("Could not read mass/intensity values") - - return(list(rt = rt, tic = tic, scanindex = scanindex, - mz = pointValues$massValues, - intensity = pointValues$intensityValues)) -} diff --git a/R/plotQC.R b/R/plotQC.R index cd42ef7bf..08e7711cf 100644 --- a/R/plotQC.R +++ b/R/plotQC.R @@ -42,22 +42,44 @@ #' plotQC(xsg, what="rtdevsample") #' #' @author Michael Wenk, Michael Wenk + plotQC <- function(object, sampNames = NULL, sampColors = NULL, sampOrder=NULL, what=c("mzdevhist", - "rtdevhist", - "mzdevmass", - "mzdevtime", - "mzdevsample", - "rtdevsample")) { + "rtdevhist", + "mzdevmass", + "mzdevtime", + "mzdevsample", + "rtdevsample")) { + + if (inherits(object, "xcmsSet")) { + deviations <- list(mzs=groupval(object, value = "mz"), + rts=groupval(object, value = "rt")) + deviations$mzdev <- deviations$mzs - groups(object)[,"mzmed"] + deviations$rtdev <- deviations$rts - groups(object)[,"rtmed"] + + } else if (inherits(object, "XCMSnExp")) { + deviations <- list(mzs=featureValues(object, value = "mz"), + rts=featureValues(object, value = "rt")) + deviations$mzdev <- deviations$mzs - featureDefinitions(object)[,"mzmed"] + deviations$rtdev <- deviations$rts - featureDefinitions(object)[,"rtmed"] + } else { + stop("object not xcmsSet nor XCMSnExp") + } if (missing(sampNames) || is.null(sampNames)) { + if (class(object)=="xcmsSet") { sampNames <- sampnames(object) + } else if (class(object)=="XCMSnExp") { + sampNames <- sampleNames(object) + } else { + stop("object not xcmsSet nor XCMSnExp") + } } - n <- length(sampclass(object)) + n <- ncol(deviations$mzs) if (missing(sampColors) || is.null(sampColors)) { sampColors <- rainbow(n) } @@ -66,111 +88,72 @@ plotQC <- function(object, sampOrder <- 1:n } - deviations <- getdeviations(object, sampNames) - deviation_mzs <- deviations$mzdev - deviation_rts <- deviations$rtdev - mzs <- deviations$mzs - rts <- deviations$rts - ## Plot histograms of deviation if ("mzdevhist" %in% what) { - hist(deviation_mzs, breaks=100, - ylab = "Number of Peaks", - xlab = "m/z Deviation", - main = "m/z Deviation") + hist(deviations$mzdev, breaks=100, + ylab = "Number of Peaks", + xlab = "m/z Deviation", + main = "m/z Deviation") } if ("rtdevhist" %in% what) { - hist(deviation_rts, breaks=100, ylab = "Number of Peaks", - xlab = "Retention Time Deviation", - main = "Retention Time Deviation") + hist(deviations$rtdev, breaks=100, ylab = "Number of Peaks", + xlab = "Retention Time Deviation", + main = "Retention Time Deviation") } if ("mzdevmass" %in% what) { - ## Plot mass deviation depending on absolute mass - # Add extra space to right of plot area; change clipping to figure - # par(mar=c(6, 8, 8.1, 10), xpd=TRUE) - # par(oma = c(0,0,0,10)) - plot(x = as.vector(mzs), - y = deviation_mzs, - col=sampColors, pch = ".", type = "p", - main = "m/z Deviation vs. m/z", - xlab = "m/z", ylab = "m/z deviation") - - for(i in 1:n) { - data <- na.omit(data.frame(mzs = mzs[,i], deviation_mzs = deviation_mzs[,i])) - lo <- loess(formula = deviation_mzs ~ mzs, data = data) - currSampleValues <- mzs[!is.na(mzs[,i]),i] - currSampleValues <- currSampleValues[order(currSampleValues)] - lines(currSampleValues, predict(lo), col=sampColors[i], lwd = 2) + ## Plot mass deviation depending on absolute mass + # Add extra space to right of plot area; change clipping to figure + # par(mar=c(6, 8, 8.1, 10), xpd=TRUE) + # par(oma = c(0,0,0,10)) + plot(x = as.vector(deviations$mzs), + y = deviations$mzdev, + col=sampColors, pch = ".", type = "p", + main = "m/z Deviation vs. m/z", + xlab = "m/z", ylab = "m/z deviation") + + for(i in 1:n) { + data <- na.omit(data.frame(mzs = deviations$mzs[,i], mzdev = deviations$mzdev[,i])) + lo <- loess(formula = mzdev ~ mzs, data = data) + currSampleValues <- deviations$mzs[!is.na(deviations$mzs[,i]),i] + currSampleValues <- currSampleValues[order(currSampleValues)] + lines(currSampleValues, predict(lo), col=sampColors[i], lwd = 2) + } + legend("topright", legend=sampNames, col=sampColors, pch = c(1), cex=0.35, ncol=4, title = "samples") } - legend("topright", legend=sampNames, col=sampColors, pch = c(1), cex=0.35, ncol=4, title = "samples") -} - if ("mzdevtime" %in% what) { - ## Plot mass deviation depending on retention time - ## Add extra space to right of plot area; change clipping to figure - ##par(mar=c(5.1, 5.1, 8.1, 8.1), xpd=TRUE) - plot(x = as.vector(rts), y = deviation_mzs, pch = ".", type = "p", - col=sampColors, main = "m/z deviation vs. retention time", - xlab = "Retention Time", ylab = "m/z deviation") - - for(i in 1:n){ - data <- na.omit(data.frame(rts = rts[,i], deviation_mzs = deviation_mzs[,i])) - lo <- loess(formula = deviation_mzs ~ rts, data = data) - currSampleValues <- rts[!is.na(rts[,i]),i] - currSampleValues <- currSampleValues[order(currSampleValues)] - lines(currSampleValues, predict(lo), col=sampColors[i], lwd = 1) + if ("mzdevtime" %in% what) { + ## Plot mass deviation depending on retention time + ## Add extra space to right of plot area; change clipping to figure + ##par(mar=c(5.1, 5.1, 8.1, 8.1), xpd=TRUE) + plot(x = as.vector(deviations$rts), y = deviations$mzdev, pch = ".", type = "p", + col=sampColors, main = "m/z deviation vs. retention time", + xlab = "Retention Time", ylab = "m/z deviation") + + for(i in 1:n){ + data <- na.omit(data.frame(rts = deviations$rts[,i], mzdev = deviations$mzdev[,i])) + lo <- loess(formula = mzdev ~ rts, data = data) + currSampleValues <- deviations$rts[!is.na(deviations$rts[,i]),i] + currSampleValues <- currSampleValues[order(currSampleValues)] + lines(currSampleValues, predict(lo), col=sampColors[i], lwd = 1) + } + legend("topright", legend=sampNames, col=sampColors, pch = c(1), cex=0.35, ncol=4, title = "samples") } - legend("topright", legend=sampNames, col=sampColors, pch = c(1), cex=0.35, ncol=4, title = "samples") -} ## still to come: median deviations per sample, ## to detect corrupt samples if ("mzdevsample" %in% what) { - barplot(apply(deviation_mzs[,sampOrder], MARGIN=2, FUN=function(x) median(x, na.rm=TRUE)), - col = sampColors, xlab = "", ylab = "m/z Deviation", - names.arg = sampNames, las = 2) + barplot(apply(deviations$mzdev[,sampOrder], MARGIN=2, FUN=function(x) median(x, na.rm=TRUE)), + col = sampColors, xlab = "", ylab = "m/z Deviation", + names.arg = sampNames, las = 2) } if ("rtdevsample" %in% what) { - barplot(apply(deviation_rts[,sampOrder], MARGIN=2, FUN=function(x) median(x, na.rm=TRUE)), - col = sampColors, xlab = "", ylab = "Retention Time Deviation", - names.arg = sampNames, las = 2) + barplot(apply(deviations$rtdev[,sampOrder], MARGIN=2, FUN=function(x) median(x, na.rm=TRUE)), + col = sampColors, xlab = "", ylab = "Retention Time Deviation", + names.arg = sampNames, las = 2) } invisible(deviations) } - - -getdeviations <- function(object, sampNames = NULL) { - if (missing(sampNames) || is.null(sampNames)) { - sampNames <- sampnames(object) - } - n <- length(sampnames(object)) - p <- peaks(object) - - ## Get a matrix of all mz and another one of all RT in all samples - pidx <- groupval(object, value="index") - colnames(pidx) <- sampNames - mzs <- p[pidx, "mz"] - dim(mzs) <- c((length(mzs)/n),n) - - rts <- p[pidx, "rt"] - dim(rts) <- c((length(rts)/n),n) - - ## Calculate deviation between median mz (or RT) and each observed mz (or RT) - result <- list(mzs=mzs, - rts=rts, - mzdev=mzs - groups(object)[,"mzmed"], - rtdev=rts - groups(object)[,"rtmed"]) -} - - - -## plotQC(xsg, what="mzdevhist") -## plotQC(xsg, what="rtdevhist") -## plotQC(xsg, what="mzdevmass") -## plotQC(xsg, what="mzdevtime") -## plotQC(xsg, what="mzdevsample") -## plotQC(xsg, what="rtdevsample") diff --git a/R/xdata.R b/R/xdata.R new file mode 100644 index 000000000..ad6fb549e --- /dev/null +++ b/R/xdata.R @@ -0,0 +1,11 @@ +#' @title LC-MS preprocessing result test data +#' +#' @description +#' +#' The `xdata` variable represent the results from a `xcms`-based +#' pre-processing of an LC-MS untargeted metabolomics data set. The raw data +#' files are provided in the `faahKO` package. The pre-processing of this data +#' set is described in detail in the *xcms* vignette of the `xcms` package. +#' +#' @name xdata +NULL \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 000000000..6ec4e1f30 --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +[![R-CMD-check-bioc](https://github.com/sneumann/xcms/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/sneumann/xcms/actions?query=workflow%3AR-CMD-check-bioc) +[![codecov.io](https://codecov.io/github/sneumann/xcms/coverage.svg?branch=master)](https://codecov.io/github/sneumann/xcms?branch=master) +[![Years in Bioconductor](http://www.bioconductor.org/shields/years-in-bioc/xcms.svg)](http://www.bioconductor.org/packages/release/bioc/html/xcms.html) +[![Bioconductor release build status](http://www.bioconductor.org/shields/build/release/bioc/xcms.svg)](http://www.bioconductor.org/packages/release/bioc/html/xcms.html) +[![Bioconductor devel build status](http://www.bioconductor.org/shields/build/devel/bioc/xcms.svg)](http://www.bioconductor.org/checkResults/devel/bioc-LATEST/xcms.html) + +# The `xcms` package (version >= 3) + + + + +Version >= 3 of the `xcms` package are updated and partially re-written versions +of the original `xcms` package. The version number *3* was selected to avoid +confusions with the `xcms2` (http://pubs.acs.org/doi/abs/10.1021/ac800795f) +software. While providing all of the original software's functionality, `xcms` +version >= 3 aims at: + +1) Better integration into the Bioconductor framework: + - Make use and extend classes defined in the `MSnbase` package. + - Implement class versioning (Biobase's `Versioned` class). + - Use `BiocParallel` for parallel processing. +2) Implementation of validation methods for all classes to ensure data + integrity. +3) Easier and faster access to raw spectra data. +4) Cleanup of the source code: + - Remove obsolete and redundant functionality (`getEIC`, `rawEIC` etc). + - Unify interfaces, i.e. implement a layer of base functions accessing all + analysis methods (which are implemented in C, C++ or R). +5) Using a more consistent naming scheme of methods that follows established + naming conventions (e.g. `correspondence` instead of `grouping`). +6) Update, improve and extend the documentation. +7) Establishing a layer of base R-functions that interface all analysis + methods. These should take M/Z, retention time (or scan index) and intensity + values as input along with optional arguments for the downstream functions + (implemented in C, C++ or R). The input arguments should be basic R objects + (numeric vectors) thus enabling easy integration of analysis methods in other + R packages. +8) The user interface's analysis methods should take the (raw) data object and a + parameter class, that is used for dispatching to the corresponding analysis + algorithm. + +Discussions and suggestions are welcome: +https://github.com/sneumann/xcms/issues + +For more information see the package [vignette](vignettes/xcms.Rmd). diff --git a/data/faahko_sub.RData b/data/faahko_sub.RData new file mode 100644 index 000000000..7bdbe9244 Binary files /dev/null and b/data/faahko_sub.RData differ diff --git a/data/xdata.RData b/data/xdata.RData new file mode 100644 index 000000000..4ba55713f Binary files /dev/null and b/data/xdata.RData differ diff --git a/inst/NEWS b/inst/NEWS index 091d144eb..5156a1585 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,6 +1,483 @@ -CHANGES IN VERSION 3.3.3 -------------------------- +Changes in version 3.21.3 +--------------------- +- Only fixes in the long running tests + +Changes in version 3.21.2 +--------------------- + +- Re-write the `reconstructChromPeakSpectra` for DIA data analysis to fix an + issue with chromatographic peaks in overlapping SWATH isolation windows and + generally to improve performance. + +Changes in version 3.21.1 +---------------------- + +- Fix error with `fillChromPeaks` on sparse data (many empty spectra) and peak + detection performed with `MatchedFilterParam` (issue #653). +- Update to newer function names in the `rgl` package (issue #654). + + +Changes in version 3.19.2 +---------------------- + +- Update/expand documentation for the `firstBaselineCheck` parameter of + centWave. + +Changes in version 3.19.1 +---------------------- + +- Update documentation to reference updates in MassSpecWavelet package. + +Changes in version 3.17.6 +---------------------- + +- Rewrite code to subset features and chromatographic peaks. This results in a + perfomance improvement for `filterFile` and similar functions. + +- Add parameter `expandMz` to `featureChromatograms` + (https://github.com/sneumann/xcms/issues/612). + +Changes in version 3.17.5 +---------------------- + +- Change the way the m/z value for a chromatographic peak is determined by + centWave: if a ROI contains more than one peak for one scan (spectrum) an + intensity-weighted m/z is reported for that scan. The m/z of the + chromatographic peak is then calculated based on these reported m/z values for + each scan (spectrum). In the original version the mean m/z for a scan was + reported instead. As a result, m/z values of chromatographic peaks are now + slightly different but are expected to be more accurate. See + https://github.com/sneumann/xcms/issues/590 for more details. + +Changes in version 3.17.4 +---------------------- + +- Add `transformIntensity` method. +- Fix issue when calling `chromPeakSpectra` or `featureSpectra` on an object + that contains also files with only MS1 spectra + (https://github.com/sneumann/xcms/issues/603). + +Changes in version 3.17.2 +---------------------- + +- Use mzML instead of mzData files in testing and vignettes, + since mzR drop mzData reading and msdata package will drop mzData files as well + +Changes in version 3.17.1 +---------------------- + +- Fix bug in feature grouping by EIC correlation that would return a + non-symmetric similarity matrix. +- Fix error message from issue [584](https://github.com/sneumann/xcms/issues/584). + +Changes in version 3.15.5 +---------------------- + +- Disable testing on windows i386, providing some speedup +- Disable parallel processing on Windows, causing an issue in testthat on BioC build check + +Changes in version 3.15.4 +---------------------- + +- Fix in `plot` with `type = "XIC"` to plot an empty plot if no data is present. +- Skip re-indexing of peaks to features if not necessary. This results in + performance improvements for MS1 only data. + +Changes in version 3.15.3 +---------------------- + +- Add `manualFeatures` allowing to manually define and add features to an + `XCMSnExp` object. +- Add `plotChromatogramsOverlay` function to support plotting of multiple EICs + from the same sample into the same plot (eventually stacked). +- Add feature grouping by EIC similarity: `EicSimilarityParam`. +- Import `compareChromatograms` from `MSnbase`. +- Add feature grouping by similar retention time: `SimilarRtimeParams. +- Add feature grouping by similarity of feature abundances across samples: + `AbundanceSimilarityParam`. +- Add feature grouping methodology based on `MsFeatures`. + +Changes in version 3.15.2 +---------------------- + +- Fix LC-MS/MS vignette. + +Changes in version 3.15.1 +---------------------- + +- Compatibility fix for nls() in R >= 4.1, contributed by Rick Helmus. + +Changes in version 3.13.8 +---------------------- + +- Fix plotQC() for XCMSnExp objects + +Changes in version 3.13.7 +---------------------- + +- Add `featureArea` function to extract the m/z-rt region for features. +- Fix `featureSpectra` function. +- Re-add the LC-MS/MS vignette. +- Feature: plotQC() supports XCMSnExp objects now + +Changes in version 3.13.6 +---------------------- + +- Fix issue #545: skip second centWave run with CentWavePredIsoParam in regions + of interest with undefined peak boundaries/scan ranges. +- Temporarily remove the LC-MS/MS vignette (until MsBackendMgf is added to + Bioconductor). + +Changes in version 3.13.5 +---------------------- + +- Add `filterChromPeaks` method to filter chromatographic peaks in a + `XChromatogram` or `XChromatograms` object. +- Add `filterChromPeaks` method for `XCMSnExp` (issue #541). +- Support return of `Spectra` objects by `chromPeakSpectra`, `featureSpectra` + and `reconstructChromPeakSpectra`. +- Support extraction of MS1 spectra with `chromPeakSpectra`. +- Support extraction of the spectrum with the largest total signal or largest + base peak signal in `chromPeakSpectra`. +- Add support for extraction of spectra for selected/individual peaks/features + using the `peaks` and `features` parameter in `chromPeakSpectra` and + `featureSpectra`, respectively. + +Changes in version 3.13.4 +---------------------- + +- Import `Param` object from `ProtGenerics`. +- Import `filterIntensity`, `normalize` and `alignRt` for `Chromatogram` and + `MChromatograms` from `MSnbase`. + + +Changes in version 3.13.3 +---------------------- + +- `align,Chromatogram` gains new method `"none"` which will only keep values + with identical retention times. For `method = "matchRtime"` the (much faster) + matching function `closest` from the `MsCoreUtils` package is used. +- Method `correlate,Chromatogram` gains parameter `useIntensitiesAbove` to + perform the correlation only with values larger than this threshold + (avoiding thus high correlation because of many 0-values). +- Add method `filterIntensity,Chromatogram` that allows to filter a chromatogram + object keeping only data points with an intensity above a user provided + threshold. + + +Changes in version 3.13.2 +---------------------- + +- Add new function `manualChromPeaks` allowing to manually add and integrate + chromatographic peaks. + + +Changes in version 3.13.1 +---------------------- + +- Support subsetting of `XChromatograms` with `drop = FALSE`. + + +Changes in version 3.11.8 +---------------------- + +- Disable parallel processing in vignettes. + + +Changes in version 3.11.7 +---------------------- + +- More efficient splitting data per file especially for larger data sets. +- Disable parallel processing in examples. + + +Changes in version 3.11.6 +---------------------- + +- Add `FilterIntensityParam` to filter chromatographic peaks on intensity + (issue #502). +- Add `estimatePrecursorIntensity` function to determine the precursor intensity + for MS2 spectra from the neighboring MS1 spectra. + + +Changes in version 3.11.4 +---------------------- + +- Change from `Spectra` and `Chromatograms` to `MSpectra` and `MChromatograms` + from MSnbase version >= 2.15.3. + + +Changes in version 3.11.3 +---------------------- + +- `reconstructChromPeakSpectra`: report also polarity and `precusorIntensity`. +- `reconstructChromPeakSpectra`: ensure a retention time is reported for + reconstructed MS2 spectra (issue #485). +- Change default for `expandRt` to `0` in `reconstructChromPeakSpectra`. +- Fix error in `refineChromPeaks,MergeNeighboringPeaksParam` if no peaks found + to be merged. + + +Changes in version 3.11.2 +---------------------- + +- Add `fillChromPeaks,ChromPeakAreaParam` to base the area from which missing + peak data should be filled-in on the actually detected chromatographic peaks + of a feature. +- Potential fix for issue #481: function should no longer throw an error because + retention times are of length 0. +- More efficient splitting of processing which should increase the speed of + the findChromPeaks, refineChromPeaks, reconstructChromPeakSpectra and + chromPeakSpectra calls. + + +Changes in version 3.11.1 +---------------------- + +- Fix issue #471: conversion from `XCMSnExp` to `xcmsSet` looses phenodata + (thanks to Andris Jankevics for reporting and providing a solution). +- Add `normalize` method for `Chromatogram` and `Chromatograms` objects. +- `featureChromatograms` gets new parameter `n` and `value` to extract EICs + only from the top n samples with highest intensities. +- `filterFile` gets new parameter `keepFeatures` to support retaining + correspondence results even if a data set is filtered by file. +- Export the virtual `Param` class. +- Add filterColumnsIntensityAbove method for Chromatograms object that allows + to select columns (samples) of an Chromatograms object for which intensities + of its chromatographic data are higher than a threshold. +- Add removeIntensity method for Chromatogram, Chromatograms, XChromatogram + and XChromatograms objects allowing to *remove* intensities based on different + criteria. +- Add correlate method for Chromatograms allowing to correlate multiple + chromatograms with each other. + + +Changes in version 3.9.4 +---------------------- + +- Fix issue in centWave which skips peak detection depending on minimum + peakwidth (issue #445): add parameter `extendLengthMSW` in `CentWaveParam`. + Thanks to William Kumler for contributing the fix. +- Tentatively reduce memory requirements in `fillChromPeaks`. +- Fix issue #467 for fillPeaks() of an xcmsSet converted from an XCMSnSet + +Changes in version 3.9.3 +---------------------- + +- Move multtest from Imports to Suggests to avoid duplicated method definition + for plot (issue #459). +- Add support for peak filling from MS level > 1 to fillChromPeaks. +- featureValues gains parameter msLevel to extract feature values for features + of all, or from a specific MS level. +- refineChromPeaks supports different MS levels. +- Added support to perform correspondence analysis on MS level > 1 and add the + respective results to already present feature definitions. +- hasChromPeaks and hasFeatures gain parameter msLevel to check for presence of + chromatographic peaks or features from a specific MS level. + + +Changes in version 3.9.2 +---------------------- + +- Fix featureChromatograms and chromatograms on a XCMSnExp object with features: + features can be duplicated across rows (EICs). +- findChromPeaks: add parameter `add` to allow several rounds of peak detections + on the same object. +- Small performance enhancement in fillChromPeaks. +- Better support for MS > 1 data in fillChromPeaks: skip MS level 2 spectra for + filling in. +- Add refineChromPeaks for XChromatogram and XChromatograms objects. +- Add groupOverlaps function to group arbitrary ranges. +- Add quantify,XCMSnExp object to quantify an XCMSnExp into a + SummarizedExperiment. +- Fine-tune MergeNeighboringPeaks peak refinement method: the average of the + 3 data points between candidate peaks is used to evaluate whether the peaks + should be merged making the approach more robust against outliers. In + addition, an ion chromatogram for candidate peaks is extracted with an m/z + range expanded depending on the expandMz and ppm setting ensuring that low + intensity data points between candidate peaks are not missed out (because + their m/z might be slightly shifted on ToF instruments). The mzmin and mzmax + of the merged peak represents also the minimum and maximum m/z of all data + points in that extracted ion chromatogram. + + +Changes in version 3.9.1 +---------------------- + +- Fix problem of not shown/plotted peak positions in plotChromPeakSpectra + for experiments in which peaks were not detected in the first sample(s). +- Add method *from_to* to missing value imputation method `imputeRowMinRand`. +- Show warning in findChromPeaks if empty spectra are detected. +- Add refineChromPeaks method and CleanPeaksParam class to allow removal of + chromatographic peaks exceeding a user-definable maximal peak width. +- Add MergeNeighboringPeaksParam for refineChromPeaks to allow merging of + chromatographic peaks close in m/z and retention time with a signal between + them higher than a certain threshold (issue #414). +- Fix misspelled parameter `mzd` in LC-MS/MS vignette. + + +Changes in version 3.7.5 +---------------------- + +- Remove xcmsMSn vignette (based on old xcms). + + +Changes in version 3.7.4 +---------------------- + +- mzClust correspondence analysis: check and fix missing values in column mz of + the peaks matrix (issue #416). + + +Changes in version 3.7.3 +---------------------- + +- plot type = "XIC" on an XCMSnExp object will draw rectangles indicating the + identified chromatographic peaks. +- Add a vignette describing LC-MS/MS data analysis with xcms. + + +Changes in version 3.7.2 +---------------------- + +- Fix documentation (issue #401). +- Add support for SWATH data analysis. + + +Changes in version 3.7.1 +---------------------- + +- Add correlate method for Chromatogram objects. +- Add parameter lwd to plotAdjustedRtime. +- Add align method for Chromatogram objects. +- Add findChromPeaksIsolationWindow to enable chromatographic peak detection + in isolation windows. +- Fix issue in chromPeakSpectra with method = "signal". +- chromPeakSpectra and featureSpectra return now MS2 spectra with an precursor + m/z >= mzmin, <= mzmax and retention time >= rtmin, <= rtmax. +- Improve performance of chromPeakSpectra and featureSpectra. + + +Changes in version 3.5.5 +---------------------- + +- Add dirname and dirname<- methods for OnDiskMSnExp to change the path to the + raw data files. +- Add section "Subset-based alignment" to the xcms vignette to describe the + alignment possibility to perform alignments based on a subset of samples + (e.g. QC samples). + + +Changes in version 3.5.4 +---------------------- + +- Fix problem in featureChromatograms with include = "feature_only" that could + return a non-valid object. +- Ensure that XCMSnExp objects are updated if necessary in all analysis methods. + + +Changes in version 3.5.3 +---------------------- + +- Fix unit tests. + + +Changes in version 3.5.2 +---------------------- + +- Small changes in fillChromPeaks,XCMSnExp to reduce memory demand. +- Fix issue #359. +- Fix issue #360: rawEIC skipped last scan/spectrum if rtrange was provided. +- filterMsLevel keeps now chromatographic peaks and feature definitions from the + specified MS levels (issue #362). +- Fix bug in `xcmsRaw` that leads to a netCDF error message (issue #363). +- Add parameter msLevel to chromPeaks for XCMSnExp objects. +- Add chromPeakData to allow adding arbitrary annotation to chromatographic + peaks. +- Change default of parameter value in featureValues from value = "index" to + value = "into". +- Add parameter isFilledColumn to chromPeaks allowing the old behaviour to + include the is_filled column in the chromatographic peak matrix. + + +Changes in version 3.5.1 +---------------------- + +- Fix issue #349. +- Add updateObject function for XCMSnExp objects (issue #347). +- Add dropFilledChromPeaks methods for XChromatogram and XChromatograms objects. +- Add parameter filled = FALSE to chromatogram and featureChromatograms + functions. +- Fix matchedFilter peak detection problems with empty spectra (issue #325). +- featureChromatograms extracts by default only chromatographic peaks associated + with a feature. +- chromatogram,XCMSnExp extracts an XChromatogram containing also + chromatographic peaks and feature definitions. +- Add featureValues method for XChromatograms objects (issue #336). +- Add correspondence analysis (peak grouping) for chromatographic data (for now + only with PeakDensity method; issue #336). +- Add featureDefinitions slot to XChromatograms object and related accessor + methods. +- Add subset alignment option subsetAdjust = "average" to adjust left-out + samples (blanks or simply non-subset samples) based on an interpolation from + the results of the previous and subsequent subset sample. +- Add parameter subsetAdjust to PeakGroupsParam allowing to switch between + different methods to adjust samples left out in the alignment process. +- Alignment based on a sample subset for the peak groups method (issue #335): + sample subset can be defined with the subset parameter, samples not included + in the subset will be aligned based on the adjusted retention times of the + closest sample in the subset. +- Add findChromPeaks,XChromatograms (issue #332). +- Add processHistory,XChromatograms. +- Add plot,XChromatograms method with automatic peak highlighting (issue #334). +- Add hasChromPeaks,XChromatograms method. +- Add XChromatograms class with constructor function and coercing method. +- Add hasChromPeaks,XChromatogram method. +- Add filterRt,XChromatogram, filterMz,XChromatogram. +- Add plot,XChromatogram function supporting of highlighting/drawing identified + chromatographic peaks. +- findChromPeaks,Chromatogram returns an XChromatogram object (issue #329). +- Add chromPeaks,XChromatogram (issue #329). +- Add XChromatogram object (issue #329). +- Fix higlightChromPeaks with type = "polygon": peak filling represents now the + full detected peak and is no longer cut by the provided rt. +- Add argument peakIds to highlightChromPeaks allowing to specify the IDs of + peaks to be highlighted. +- Add example on clustering of base peak chromatograms to the vignette (issue + #328). +- Small fix in the vignette (issue #327). +- Add parameter groupval to exportMetaboAnalyst (issue #296). +- Fix bug in show,XCMSnExp that would throw an error if no process history is + present. + + +Changes in version 3.3.6 +---------------------- + +- Add type = "polygon" to highlightChromPeaks allowing to fill the actual + signal area of identified chromatographic peaks. + + +Changes in version 3.3.5 +---------------------- + +- Performance enhancement of the chromPeakSpectra and featureSpectra functions. + + +Changes in version 3.3.4 +---------------------- + +- Add featureChromatograms to extract ion chromatograms for each feature. +- Add hasFilledChromPeaks function. +- Add argument skipFilled to the featureSummary function. + + +Changes in version 3.3.3 +---------------------- + +- Add chromPeakSpectra and featureSpectra functions to extract MS2 spectra + for chromatographic peaks and features, respectively (issue #321). - Fix profMat to handle also data files with empty spectra (issue #312). - Add argument ylim to plotAdjustedRtime (issue #314). - Add imputeRowMin and imputeRowMinRand, two simple missing value imputation @@ -20,8 +497,8 @@ CHANGES IN VERSION 3.3.3 a number. -CHANGES IN VERSION 3.3.2 -------------------------- +Changes in version 3.3.2 +---------------------- - Add writeMSData method for XCMSnExp allowing to write mzML/mzXML files with adjusted retention times (issue #294). @@ -36,8 +513,8 @@ CHANGES IN VERSION 3.3.2 intensities of peaks that are assigned to the same feature in a file/sample. -CHANGES IN VERSION 3.3.1 -------------------------- +Changes in version 3.3.1 +---------------------- - Add overlappingFeatures function to identify overlapping or close features. - Add support for type = "apex_within" for featureDefinitions. @@ -61,8 +538,8 @@ CHANGES IN VERSION 3.3.1 MatchedFilterParam (issue #280). -CHANGES IN VERSION 3.1.3 -------------------------- +Changes in version 3.1.3 +---------------------- BUG FIXES: - Fix misplaced parenthesis in the check for multiple spectra in @@ -70,8 +547,8 @@ BUG FIXES: - Update link to correct metlin page in diffreport result (issue #204). -CHANGES IN VERSION 3.1.2 -------------------------- +Changes in version 3.1.2 +---------------------- NEW FEATURES - Add filterFeatureDefinitions function. @@ -81,8 +558,8 @@ BUG FIXES: by groupChromPeaks. -CHANGES IN VERSION 3.1.1 -------------------------- +Changes in version 3.1.1 +---------------------- NEW FEATURES - Reading raw files using xcmsSet or xcmsRaw uses now the automatic file type @@ -97,15 +574,15 @@ BUG FIXES: - Fix #259 -CHANGES IN VERSION 2.99.10 -------------------------- +Changes in version 2.99.10 +---------------------- BUG FIXES: - Fix #230: Failing vignettes on Windows. -CHANGES IN VERSION 2.99.9 -------------------------- +Changes in version 2.99.9 +---------------------- USER VISIBLE CHANGES: - Chromatographic peak detection uses adjusted retention times on an aligned @@ -140,23 +617,23 @@ BUG FIXES: points. -CHANGES IN VERSION 2.99.8 -------------------------- +Changes in version 2.99.8 +---------------------- BUG FIXES: - Replace xcmsMSn Rnw with Rmd vignette to fix Windows build errors. -CHANGES IN VERSION 2.99.7 -------------------------- +Changes in version 2.99.7 +---------------------- BUG FIXES: - Fix #201: Warnings: 'readMSData2' is deprecated, thanks to L. Gatto. - Merge with BioC git after transition -CHANGES IN VERSION 2.99.6 -------------------------- +Changes in version 2.99.6 +---------------------- NEW FEATURES: - calibrate,XCMSnExp method that allows to calibrate chromatographic peaks. @@ -170,7 +647,6 @@ USER VISIBLE CHANGES: - Replace xcmsDirect.Rnw with rmarkdown-based vignette using the new user interface. - BUG FIXES: - issue #196: removed the unnecessary requirement for same-dimension profile matrices in adjustRtime,XCMSnExp,ObiwarpParam. @@ -181,19 +657,19 @@ BUG FIXES: - Potential problems in the plotChromPeakDensity function. -CHANGES IN VERSION 2.99.5 -------------------------- +Changes in version 2.99.5 +---------------------- USER VISIBLE CHANGES: - Re-enable sleep parameter in findPeaks.centWave and findPeaks.matchedFilter. -CHANGES IN VERSION 2.99.4 -------------------------- +Changes in version 2.99.4 +---------------------- NEW FEATURES: - Add plotChromPeaks function to plot the definition (rt and mz range) of - detected chromatographic peaks of one file into the mz-rt plane. + detected chromatographic peaks of one file into the mz-rt plane. - Add plotChromPeakImage function to plot the number of detected peaks along the retention time axis per file as an image plot. @@ -206,15 +682,15 @@ BUG FIXES: - Polarity information was not read from mzXML files (issue #192). -CHANGES IN VERSION 2.99.3 -------------------------- +Changes in version 2.99.3 +---------------------- BUG FIXES: - issue #188: determine file type from file content if file ending not known. -CHANGES IN VERSION 2.99.2 -------------------------- +Changes in version 2.99.2 +---------------------- BUG FIXES: - issue #181: problem when isCentroided,Spectrum method returns NA because of @@ -224,8 +700,8 @@ BUG FIXES: backwards compatible with the old group.density code. -CHANGES IN VERSION 2.99.1 -------------------------- +Changes in version 2.99.1 +---------------------- NEW FEATURES: - extractMsData to extract raw MS data as a data.frame (issue #120). @@ -239,8 +715,8 @@ BUG FIXES: retcor method. -CHANGES IN VERSION 2.99.0 -------------------------- +Changes in version 2.99.0 +---------------------- NEW FEATURES: - plotChromatogram and highlightChromPeaks functions. @@ -258,8 +734,8 @@ USER VISIBLE CHANGES: (intensity values are NA). -CHANGES IN VERSION 1.53.1 --------------------------- +Changes in version 1.53.1 +---------------------- BUG FIXES: - Increase parameter n for the density call in the peak density correspondence @@ -267,8 +743,8 @@ BUG FIXES: Thanks to Jan Stanstrup. -CHANGES IN VERSION 1.51.11 --------------------------- +Changes in version 1.51.11 +---------------------- NEW FEATURES: - Parameter "filled" for featureValues (issue #157). @@ -280,8 +756,8 @@ BUG FIXES: - Update getPeaks to use non-deprecated API (issue #163). -CHANGES IN VERSION 1.51.10 --------------------------- +Changes in version 1.51.10 +---------------------- NEW FEATURES: - filterRt for Chromatogram class (issue #142). @@ -300,8 +776,8 @@ BUG FIXES: to be in the same order than the raw retention times. -CHANGES IN VERSION 1.51.9 -------------------------- +Changes in version 1.51.9 +---------------------- NEW FEATURES: - fillChromPeaks, dropFilledChromPeaks methods and FillChromPeaksParam class. @@ -317,8 +793,8 @@ BUG FIXES: - Issue #137: Error in findChromPeaks if no peaks are found. -CHANGES IN VERSION 1.51.8 -------------------------- +Changes in version 1.51.8 +---------------------- NEW FEATURES: - Add Chromatogram class and extractChromatograms method. @@ -329,8 +805,8 @@ BUG FIXES: - Issue #134: xcmsSet constructor endless loop. -CHANGES IN VERSION 1.51.7 -------------------------- +Changes in version 1.51.7 +---------------------- USER VISIBLE CHANGES: - Major renaming of methods and classes to follow the naming convention: @@ -342,8 +818,8 @@ BUG FIXES: - Issue #127: failing unit test on Windows build machine. -CHANGES IN VERSION 1.51.6 -------------------------- +Changes in version 1.51.6 +---------------------- NEW FEATURES: - groupFeatures and adjustRtime methods for XCMSnExp objects. @@ -354,8 +830,8 @@ BUG FIXES: - Issue #124 (filterRt,XCMSnExp returned empty object). -CHANGES IN VERSION 1.51.5 -------------------------- +Changes in version 1.51.5 +---------------------- NEW FEATURES: - MsFeatureData and XCMSnExp objects. @@ -374,14 +850,15 @@ BUG FIXES: - Issue #91 (warning instead of error if no peaks in ROI). Thanks to J. Stanstrup. -CHANGES IN VERSION 1.51.4 -------------------------- +Changes in version 1.51.4 +---------------------- BUG FIXES: - added deepCopy to avoid corrupting the original object, thanks to J. Stanstrup, closes #93 -CHANGES IN VERSION 1.51.3 -------------------------- + +Changes in version 1.51.3 +---------------------- NEW FEATURES: - binYonX binning function. @@ -414,51 +891,53 @@ NEW FEATURES: - Fix issue #60: findPeaks.massifquant always returns a xcmsPeaks object. -CHANGES IN VERSION 1.51.2 -------------------------- +Changes in version 1.51.2 +---------------------- USER VISIBLE CHANGES: - As suggested by Jan Stanstrup, do not error if a centWave ROI contains no data, closes #90 -CHANGES IN VERSION 1.51.1 -------------------------- +Changes in version 1.51.1 +---------------------- BUG FIXES: - Fix incorrrect indexing getEIC function reported by Will Edmands, closes #92 -CHANGES IN VERSION 1.49.7 -------------------------- + +Changes in version 1.49.7 +---------------------- BUG FIXES: - Fix documentation warnings. -CHANGES IN VERSION 1.49.6 -------------------------- +Changes in version 1.49.6 +---------------------- USER VISIBLE CHANGES: - Peak Picking function findPeaks.centWaveWithPredictedIsotopeROIs() and findPeaks.addPredictedIsotopeFeatures(), which allow more sensitive detection of isotope features. -CHANGES IN VERSION 1.49.5 -------------------------- + +Changes in version 1.49.5 +---------------------- USER VISIBLE CHANGES: - Some documentation updates. - Preparation for a new binning function -CHANGES IN VERSION 1.49.4 -------------------------- +Changes in version 1.49.4 +---------------------- BUG FIXES: - Fix getXcmsRaw that would prevent retention time correction to be applied (issue #44 reported by Aleksandr). -CHANGES IN VERSION 1.49.3 -------------------------- +Changes in version 1.49.3 +---------------------- NEW FEATURE: - updateObject method for xcmsSet. @@ -479,16 +958,17 @@ OTHER CHANGES - Use roxygen2 to manage the DESCRIPTION's collate field. -CHANGES IN VERSION 1.49.2 -------------------------- +Changes in version 1.49.2 +---------------------- NEW FEATURE: - Initial support for exporint mzTab format. Since Changes are still to be expected, xcms:::writeMzTab() is not yet exported. -CHANGES IN VERSION 1.49.1 -------------------------- + +Changes in version 1.49.1 +---------------------- NEW FEATURE: @@ -496,13 +976,16 @@ NEW FEATURE: Instead of throwing an "m/z sort assumption violated !" error, the data is re-read and on-demand sorted by m/z. -CHANGES IN VERSION 1.47.3 -------------------------- + +Changes in version 1.47.3 +---------------------- + - Disable parallel processing in unit tests causing a timeout on BioC build machines -CHANGES IN VERSION 1.47.2 -------------------------- + +Changes in version 1.47.2 +---------------------- BUG FIXES - Fix problem in getEIC on xcmsSet objects reported by Alan Smith in issue #7 and @@ -510,22 +993,23 @@ BUG FIXES - Changed some unnecessary warnings into messages. -CHANGES IN VERSION 1.47.2 -------------------------- +Changes in version 1.47.2 +---------------------- USER VISIBLE CHANGES: - Disabled parallel processing in unit tests * migrate dependencies from ncdf -> ncdf4 -CHANGES IN VERSION 1.45.7 -------------------------- + +Changes in version 1.45.7 +---------------------- USER VISIBLE CHANGES: - Disabled Rmpi support and usage on Windows -CHANGES IN VERSION 1.45.6 -------------------------- +Changes in version 1.45.6 +---------------------- NEW FEATURE: - J. Rainer implemented a [ method that allows to subset an xcmsSet. @@ -535,8 +1019,8 @@ BUG FIXES: Added some details to the documentation of xcmsSet-class. -CHANGES IN VERSION 1.45.5 -------------------------- +Changes in version 1.45.5 +---------------------- USER VISIBLE CHANGES: - The sampclass method for xcmsSet will now return the content of the @@ -551,15 +1035,15 @@ BUG FIXES: when no input files are available to the xcmsSet function. -CHANGES IN VERSION 1.45.4 -------------------------- +Changes in version 1.45.4 +---------------------- BUG FIXES: - Fixed a bug in the levelplot method for xcmsSet. -CHANGES IN VERSION 1.45.3 -------------------------- +Changes in version 1.45.3 +---------------------- NEW FEATURE: @@ -594,8 +1078,9 @@ BUG FIXES: - Checking if phenoData is a data.frame or AnnotatedDataFrame and throw an error otherwise. - xcmsSet getEIC method for water Lock mass corrected files for a subset of files did not evaluate whether the specified files were corrected. -CHANGES IN VERSION 1.45.2 -------------------------- + +Changes in version 1.45.2 +---------------------- BUG FIXES: o The xcms split() function now accepts factors that are shorter @@ -603,8 +1088,8 @@ BUG FIXES: the standard split() behaviour -CHANGES IN VERSION 1.45.1 -------------------------- +Changes in version 1.45.1 +---------------------- NEW FEATURE: o plotrt now allows col to be a vector of color definition, @@ -613,33 +1098,39 @@ NEW FEATURE: o Allow to use the "parallel" package for parallel processing of the functions xcmsSet and fillPeaks.chrom. o Thanks to J. Rainer! -CHANGES IN VERSION 1.43.3 -------------------------- + +Changes in version 1.43.3 +---------------------- BUG FIXES: o Give a more verbose error message when file not found -CHANGES IN VERSION 1.43.2 --------------------------- + +Changes in version 1.43.2 +---------------------- BUG FIXES: o Use ProtGenerics, adapted xcms peaks() -CHANGES IN VERSION 1.43.1 --------------------------- + +Changes in version 1.43.1 +---------------------- NEW FEATURE: o function plotQC() for plotting various QC plots on RT and m/z -CHANGES IN VERSION 1.41.1 --------------------------- + +Changes in version 1.41.1 +---------------------- BUG FIXES: o fix sampclass generation from phenoData if some combinations of factors don't exist o disable parallel code in manpages to avoid issues on BioC windows build farm machines -CHANGES IN VERSION 1.39.6 --------------------------- + +Changes in version 1.39.6 +---------------------- + USER VISIBLE CHANGES: o Massifquant reports the maximum intensity for each isotope trace (peak). This is useful for interactive parameter optimization. @@ -648,26 +1139,34 @@ BUG FIXES: Jan Stanstrup. Now using an environment to mirror gvals to each list item in the very large argList. -CHANGES IN VERSION 1.39.4 --------------------------- + +Changes in version 1.39.4 +---------------------- + BUG FIXES: o Fixed write.cdf(), which had an intensity offset of +1, added a unit test -CHANGES IN VERSION 1.39.3 --------------------------- + +Changes in version 1.39.3 +---------------------- + BUG FIXES: o New R-devel check unload better. Lingering ramp code removed, import from mzR. Cleaned up other errors in package check. -CHANGES IN VERSION 1.39.1 --------------------------- + +Changes in version 1.39.1 +---------------------- + BUG FIXES: o Updated doubleMatrix c code to allow for larger profile matrixes REQUIRED CHANGES o Moved inst/doc to vignettes -CHANGES IN VERSION 1.37.6 --------------------------- + +Changes in version 1.37.6 +---------------------- + NEW FEATURE: o Introducing write.mzQuantML(xcmsSet) to export the peak list and grouped matrix to the PSI format mzQuantML (see http://www.psidev.info/mzquantml) USER VISIBLE CHANGES: @@ -677,49 +1176,62 @@ USER VISIBLE CHANGES: o Clean and update documentation for findPeaks.massifquant-methods.Rd BUG FIXES: o Remove unused parameters in findKalmanROIs() within xcmsRaw.R -CHANGES IN VERSION 1.37.5 --------------------------- + + +Changes in version 1.37.5 +---------------------- + BUG FIXES o fixed bug in retcor.obiwarp where the scanrange of the first sample would be checked instead of the center sample -CHANGES IN VERSION 1.37.4 --------------------------- + +Changes in version 1.37.4 +---------------------- + BUG FIXES o Skip t-test in diffreport() if one class has less than 2 samples. -CHANGES IN VERSION 1.37.3 --------------------------- + +Changes in version 1.37.3 +---------------------- + BUG FIXES o fixed bug in patternVsRowScore (group.nearest) that was introduced by the modifications in rev 65169 and caused features to be aligned that were far outside the given m/z and retention time windows. -CHANGES IN VERSION 1.37.1 --------------------------- +Changes in version 1.37.1 +---------------------- + BUG FIXES o fixed fillPeaks, which 1) dropped non-standard columns and 2) failed if nothing to do, based on patches by Tony Larson. -CHANGES IN VERSION 1.37.1 --------------------------- + +Changes in version 1.37.1 +---------------------- NEW FEATURES: o Introducing msn2xcmsRaw, to allow findPeaks() on MS2 and MSn data -CHANGES IN VERSION 1.35.7 --------------------------- +Changes in version 1.35.7 +---------------------- + BUG FIXES o fixed indexing bug in group.nearest, which under certain circumstances caused all peaks in the first sample to be ignored (reported by Tony Larson) -CHANGES IN VERSION 1.35.6 --------------------------- + +Changes in version 1.35.6 +---------------------- + BUG FIXES o Obiwarp retention time alignment error-ed if scanrange was used as a parameter setting during xcmsSet/peak detection The method now tries to automatically find the set scanrange and uses this range for alignment. -CHANGES IN VERSION 1.35.4 --------------------------- + +Changes in version 1.35.4 +---------------------- NEW FEATURES: o Introducing parallel fillPeaks @@ -728,15 +1240,16 @@ USER VISIBLE CHANGES o Replace snow requirement with minimum R version 2.14.0 -CHANGES IN VERSION 1.35.3 --------------------------- +Changes in version 1.35.3 +---------------------- + BUG FIXES o if group.density was used with very low minfrac settings (< 0.5) it did not return all feature groups, but only those that include features from at least 50% of samples in a group. This limitation was removed. -CHANGES IN VERSION 1.35.2 --------------------------- +Changes in version 1.35.2 +---------------------- UPDATED FEATURES: o Behind the scenes xcms now uses the xcmsSource class to read raw data. @@ -748,36 +1261,41 @@ BUG FIXES o massifquant: reporting features data structure compatible with multiple sample comparison within XCMS. -CHANGES IN VERSION 1.35.1 --------------------------- + +Changes in version 1.35.1 +---------------------- UPDATED FEATURES: o The mzData export is now much faster and uses less memory -CHANGES IN VERSION 1.33.16 --------------------------- + +Changes in version 1.33.16 +---------------------- USER VISIBLE CHANGES o diffreport and plotEIC have a new parameter mzdec, with is the number of decimal places of the m/z values in the EIC plot title -CHANGES IN VERSION 1.33.16 --------------------------- + +Changes in version 1.33.16 +---------------------- UPDATED FEATURES: -Lock mass gap filler now works with netCDF lock mass function file to find the exact times of the scans and works with -the newer Waters MS instruments. + o Lock mass gap filler now works with netCDF lock mass function file to find + the exact times of the scans and works with the newer Waters MS instruments. + -CHANGES IN VERSION 1.33.15 --------------------------- +Changes in version 1.33.15 +---------------------- BUG FIXES o scanrage is now honoured in xcmsSet, also when in parallel mode -CHANGES IN VERSION 1.33.14 --------------------------- + +Changes in version 1.33.14 +---------------------- BUG FIXES @@ -785,22 +1303,25 @@ BUG FIXES also in xcmsSet(matchedFilter), where previously it was ignored. -CHANGES IN VERSION 1.33.13 --------------------------- + +Changes in version 1.33.13 +---------------------- BUG FIXES o write.cdf() has been fixed to write files AMDIS can read -CHANGES IN VERSION 1.33.12 --------------------------- + +Changes in version 1.33.12 +---------------------- BUG FIXES o write.mzData adds Polarity to the file if available -CHANGES IN VERSION 1.33.11 --------------------------- + +Changes in version 1.33.11 +---------------------- USER VISIBLE CHANGES @@ -815,22 +1336,24 @@ BUG FIXES o group.mzClust was failing when result had one peak -For more details and all changes before May 2012 please see the (now discontinued) CHANGELOG in the source package (inst/ folder). ++ For more details and all changes before May 2012 please see the (now discontinued) CHANGELOG in the source package (inst/ folder). CHANGED BEHAVIOUR since Version 1.32: +---------------------- -Other Changes since Version 1.32: ++ Other Changes since Version 1.32: * improved mzData writing, now includes MSn spectra and less verbose. * improved netCDF writing, but not yet good enough for AMDIS CHANGED BEHAVIOUR since Version 1.14: +---------------------- * centWave may report a smaller set of peaks, due to a small bug in the ROI algorithm some features with mass deviation > ppm were retained. -Other Changes since Version 1.14: ++ Other Changes since Version 1.14: * New method for grouping: an algorithm inspired by mzMine group(method="nearest") has been implemented. It is slower diff --git a/inst/mgf/metlin-2724.mgf b/inst/mgf/metlin-2724.mgf new file mode 100644 index 000000000..96d4425a0 --- /dev/null +++ b/inst/mgf/metlin-2724.mgf @@ -0,0 +1,120 @@ +COM=Experimentexported by MSnbase on Wed Oct 2 04:37:05 2019 +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +225.008933267394 1.2464 +226.011208467866 1.09549 +258.067105636602 4.48022 +303.026172269839 0.514415 +304.108597940153 100 +304.324023901701 1.23842 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +146.995616361726 0.594773 +147.997159629763 0.953264 +217.041819813512 1.23358 +225.010672240906 1.39017 +226.010546112222 2.67445 +229.039095548481 0.729046 +258.066841499519 62.2361 +276.074125829337 0.500235 +304.108906263015 100 +304.298946860413 1.21156 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +42.0349967010836 4.33369 +103.006693506456 0.664295 +146.996569978845 1.97827 +147.998364752108 1.11036 +189.045696754412 0.814831 +203.060613914501 0.812005 +217.040192909006 7.81945 +226.011289819908 0.522286 +229.041382820418 5.03224 +230.071617393839 0.664121 +241.04082998419 0.899738 +258.06757899835 100 +258.263412759185 0.801551 +304.109827468897 5.46593 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +42.034643649651 100 +43.0557269879833 1.8229 +54.0353224380495 1.79522 +56.0479524454203 0.777029 +57.070537350628 0.961298 +58.0670667303092 1.49098 +62.9276895653731 0.518371 +68.0110679788541 0.773234 +81.0420198137458 1.13951 +87.9807220217925 1.61787 +88.0774192963756 1.79084 +91.0570200006232 1.43391 +93.0451953721334 1.09692 +95.0300590960543 0.797191 +103.006388474554 3.95202 +104.008676458962 5.06524 +107.027751626056 0.707551 +120.022852257381 0.553041 +121.04596574751 1.79111 +122.040209489313 8.38919 +128.049821006196 0.550676 +132.022360559313 5.10651 +132.893854011925 0.559772 +133.040353494664 0.75233 +134.03990278941 16.455 +135.044271090705 0.606465 +136.05655068599 1.85247 +138.03612185232 0.658689 +146.042807608739 1.19093 +146.994471167812 2.99079 +148.055161529467 24.4474 +155.060269610201 1.72835 +156.044218862988 0.713515 +158.036039196005 0.511162 +160.043136137407 0.932196 +161.052410294937 1.57792 +162.034562960787 42.5275 +164.019686021321 0.67081 +171.037162324025 0.946127 +173.05115899967 34.8828 +173.681100186857 0.527096 +174.051019595502 3.84553 +175.067294701203 8.4097 +176.050141568092 2.94374 +177.055959028752 0.655404 +179.063712717449 0.786613 +185.051260728001 0.740483 +187.032157220368 4.18396 +188.013604203648 1.64833 +189.046176520288 61.5992 +189.220160963471 0.577636 +190.071163924 0.578689 +201.045344700073 20.7712 +202.073710405983 3.46066 +203.059949920207 21.1065 +215.025200096222 6.48682 +217.039450822483 50.0867 +229.041331481439 23.1436 +230.070917059094 3.48537 +231.056426245743 1.63558 +241.040111798128 5.04475 +258.067811303445 14.6976 +END IONS diff --git a/inst/mgf/metlin-68898.mgf b/inst/mgf/metlin-68898.mgf new file mode 100644 index 000000000..92b48f727 --- /dev/null +++ b/inst/mgf/metlin-68898.mgf @@ -0,0 +1,155 @@ +COM=Experimentexported by MSnbase on Thu Oct 3 14:03:16 2019 +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +43.0554440711554 0.568522 +70.0285711476176 1.95798 +70.0619427307142 1.57257 +74.0984928724698 0.749754 +237.958504374212 0.627982 +265.958270467228 1.47379 +280.006109208668 0.88683 +305.732636487218 0.59592 +308.000846656352 41.7095 +308.159753102516 1.40363 +308.222376782232 0.768325 +308.282909312937 0.843641 +308.319258480269 0.786866 +308.564075104839 0.503678 +309.50326898262 0.527515 +376.037839783088 100 +376.206896301775 2.83987 +376.297590041637 1.69455 +376.35561149494 1.05724 +376.409366091614 0.926654 +376.471411776729 1.05431 +376.6049298989 1.40211 +376.746773467258 0.775535 +376.788524763238 0.642411 +376.878605688501 0.781216 +376.922451419015 0.661527 +377.032695708391 0.887165 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +43.0526116541698 0.994397 +70.0297582401806 7.25839 +70.0603158735295 4.3062 +85.0895964482202 1.12616 +155.054086405716 0.520373 +201.984370065163 1.05057 +216.000477045783 0.558062 +222.945457191861 1.13478 +237.957271487264 0.879158 +244.026357521185 1.73557 +245.042313625241 1.11018 +265.950850266949 14.9281 +280.009480217898 3.91586 +308.00052917629 100 +308.241882151287 1.06317 +308.274760065517 0.89463 +308.320347036336 0.968194 +308.373975069686 0.81762 +308.427735255793 0.812896 +308.500713220201 1.03819 +308.583157540717 0.697048 +308.683085767443 0.663932 +308.735870998983 0.895286 +308.808747962362 0.65279 +308.884622711727 0.554203 +308.949077200362 0.529155 +309.009481535035 0.838146 +309.204767821738 0.639033 +309.815683271353 0.674166 +376.04178241607 2.14714 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +41.0363136770032 2.11637 +42.033437894449 6.66789 +43.0545628992296 19.8371 +43.1161698185851 1.22892 +56.0463908051644 2.3252 +70.0278745104839 100 +70.0637701506645 39.5402 +70.1045172456669 5.95133 +70.1287320409478 2.03809 +70.1679654119018 1.26909 +70.2784707130767 1.30235 +70.3361774751934 1.36144 +70.4738630400363 1.25643 +70.5078772772969 1.28588 +85.0884215921274 15.0344 +85.1493639192579 1.24384 +98.0548497564572 1.4676 +113.078740870991 3.95549 +138.008615275315 3.37303 +166.002025106759 6.02886 +173.984986243398 1.49025 +174.971257351673 4.69692 +194.914020147531 6.25869 +194.973316394257 1.23188 +201.97736606021 8.54438 +209.065369673156 9.34018 +216.074324294958 1.2592 +222.948672729968 10.5918 +237.957814702443 2.64491 +244.02586440605 11.155 +245.038997196436 3.51389 +265.954317644767 33.8077 +280.00621936698 10.8839 +280.256255502935 1.38395 +307.879746686752 2.17285 +308.000146682147 67.7567 +308.164423638581 1.96681 +308.292783229476 1.56692 +309.496667906875 1.25105 +375.948359109046 1.32889 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +41.0396231509843 7.9611 +42.0347476064299 28.3335 +43.0547104384005 57.2471 +44.2617305461343 2.25094 +45.2451257664824 2.64856 +56.0507170127916 16.2602 +57.0587993039037 8.94879 +70.027805125519 100 +70.0646994677627 53.2144 +70.1069202830194 5.13478 +71.1399156522861 2.61112 +72.0428274020915 2.23451 +84.0788884770497 3.28178 +98.0651955637977 5.5663 +106.941617800937 4.06076 +132.956998374284 4.53581 +132.984653117063 2.41192 +153.004527249868 1.97294 +158.972090982648 3.71018 +159.980346904449 2.07436 +161.969518370536 11.2244 +166.919281769414 7.28405 +167.017753272692 2.91754 +168.4816401454 2.11845 +174.972445787876 2.03203 +187.9756778902 6.62716 +194.919378056534 6.08382 +196.930786169407 3.17837 +213.979110689544 2.74222 +222.947921520267 4.29579 +END IONS diff --git a/inst/mgf/metlin-72445.mgf b/inst/mgf/metlin-72445.mgf new file mode 100644 index 000000000..4f59a4410 --- /dev/null +++ b/inst/mgf/metlin-72445.mgf @@ -0,0 +1,335 @@ +COM=Experimentexported by MSnbase on Wed Oct 2 04:38:15 2019 +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +201.979844165259 0.702007 +217.011583314943 1.37002 +234.03195931199 1.26563 +262.065743494551 1.70459 +276.086146989631 2.39046 +304.112767912757 100 +304.374695143579 1.14062 +304.465225514636 0.983759 +304.574591570179 0.737818 +304.662805465389 0.623062 +304.792626352034 0.740605 +304.843265635025 0.77624 +305.064170006259 0.612898 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +44.9786730186437 1.22229 +60.080502627062 7.49286 +66.0437770642009 0.558273 +89.0389762827395 0.770675 +93.0685346630029 1.27801 +97.0081619829197 0.620092 +155.05095854428 1.21892 +184.982357465125 1.15067 +201.985150352503 4.36145 +217.007693232671 34.3881 +217.142326145753 0.691866 +217.777462754003 0.594703 +233.949364018268 0.608734 +234.033444442759 43.8036 +234.170939895555 1.45411 +234.266306185337 0.709129 +234.446973264791 0.50215 +245.040889119846 5.53256 +262.065065029799 29.3922 +262.216955149263 0.962492 +262.256159541644 0.666858 +262.554118723247 0.61453 +265.772803809446 0.697841 +275.983899711319 0.750152 +276.082151079227 43.7561 +276.228300442401 1.3373 +276.358471196727 0.839477 +276.496974336094 0.850701 +276.870151672343 0.623484 +302.061868702526 0.574283 +303.171473501115 0.558465 +304.112131952565 100 +304.271424507472 2.49932 +304.346506890477 1.36044 +304.396633745243 1.12393 +304.463736263985 0.804514 +304.545625369793 1.46513 +304.627531774632 0.791871 +304.667973594992 0.921973 +304.753120777003 0.689718 +304.887949203341 0.74141 +304.961992800424 0.543668 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +60.0806956342986 6.95089 +107.0507933322 1.45398 +122.038019941374 0.690266 +137.036445495708 0.921484 +153.037659328639 2.01267 +155.050388191218 1.70558 +168.994436944206 0.659327 +181.062179481916 0.689959 +198.990907032287 0.560993 +201.986142248473 7.35205 +217.008123388435 100 +217.202961730873 1.39315 +217.250115067086 0.841799 +217.289611129865 0.954416 +217.337038098768 0.773813 +217.416931133978 0.780527 +217.483566774497 0.814065 +217.544830112118 0.500186 +217.592650150956 0.764307 +217.62174726279 0.687463 +217.698214725358 0.626085 +217.746688949009 0.506061 +234.034250693496 23.5383 +234.174126029927 0.930737 +245.039115266466 1.1936 +262.072148289821 2.80339 +276.080866409055 5.24949 +304.127132577215 0.553538 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +41.0373655623447 0.678316 +43.0563869377061 3.48729 +44.98010424493 2.76905 +45.0345844648825 0.724891 +58.9961382414219 1.6349 +60.0808185729752 2.25361 +61.0093645954414 0.736221 +63.0209266675974 1.6036 +70.991170524654 0.868987 +77.0369769527074 5.21108 +78.0456839581519 1.45672 +79.0532797425774 6.24083 +79.9876343134077 2.92077 +80.0162882621825 0.882763 +80.0583924576778 2.48011 +91.0532283355914 6.55696 +93.0682288735009 3.30953 +97.0098483764883 7.93097 +103.051883914665 1.18801 +104.061248199871 3.66853 +107.04853708493 11.0279 +107.122657075293 0.708598 +108.058739947946 5.82079 +109.012866136004 0.809124 +109.062108194616 2.64306 +109.111169039368 0.743556 +110.016096342062 4.08924 +110.089625614303 0.732251 +111.029389985159 0.88487 +121.008449956042 4.18045 +121.069384445049 0.967542 +122.018635460722 6.2033 +125.043188354404 4.26886 +135.027391077605 1.62138 +137.040327820783 5.54097 +138.014045001575 5.60561 +139.023642937351 4.24085 +139.065211404316 0.683805 +153.038619504348 11.9357 +153.153114572674 0.79221 +155.042808287725 0.69385 +168.994382362977 11.8495 +169.074603365462 0.957029 +173.044335898195 0.82993 +184.979382254817 15.0456 +198.99631202887 8.75143 +199.038782979453 1.92313 +199.072523169161 0.876825 +200.991645312594 4.74178 +201.98532029687 100 +202.102917898371 2.68386 +202.159583449893 1.71609 +202.204196347479 1.25314 +202.339318433618 1.11551 +202.394767326646 0.739942 +202.443229932717 0.753744 +202.475619066146 0.739942 +202.66965532049 0.861184 +205.794851189276 0.756353 +214.990111250182 2.6193 +217.006546160629 29.1013 +217.144968795049 0.84117 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +78.959206994795 1.17107 +120.020420337094 1.46364 +138.017479477655 0.826264 +153.039085640222 1.70696 +274.06525385257 3.57423 +287.085608175156 1.0351 +302.098567059025 100 +302.249836115228 2.85995 +302.328996113531 1.73623 +302.482910923945 0.675211 +302.530408878849 0.739671 +302.584559068866 0.971337 +302.684199548025 0.730804 +302.761786043215 0.677927 +302.89026318984 0.562676 +302.997873820654 0.550702 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +35.5463645644266 0.673521 +78.9595863363869 37.0713 +79.0195947683768 1.95413 +79.0389095788722 1.03395 +89.4920235704049 0.875759 +120.021252387545 17.1526 +120.07281904742 1.39131 +137.030443781977 2.71992 +138.01343130386 11.2971 +138.096411454414 0.873356 +153.037838806059 27.596 +153.126110639235 1.31215 +153.188685608368 0.711089 +164.053799526657 3.57409 +179.077765656653 13.7638 +179.194006709579 0.715417 +180.039497491322 0.994011 +194.094631004874 1.60065 +195.097471048395 0.807793 +238.492564846967 2.50561 +245.025068986566 1.30165 +260.04944737349 10.546 +272.046105805112 1.41195 +274.066011312531 19.493 +274.200261491036 1.00663 +274.336769579912 0.711205 +287.075769329475 37.203 +287.231837405284 1.14899 +287.265161001992 1.07381 +287.352901584525 0.786043 +287.461927233812 0.771904 +288.070073356612 0.683951 +302.097983452328 100 +302.253239256033 3.22555 +302.316460113375 1.71391 +302.405346072514 1.68753 +302.463200570237 1.2333 +302.565998230259 0.95312 +302.677307049746 0.891158 +302.862894685201 0.717397 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +77.9750692578754 3.64397 +78.9600674310831 45.8777 +79.0498687490102 1.40745 +79.1592966109984 0.987715 +120.021393178255 45.6758 +120.09909436168 1.59982 +120.129760085801 1.11835 +120.304677806762 1.15412 +138.013897381165 100 +138.138497610772 1.78092 +138.176534894251 1.59556 +138.24360468358 1.33601 +138.301021703104 1.10917 +138.361878961099 1.18286 +138.404440693883 1.1529 +153.038593003189 51.6604 +153.130546619278 1.84862 +153.204426242882 1.42176 +153.379942145702 1.03936 +153.49620624009 0.949421 +153.53022619994 1.01674 +153.672236549089 1.01538 +154.034118698176 1.03631 +164.054103232397 21.867 +164.170879376948 0.946189 +164.237469205875 0.972983 +164.374670481066 1.17961 +179.077503650912 74.3063 +179.201073480419 2.78144 +179.243364912896 1.10357 +179.290992553709 1.06313 +179.331906077403 1.08566 +180.076281566743 1.26724 +194.098562887349 0.967325 +198.980310546399 2.63557 +214.011248664908 3.59261 +232.02096751167 1.52259 +240.881041108319 1.39231 +245.02712875972 8.0443 +260.050931656735 1.08436 +272.046856492407 4.18004 +274.06812673103 1.93704 +287.075359726791 12.5423 +302.102148774802 5.18163 +END IONS + +BEGIN IONS +SCANS= +TITLE=msLevel 2; retentionTime ; scanNum ; precMz ; precCharge +RTINSECONDS= +PEPMASS= +62.9642641052092 1.79585 +77.9766887688876 5.87701 +78.0297410268116 0.549259 +78.9609323888576 10.523 +120.023069965362 6.18028 +120.075093225117 0.507187 +120.266271023807 0.50876 +122.020544783348 0.570363 +137.029417901464 2.74659 +138.01406986567 100 +138.115341073187 3.32674 +138.158336051917 1.55115 +138.268477439604 1.10065 +138.320008856618 0.722308 +138.379581619977 1.40044 +138.410345397643 0.694243 +138.443150681802 0.821275 +138.470138306916 0.530852 +138.569923063962 0.558053 +138.663850190676 0.613797 +138.805619444816 0.614577 +139.020788806501 0.686562 +140.167698892723 0.504243 +153.034137557971 0.651121 +164.05436557369 30.2366 +164.140603932028 1.6071 +164.172705958657 1.11177 +164.212806602078 0.616252 +164.331373060423 0.508496 +164.36660442618 0.682171 +164.411035369379 0.706866 +164.580046859168 0.584937 +165.059485949051 2.33449 +198.980359247052 0.816632 +END IONS diff --git a/longtests/testthat.R b/longtests/testthat.R new file mode 100644 index 000000000..d3b0c5c5a --- /dev/null +++ b/longtests/testthat.R @@ -0,0 +1,76 @@ +library(testthat) +library(xcms) +library(faahKO) +library(msdata) + +attr(faahko, "filepaths") <- sapply( + as.list(basename(attr(faahko, "filepaths"))), + function(x) system.file("cdf", if (length(grep("ko",x)) > 0) "KO" else "WT", + x, package = "faahKO")) +if (.Platform$OS.type == "unix") { + prm <- MulticoreParam(3) +} else { + prm <- SnowParam(3) +} +register(bpstart(prm)) + +## Create some objects we can re-use in different tests: +faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO")) + +## An xcmsRaw for the first file: +faahko_xr_1 <- xcmsRaw(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + profstep = 0) +faahko_od <- readMSData(faahko_3_files, mode = "onDisk") +faahko_xod <- findChromPeaks( + faahko_od, param = CentWaveParam(noise = 10000, snthresh = 40, + prefilter = c(3, 10000))) +faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), + method = "centWave", noise = 10000, snthresh = 40, + prefilter = c(3, 10000)) +faahko_xsg <- group(faahko_xs) +## Doing also the retention time correction etc +od_x <- faahko_od +mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) +od_chrs <- chromatogram(od_x, mz = mzr) +xod_x <- faahko_xod +pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) +xod_xg <- groupChromPeaks(xod_x, param = pdp) +xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) +xod_xgrg <- groupChromPeaks(xod_xgr, param = pdp) +xod_r <- adjustRtime(as(od_x, "XCMSnExp"), param = ObiwarpParam()) + +xod_chr <- findChromPeaks(filterMz(filterRt(od_x, rt = c(2500, 3500)), + mz = c(334.9, 344.1)), + param = CentWaveParam()) + +faahko_grouped_filled <- fillPeaks(group(faahko)) +faahko_grouped_retcor_filled <- + fillPeaks(group(retcor(group(updateObject(faahko))))) + +microtofq_fs <- c(system.file("microtofq/MM14.mzML", package = "msdata"), + system.file("microtofq/MM8.mzML", package = "msdata")) +microtofq_xr <- xcmsRaw(microtofq_fs[1], profstep = 0) +microtofq_od <- readMSData(microtofq_fs, mode = "onDisk") + +## Direct injection data: +fticrf <- list.files(system.file("fticr-mzML", package = "msdata"), + recursive = TRUE, full.names = TRUE) +fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk") +fticr_xod <- findChromPeaks(fticr, MSWParam(scales = c(1, 7), + peakThr = 80000, ampTh = 0.005, + SNR.method = "data.mean", + winSize.noise = 500)) +fticr_xs <- xcmsSet(method="MSW", files=fticrf[1:2], scales=c(1,7), + SNR.method='data.mean' , winSize.noise=500, + peakThr=80000, amp.Th=0.005) + +fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO"), + system.file('cdf/KO/ko19.CDF', package = "faahKO")) +xs_1 <- xcmsSet(fs, profparam = list(step = 0), method = "centWave", + noise = 10000, snthresh = 50, prefilter = c(3, 10000)) + +test_check("xcms") diff --git a/longtests/testthat/test_do_adjustRtime-functions.R b/longtests/testthat/test_do_adjustRtime-functions.R new file mode 100644 index 000000000..690bafc18 --- /dev/null +++ b/longtests/testthat/test_do_adjustRtime-functions.R @@ -0,0 +1,223 @@ +test_that("getPeakGroupsRtMatrix works", { + param <- PeakGroupsParam() + nSamples <- length(fileNames(xod_xg)) + pkGrp <- .getPeakGroupsRtMatrix( + peaks = chromPeaks(xod_xg), + peakIndex = .peakIndex(xod_xg), + sampleIndex = seq_len(nSamples), + missingSample = nSamples - (nSamples * minFraction(param)), + extraPeaks = extraPeaks(param) + ) + ## expect_equal(colnames(pkGrp), colnames(chromPeaks(xod_xg))) + fts <- featureDefinitions(xod_xg)[rownames(pkGrp), ] + expect_true(all(pkGrp[, 1] >= fts$rtmin & pkGrp[, 1] <= fts$rtmax)) + expect_true(all(pkGrp[, 2] >= fts$rtmin & pkGrp[, 2] <= fts$rtmax)) + expect_true(all(pkGrp[, 3] >= fts$rtmin & pkGrp[, 3] <= fts$rtmax)) +}) + +test_that("do_adjustRtime_peakGroups works", { + xs <- faahko + xsg <- group(xs) + misSamp <- 1 + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp) + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + expect_error(do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, + subset = "4")) + expect_error(do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, + subset = 4L)) + expect_error(do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, + subset = c(1, 2, 5, 14))) + + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr) + res_orig <- xcms:::do_adjustRtime_peakGroups_orig(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr) + expect_equal(res, res_orig) + expect_equal(xsa@rt$corrected, res) + ## Use only a subset. + res_sub <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, + subset = c(1, 3, 5, 7, 9, 11)) + ## Change settings. + misSamp <- 3 + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp) + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr) + expect_equal(xsa@rt$corrected, res) + misSamp <- 2 + xtr <- 2 + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr) + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr) + expect_equal(xsa@rt$corrected, res) + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, + smooth = "linear") + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr, + smooth = "linear") + expect_equal(xsa@rt$corrected, res) + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, + family = "symmetric") + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr, + family = "symmetric") + expect_equal(xsa@rt$corrected, res) + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, + span = 1) + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr, + span = 1) + expect_equal(xsa@rt$corrected, res) +}) + +test_that("applyRtAdjustment works", { + xs <- faahko + ## group em. + xsg <- group(xs) + ## align em. + xsa <- retcor(xsg, method = "peakgroups") + pksAdj <- .applyRtAdjToChromPeaks(peaks(xsg), + rtraw = xsa@rt$raw, + rtadj = xsa@rt$corrected) + expect_equal(pksAdj, peaks(xsa)) + ## Reset em. + pksRaw <- .applyRtAdjToChromPeaks(pksAdj, + rtraw = xsa@rt$corrected, + rtadj = xsa@rt$raw) + expect_equal(pksRaw, peaks(xsg)) + + rt_raw <- rtime(xod_xgr, adjusted = FALSE, bySample = TRUE)[[1]] + rt_adj <- rtime(xod_xgr, bySample = TRUE)[[1]] + + rt_new <- xcms:::.applyRtAdjustment(rt_raw, rt_raw, rt_adj) + expect_equal(unname(rt_new), unname(rt_adj)) + + rt_new2 <- .applyRtAdjustment(rt_raw, rt_raw[200:1000], rt_adj[200:1000]) + + ## Artificial examples. + a_raw <- c(1, 2, 3, 5, 6, 7, 8, 10, 12, 13, 14, 16) + a_adj <- a_raw + 2 # shift by 2 + b <- .applyRtAdjustment(a_raw, a_raw, a_adj) + expect_equal(a_adj, b) + b_2 <- .applyRtAdjustment(a_raw, a_raw[4:8], a_adj[4:8]) + expect_equal(b, b_2) + + a_adj <- a_raw - 2 + b <- .applyRtAdjustment(a_raw, a_raw, a_adj) + expect_equal(a_adj, b) + b_2 <- .applyRtAdjustment(a_raw, a_raw[4:8], a_adj[4:8]) + expect_equal(b, b_2) +}) + +test_that(".get_closest_index works", { + expect_equal(.get_closest_index(2, c(1, 3, 5, 7)), 3) + expect_equal(.get_closest_index(2, c(1, 3, 5, 7), method = "previous"), 1) + expect_equal(.get_closest_index(2, c(1, 3, 5, 7), method = "closest"), 1) + expect_equal(.get_closest_index(6, c(1, 3, 5)), 5) + expect_equal(.get_closest_index(6, c(1, 3, 5), method = "previous"), 5) + expect_equal(.get_closest_index(6, c(1, 3, 5), method = "closest"), 5) + expect_equal(.get_closest_index(10, c(1, 3, 5)), 5) + expect_equal(.get_closest_index(10, c(1, 3, 5), method = "previous"), 5) + expect_equal(.get_closest_index(10, c(1, 3, 5), method = "closest"), 5) + expect_equal(.get_closest_index(2, c(5, 7, 9)), 5) + expect_equal(.get_closest_index(2, c(5, 7, 9), method = "previous"), 5) + expect_equal(.get_closest_index(2, c(5, 7, 9), method = "closest"), 5) + expect_equal(.get_closest_index(2, c(1, 5, 9)), 5) + expect_equal(.get_closest_index(2, c(1, 5, 9), method = "previous"), 1) + expect_equal(.get_closest_index(2, c(1, 5, 9), method = "closest"), 1) + expect_equal(.get_closest_index(3, c(1, 5, 9)), 5) + expect_equal(.get_closest_index(3, c(1, 5, 9), method = "previous"), 1) + expect_equal(.get_closest_index(3, c(1, 5, 9), method = "closest"), 1) + expect_equal(.get_closest_index(4, c(1, 5, 9)), 5) + expect_equal(.get_closest_index(4, c(1, 5, 9), method = "previous"), 1) + expect_equal(.get_closest_index(4, c(1, 5, 9), method = "closest"), 5) + expect_equal(.get_closest_index(6, c(1, 5, 9)), 9) + expect_equal(.get_closest_index(6, c(1, 5, 9), method = "previous"), 5) + expect_equal(.get_closest_index(6, c(1, 5, 9), method = "closest"), 5) + expect_equal(.get_closest_index(7, c(1, 5, 9)), 9) + expect_equal(.get_closest_index(7, c(1, 5, 9), method = "previous"), 5) + expect_equal(.get_closest_index(7, c(1, 5, 9), method = "closest"), 5) + expect_equal(.get_closest_index(8, c(1, 5, 9)), 9) + expect_equal(.get_closest_index(8, c(1, 5, 9), method = "previous"), 5) + expect_equal(.get_closest_index(8, c(1, 5, 9), method = "closest"), 9) +}) + +test_that(".match_trim_vectors and index works", { + res <- .match_trim_vectors(list(1:10, 3:10)) + expect_equal(res, list(3:10, 3:10)) + res <- .match_trim_vectors(list(3:10, 4:15)) + expect_equal(res, list(3:10, 4:11)) + res <- .match_trim_vectors(list(1:5, 1:20)) + expect_equal(res, list(1:5, 1:5)) + res <- .match_trim_vector_index(list(1:10, 3:10)) + expect_equal(res, list(3:10, 1:8)) + res <- .match_trim_vector_index(list(2:10, 2:8)) + expect_equal(res, list(1:7, 1:7)) +}) + +test_that("adjustRtimeSubset works", { + rt_raw <- rtime(xod_xgr, adjusted = FALSE, bySample = TRUE) + rt_adj <- rtime(xod_xgr, adjusted = TRUE, bySample = TRUE) + + res <- adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3), + method = "previous") + expect_equal(res[[1]], rt_adj[[1]]) + expect_equal(res[[3]], rt_adj[[3]]) + expect_true(all(res[[2]] != rt_adj[[2]])) + expect_equal(names(res[[2]]), names(rt_adj[[2]])) + expect_equal(unname(res[[2]]), unname(rt_adj[[1]])) + + a <- res[[1]] - rt_raw[[1]] + b <- res[[2]] - rt_raw[[2]] + c <- res[[3]] - rt_raw[[3]] + plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2, + ylim = range(a, b, c)) + points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1) + points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2) + + res <- adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3), + method = "average") + expect_equal(res[[1]], rt_adj[[1]]) + expect_equal(res[[3]], rt_adj[[3]]) + expect_true(all(res[[2]] != rt_adj[[2]])) + expect_true(all(res[[2]] != rt_adj[[1]])) + expect_true(all(res[[2]] != rt_adj[[3]])) + + a <- res[[1]] - rt_raw[[1]] + b <- res[[2]] - rt_raw[[2]] + c <- res[[3]] - rt_raw[[3]] + plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2, + ylim = range(a, b, c)) + points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1) + points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2) +}) diff --git a/longtests/testthat/test_do_findChromPeaks-functions.R b/longtests/testthat/test_do_findChromPeaks-functions.R new file mode 100644 index 000000000..d6c55acdf --- /dev/null +++ b/longtests/testthat/test_do_findChromPeaks-functions.R @@ -0,0 +1,72 @@ +test_that("do_findChromPeaks_centWave works", { + ## xr <- xcmsRaw(fs[1], profstep = 0) + ## We expect that changing a parameter has an influence on the result. + xr <- deepCopy(faahko_xr_1) + mzVals <- xr@env$mz + intVals <- xr@env$intensity + ## Define the values per spectrum: + valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) + res1 <- do_findChromPeaks_centWave(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + snthresh = 200, + noise = 4000, + prefilter = c(3, 10000)) + ## Eventually disable the sleep option to improve speed! + res2 <- do_findChromPeaks_centWave(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + snthresh = 500, + noise = 4000, sleep = 0.00, + prefilter = c(3, 10000)) + expect_true(nrow(res1) > nrow(res2)) + + ## Check scanrange on findPeaks.centWave. + res_1 <- findPeaks.centWave(xr, scanrange = c(90, 345), noise = 2000, + prefilter = c(3, 10000)) + xr <- xr[90:345] + mzVals <- xr@env$mz + intVals <- xr@env$intensity + ## Define the values per spectrum: + valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) + res_2 <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, + scantime = xr@scantime, valsPerSpect, + noise = 2000, prefilter = c(3, 10000)) + expect_equal(res_1@.Data, res_2) +}) + +test_that("do_findChromPeaks_massifquant works", { + xr <- deepCopy(faahko_xr_1) + res <- findPeaks.massifquant(xr, snthresh = 100) + mz <- xr@env$mz + int <- xr@env$intensity + valsPerSpect <- diff(c(xr@scanindex, length(mz))) + scantime <- xr@scantime + res_2 <- do_findChromPeaks_massifquant(mz = mz, int = int, + valsPerSpect = valsPerSpect, + scantime = scantime) + expect_equal(res@.Data, res_2) + ## With centWave: + res_3 <- do_findChromPeaks_massifquant(mz = mz, int = int, + valsPerSpect = valsPerSpect, + scantime = scantime, withWave = TRUE, + snthresh = 100, noise = 4000) + res_4 <- findPeaks.massifquant(xr, withWave = 1, snthresh = 100, + noise = 4000) + expect_equal(res_3, res_4@.Data) + expect_true(nrow(res_3) < nrow(res_2)) + + ## Subsetted data and scanrange: + res_1 <- findPeaks.massifquant(xr, scanrange = c(90, 345)) + xsub <- xr[90:345] + mz <- xsub@env$mz + int <- xsub@env$intensity + valsPerSpect <- diff(c(xsub@scanindex, length(mz))) + scantime <- xsub@scantime + res_2 <- do_findChromPeaks_massifquant(mz = mz, int = int, + valsPerSpect = valsPerSpect, + scantime = scantime) + expect_identical(res_1@.Data, res_2) +}) diff --git a/longtests/testthat/test_do_groupChromPeaks-functions.R b/longtests/testthat/test_do_groupChromPeaks-functions.R new file mode 100644 index 000000000..04b5a6656 --- /dev/null +++ b/longtests/testthat/test_do_groupChromPeaks-functions.R @@ -0,0 +1,29 @@ +test_that("do_groupPeaks_mzClust works", { + fts <- peaks(fticr_xs) + res <- do_groupPeaks_mzClust(peaks = fts, + sampleGroups = sampclass(fticr_xs)) + res_2 <- do_groupPeaks_mzClust(peaks = fts, + sampleGroups = sampclass(fticr_xs), + minFraction = 0, absMz = 2) + expect_true(nrow(res$featureDefinitions) > nrow(res_2$featureDefinitions)) + + res_x <- group(fticr_xs, method = "mzClust") + expect_equal(res_x@groups, res$featureDefinitions) + expect_equal(res_x@groupidx, res$peakIndex) +}) + +test_that("do_groupChromPeaks_nearest works", { + xs <- faahko_xs + features <- peaks(xs) + sampleGroups <- sampclass(xs) + mzVsRtBalance <- 10 + mzCheck <- 0.2 + rtCheck <- 15 + kNN <- 10 + + res <- do_groupChromPeaks_nearest(features, sampleGroups) + res_2 <- do_groupChromPeaks_nearest(features, sampleGroups, absRt = 3) + expect_true(nrow(res$featureDefinitions) < nrow(res_2$featureDefinitions)) + res_x <- group(xs, method = "nearest") + expect_equal(res_x@groups, res$featureDefinitions) +}) diff --git a/longtests/testthat/test_functions-OnDiskMSnExp.R b/longtests/testthat/test_functions-OnDiskMSnExp.R new file mode 100644 index 000000000..e34c421b3 --- /dev/null +++ b/longtests/testthat/test_functions-OnDiskMSnExp.R @@ -0,0 +1,32 @@ +test_that(".obiwarp works", { + + xs <- faahko_xs + od <- faahko_od + xod <- faahko_xod + ## Feature alignment on those: + ## object <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 10000, + ## snthresh = 40)) + prm <- ObiwarpParam(binSize = 1) + xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm)) + expect_equal(xs_2@rt$raw[[2]], xs_2@rt$corrected[[2]]) + expect_true(sum(xs_2@rt$raw[[1]] != xs_2@rt$corrected[[1]]) > 500) + expect_true(sum(xs_2@rt$raw[[3]] != xs_2@rt$corrected[[3]]) > 500) + + ## Manually specify center Sample + centerSample(prm) <- 3 + xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm), center = centerSample(prm)) + expect_equal(xs_2@rt$raw[[centerSample(prm)]], + xs_2@rt$corrected[[centerSample(prm)]]) + res <- .obiwarp(od, param = prm) + expect_equal(res[[3]], unname(rtime(xod, bySample = TRUE)[[3]])) + expect_equal(xs_2@rt$corrected, res) + ## change some settings + gapInit(prm) <- 3.1 + gapExtend(prm) <- 0.9 + xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm), gapInit = gapInit(prm), + center = centerSample(prm), gapExtend = gapExtend(prm)) + expect_equal(xs_2@rt$raw[[centerSample(prm)]], + xs_2@rt$corrected[[centerSample(prm)]]) + res <- .obiwarp(od, param = prm) + expect_equal(xs_2@rt$corrected, res) +}) diff --git a/longtests/testthat/test_methods-OnDiskMSnExp.R b/longtests/testthat/test_methods-OnDiskMSnExp.R new file mode 100644 index 000000000..567189ebc --- /dev/null +++ b/longtests/testthat/test_methods-OnDiskMSnExp.R @@ -0,0 +1,133 @@ +test_that("profMat,OnDiskMSnExp works", { + ## Get it from all 3 files in one go. + res <- profMat(faahko_od, step = 2) + res_2 <- profMat(xcmsRaw(faahko_3_files[2], profstep = 0), step = 2) + expect_equal(res_2, res[[2]]) + res_2 <- profMat(xcmsRaw(faahko_3_files[3], profstep = 0), step = 2) + expect_equal(res_2, res[[3]]) + res_2 <- profMat(faahko_xod, step = 2) + expect_equal(res, res_2) + res <- profMat(faahko_od, step = 2, method = "binlin", fileIndex = 2) + res_2 <- profMat(xcmsRaw(faahko_3_files[2], profstep = 0), step = 2, + method = "binlin") + expect_equal(res_2, res[[1]]) + + ## Simulating issue #312 + od_1 <- filterFile(microtofq_od, 1) + od_1_clnd <- clean(removePeaks(od_1, t = 1800)) + res_clnd <- profMat(od_1_clnd) +}) + +test_that("findChromPeaks,OnDiskMSnExp,CentWaveParam works", { + fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") + xr <- deepCopy(faahko_xr_1) + onDisk <- filterFile(faahko_od, file = 1) + ppm <- 40 + snthresh <- 40 + res_x <- findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh, + noise = 100000)@.Data + ## Bypass xcmsRaw + xs <- xcmsSet(fs[1], profparam = list(profstep = 0), ppm = ppm, + snthresh = snthresh, method = "centWave", + noise = 100000) + expect_equal(xs@peaks[, colnames(res_x)], res_x) + ## OnDiskMSnExp + ## onDisk <- readMSData(fs[1], msLevel. = 1, mode = "onDisk") + cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh, noise = 100000, + prefilter = c(3, 10000)) + res <- findChromPeaks(onDisk, param = cwp, return.type = "list") + expect_equal(res[[1]], peaks(xs)@.Data) + + expect_error(findChromPeaks(onDisk, param = cwp, msLevel = 2)) + + ## returning an xcmsSet + res <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") + pks <- peaks(res) + rownames(pks) <- NULL + expect_equal(pks[, colnames(peaks(xs))], peaks(xs)) + expect_true(is(res, "xcmsSet")) + + ## Return type XCMSnExp + res <- findChromPeaks(onDisk, param = cwp) + expect_true(hasChromPeaks(res)) + expect_true(!hasAdjustedRtime(res)) + expect_true(!hasFeatures(res)) + pks <- chromPeaks(res) + rownames(pks) <- NULL + expect_equal(peaks(xs)@.Data, pks[, !colnames(pks) %in% c("is_filled", "ms_level")]) + + ## check that rownames are set + expect_true(!is.null(rownames(chromPeaks(res)))) + expect_true(length(grep("CP", rownames(chromPeaks(res)))) == + nrow(chromPeaks(res))) +}) + +test_that("findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam works", { + fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") + xr <- deepCopy(faahko_xr_1) + snth <- 20 + ns <- 2500 + snthIso <- 5 + res_x <- findPeaks.centWaveWithPredictedIsotopeROIs(xr, noise = ns, + snthresh = snth, + snthreshIsoROIs = snthIso)@.Data + ## Bypass xcmsRaw + xs <- xcmsSet(fs[1], profparam = list(profstep = 0), snthresh = snth, + method = "centWaveWithPredictedIsotopeROIs", noise = ns, + snthreshIsoROIs = snthIso) + expect_equal(xs@peaks[, colnames(res_x)], res_x) + ## OnDiskMSnExp + onDisk <- readMSData(fs[1], msLevel. = 1, mode = "onDisk") + cwp <- CentWavePredIsoParam(snthresh = snth, noise = ns, + snthreshIsoROIs = snthIso) + res <- findChromPeaks(onDisk, param = cwp, return.type = "list") + expect_equal(res[[1]], peaks(xs)@.Data) + expect_error(findChromPeaks(onDisk, param = cwp, msLevel = 2)) + + ## returning an xcmsSet + res <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") + pks <- peaks(res) + rownames(pks) <- NULL + expect_equal(pks[, colnames(peaks(xs))], peaks(xs)) + expect_true(is(res, "xcmsSet")) + + ## Return an XCMSnExp + res <- findChromPeaks(onDisk, param = cwp) + expect_true(hasChromPeaks(res)) + expect_true(!hasAdjustedRtime(res)) + expect_true(!hasFeatures(res)) + pks <- chromPeaks(res) + rownames(pks) <- NULL + expect_equal(peaks(xs)@.Data, pks[, colnames(peaks(xs)@.Data)]) +}) + +test_that("findChromPeaks,OnDiskMSnExp,MassifquantParam works", { + mzf <- system.file("microtofq/MM14.mzML", package = "msdata") + mqp <- MassifquantParam(ppm = 20, criticalValue = 1.2) + res <- xcmsSet(mzf[1], method = "massifquant", ppm = 20, + criticalValue = 1.2) + ## onDisk + onDisk <- readMSData(mzf[1], mode = "onDisk") + res_o <- findChromPeaks(onDisk, param = mqp, return.type = "xcmsSet") + expect_equal(unname(peaks(res_o)[, colnames(peaks(res))]), + unname(peaks(res))) + expect_equal(unname(res_o@rt$raw[[1]]), unname(res@rt$raw[[1]])) + + expect_error(findChromPeaks(onDisk, param = mqp, msLevel = 2)) +}) + +test_that("findChromPeaks,OnDiskMSnExp,MatchedFilterParam works", { + fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") + mfp <- MatchedFilterParam(binSize = 20, impute = "lin") + res <- xcmsSet(fs[1], method = "matchedFilter", profmethod = "binlin", + step = binSize(mfp)) + ## onDisk + ## onDisk <- readMSData(fs[1], mode = "onDisk") + onDisk <- filterFile(faahko_od, file = 1) + res_o <- findChromPeaks(onDisk, param = mfp, return.type = "xcmsSet") + expect_equal(unname(peaks(res_o)[, colnames(peaks(res))]), + unname(peaks(res))) + expect_equal(unname(res_o@rt$raw[[1]]), unname(res@rt$raw[[1]])) + + expect_error(findChromPeaks(onDisk, param = mfp, msLevel = 2)) +}) diff --git a/longtests/testthat/test_methods-XCMSnExp.R b/longtests/testthat/test_methods-XCMSnExp.R new file mode 100644 index 000000000..7b3054d62 --- /dev/null +++ b/longtests/testthat/test_methods-XCMSnExp.R @@ -0,0 +1,472 @@ +test_that("XCMSnExp, XCMSnExp works", { + rts <- rtime(faahko_od) + rts_2 <- rtime(od_x) + expect_equal(rts, rts_2) + ## Test with bySample. + rts_3 <- rtime(xod_x, bySample = TRUE) + expect_equal(rts_3, split(rts, f = fromFile(faahko_od))) + ## Check if rtimes are correctly ordered for bySample + rts_4 <- rtime(filterFile(faahko_od, file = 2)) + expect_equal(rts_4, rts_3[[2]]) + rts_4 <- rtime(filterFile(faahko_od, file = 3)) + expect_equal(rts_4, rts_3[[3]]) + ## Compare with the values we get from an xcmsSet: + rtx <- faahko_xs@rt$raw + expect_equal(unlist(rtx, use.names = FALSE), + unlist(rtime(faahko_xod, bySample = TRUE), use.names = FALSE)) +}) + +test_that("featureValues,XCMSnExp works", { + fdp <- PeakDensityParam(sampleGroups = faahko_xs$class) + od_x <- groupChromPeaks(xod_x, param = fdp) + xs <- group(faahko_xs, method = "density") + fvs <- featureValues(od_x, value = "into") + expect_equal(rownames(fvs), rownames(featureDefinitions(od_x))) + rownames(fvs) <- NULL + colnames(fvs) <- NULL + gvs <- groupval(xs, value = "into") + rownames(gvs) <- NULL + colnames(gvs) <- NULL + expect_equal(fvs, gvs) + + ## Use the internal function + res <- xcms:::.feature_values(chromPeaks(od_x), featureDefinitions(od_x), + value = "into", method = "medret", + intensity = "into", + colnames = basename(fileNames(od_x))) + expect_equal(featureValues(od_x, value = "into"), res) + res <- xcms:::.feature_values(chromPeaks(od_x), featureDefinitions(od_x), + value = "into", method = "sum", + intensity = "into", + colnames = basename(fileNames(od_x))) + expect_equal(featureValues(od_x, value = "into", method = "sum"), res) + + + fsum <- featureSummary(xod_xg) + fv <- featureValues(xod_xg, method = "maxint", value = "into") + ## For feature 3 we have 2 peaks in sample 3 + idx <- unlist(featureDefinitions(xod_xg)[3, "peakidx"]) + pks <- chromPeaks(xod_xg)[idx, ] + expect_equal(max(pks[pks[, "sample"] == 3, "into"]), fv[3, 3]) + ## For feature 37 we have 2 peaks per sample + idx <- unlist(featureDefinitions(xod_xg)[37, "peakidx"]) + pks <- chromPeaks(xod_xg)[idx, ] + expect_equal(max(pks[pks[, "sample"] == 1, "into"]), fv[37, 1]) + expect_equal(max(pks[pks[, "sample"] == 2, "into"]), fv[37, 2]) + expect_equal(max(pks[pks[, "sample"] == 3, "into"]), fv[37, 3]) + + ## method sum + fv <- featureValues(xod_xg, method = "sum", value = "into") + ## For feature 3 we have 2 peaks in sample 3 + idx <- unlist(featureDefinitions(xod_xg)[3, "peakidx"]) + pks <- chromPeaks(xod_xg)[idx, ] + expect_equal(sum(pks[pks[, "sample"] == 3, "into"]), fv[3, 3]) + ## For feature 37 we have 2 peaks per sample + idx <- unlist(featureDefinitions(xod_xg)[37, "peakidx"]) + pks <- chromPeaks(xod_xg)[idx, ] + expect_equal(sum(pks[pks[, "sample"] == 1, "into"]), fv[37, 1]) + expect_equal(sum(pks[pks[, "sample"] == 2, "into"]), fv[37, 2]) + expect_equal(sum(pks[pks[, "sample"] == 3, "into"]), fv[37, 3]) + + ## missing + na_num <- sum(is.na(featureValues(od_x, value = "into"))) + res <- featureValues(od_x, value = "into", missing = 123) + expect_equal(sum(res == 123), na_num) + res <- featureValues(od_x, value = "into", missing = "rowmin_half") + res_na <- featureValues(od_x, value = "into") + is_na <- is.na(rowMeans(res_na)) + for (i in which(is_na)) { + are_na <- is.na(res_na[i, ]) + expect_true(all(res[i, are_na] == min(res_na[i, ], na.rm = TRUE) / 2)) + } + ## Check errors + expect_error(featureValues(od_x, value = "into", missing = "b")) + expect_error(featureValues(od_x, value = "into", missing = TRUE)) + + ## feature values with MS level > 1 + expect_error(featureValues(xod_xg, msLevel = 2), "No feature definitions") + ## Fake feature definitions for MS level 2 + cwp <- CentWaveParam(noise = 10000, snthresh = 40, + prefilter = c(3, 10000)) + tmp <- xod_xg + fd <- new("MsFeatureData") + fd@.xData <- xcms:::.copy_env(tmp@msFeatureData) + chromPeakData(fd)$ms_level <- 2L + fd$featureDefinitions$ms_level <- 2L + lockEnvironment(fd, bindings = TRUE) + tmp@msFeatureData <- fd + expect_true(hasChromPeaks(tmp, msLevel = 2L)) + expect_true(hasFeatures(tmp, msLevel = 2L)) + expect_equal(featureValues(tmp, msLevel = 2L), featureValues(xod_xg)) + + tmp <- findChromPeaks(tmp, add = TRUE, param = cwp) + expect_equal(unname(chromPeaks(tmp, msLevel = 1L)[, "into"]), + unname(chromPeaks(tmp, msLevel = 2L)[, "into"])) + ## correspondence + pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) + tmp <- groupChromPeaks(tmp, param = pdp, msLevel = 1L) + tmp <- groupChromPeaks(tmp, param = pdp, msLevel = 2L, add = TRUE) + expect_true(hasFeatures(tmp, msLevel = 1L)) + expect_true(hasFeatures(tmp, msLevel = 2L)) + + all <- featureValues(tmp) + ms1 <- featureValues(tmp, msLevel = 1L) + ms2 <- featureValues(tmp, msLevel = 2L) + expect_equal(all, rbind(ms1, ms2)) + rownames(ms1) <- rownames(ms2) <- NULL + expect_equal(ms1, ms2) +}) + +test_that("adjustRtime,peakGroups works", { + xod <- faahko_xod + xs <- faahko_xs + ## Group these + xsg <- group(xs) + xodg <- groupChromPeaks(xod, + param = PeakDensityParam(sampleGroups = xs$class)) + pks <- chromPeaks(xodg) + rownames(pks) <- NULL + expect_equal(peaks(xsg), pks[, colnames(peaks(xsg))]) + expect_equal(xsg@groupidx, featureDefinitions(xodg)$peakidx) + expect_true(length(processHistory(xodg, + type = .PROCSTEP.PEAK.DETECTION)) == 1) + expect_true(length(processHistory(xodg, + type = .PROCSTEP.PEAK.GROUPING)) == 1) + ## Now do the retention time correction + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 0.3) + ## minFr <- (length(fileNames(xod)) - 1) / length(fileNames(xod)) + p <- PeakGroupsParam(minFraction = 1, span = 0.3) + xodr <- adjustRtime(xodg, param = p) + ## Check that we've got process histories. + expect_true(validObject(xodr)) + expect_true(hasChromPeaks(xodr)) + expect_true(!hasFeatures(xodr)) + ## But we would like to keep the related process history step: + expect_true(hasAdjustedRtime(xodr)) + expect_true(hasFeatures(xodg)) + ## We want to keep the process history step of the feature alignment! + expect_true(length(processHistory(xodr, + type = .PROCSTEP.PEAK.GROUPING)) == 1) + expect_true(length(processHistory(xodr, + type = .PROCSTEP.RTIME.CORRECTION)) == 1) + ## Different from original: + expect_true(sum(chromPeaks(xod)[, "rt"] != chromPeaks(xodr)[, "rt"]) > 200) + expect_true(sum(chromPeaks(xod)[, "rtmin"] != chromPeaks(xodr)[, "rtmin"]) > 200) + expect_true(sum(chromPeaks(xod)[, "rtmax"] != chromPeaks(xodr)[, "rtmax"]) > 200) + ## between xcmsSet and XCMSnExp + pks <- chromPeaks(xodr) + rownames(pks) <- NULL + expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) + ## To compare the adjusted retention time we have to extract it by sample! + ## Otherwise the ordering will not be the same, as rtime is ordered by + ## retention time, but @rt$raw by sample. + expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + unlist(xsr@rt$corrected, use.names = FALSE)) + ## Just to ensure - are the raw rt the same? + expect_equal(unlist(rtime(xod, bySample = TRUE), use.names = FALSE), + unlist(xs@rt$raw, use.names = FALSE)) + ## Check that we get the same by supplying the peakGroupsMatrix. + pgm <- adjustRtimePeakGroups(xodg, param = p) + p_2 <- p + minFraction(p_2) <- 0.5 + extraPeaks(p_2) <- 20 + peakGroupsMatrix(p_2) <- pgm + xodr_2 <- adjustRtime(xodg, param = p_2) + expect_equal(adjustedRtime(xodr), adjustedRtime(xodr_2)) + expect_equal(chromPeaks(xodr), chromPeaks(xodr_2)) + p_got <- processParam( + processHistory(xodr, type = .PROCSTEP.RTIME.CORRECTION)[[1]]) + peakGroupsMatrix(p_got) <- matrix(ncol = 0, nrow = 0) + expect_equal(p_got, p) + expect_equal(processParam( + processHistory(xodr_2, type = .PROCSTEP.RTIME.CORRECTION)[[1]]), + p_2) + ## Doing an additional grouping + xodrg <- groupChromPeaks(xodr, param = PeakDensityParam(sampleGroups = + xs$class)) + expect_true(length(processHistory(xodrg, + type = .PROCSTEP.PEAK.GROUPING)) == 2) + expect_true(hasAdjustedRtime(xodrg)) + expect_true(hasFeatures(xodrg)) + xsrg <- group(xsr) + expect_equal(xsrg@groupidx, featureDefinitions(xodrg)$peakidx) + + ## Mod settings: + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1) + xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, + span = 1)) + pks <- chromPeaks(xodr) + rownames(pks) <- NULL + expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) + expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + unlist(xsr@rt$corrected, use.names = FALSE)) + + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1, + smooth = "linear") + xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, + span = 1, + smooth = "linear")) + pks <- chromPeaks(xodr) + rownames(pks) <- NULL + expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) + expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + unlist(xsr@rt$corrected, use.names = FALSE)) + + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1, + family = "symmetric") + xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, + span = 1, + family = "symmetric")) + pks <- chromPeaks(xodr) + rownames(pks) <- NULL + expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) + expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + unlist(xsr@rt$corrected, use.names = FALSE)) + ## Dropping results. + tmp <- dropAdjustedRtime(xodr) + expect_equal(tmp, xod) + + ## With subset. + res_sub <- adjustRtime( + xodg, param = PeakGroupsParam(subset = c(1, 3), + subsetAdjust = "previous")) + expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] != + rtime(xodg, bySample = TRUE)[[1]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[2]] != + rtime(xodg, bySample = TRUE)[[2]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[3]] != + rtime(xodg, bySample = TRUE)[[3]])) + expect_equal(unname(rtime(res_sub, bySample = TRUE)[[1]]), + unname(rtime(res_sub, bySample = TRUE)[[2]])) + expect_equal(rtime(res_sub, bySample = TRUE)[[2]], + xcms:::.applyRtAdjustment(rtime(xodg, bySample = TRUE)[[2]], + rtime(xodg, bySample = TRUE)[[1]], + rtime(res_sub, bySample = TRUE)[[1]])) + res_sub <- adjustRtime( + xodg, param = PeakGroupsParam(subset = c(1, 3), + subsetAdjust = "average")) + expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] != + rtime(xodg, bySample = TRUE)[[1]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[2]] != + rtime(xodg, bySample = TRUE)[[2]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[3]] != + rtime(xodg, bySample = TRUE)[[3]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] != + rtime(res_sub, bySample = TRUE)[[2]])) + tmp <- adjustRtime(xodg, param = PeakGroupsParam()) + + ## With subsetAdjust = "average" and the left-out being at the end. + res_sub <- adjustRtime( + xodg, param = PeakGroupsParam(subset = 1:2, subsetAdjust = "average")) + res_2 <- adjustRtime( + xodg, param = PeakGroupsParam(subset = 1:2, subsetAdjust = "previous")) + expect_equal(rtime(res_sub), rtime(res_2)) +}) + +test_that("featureValues,XCMSnExp works as with groupval", { + od_x <- faahko_xod + xs <- faahko_xs + + p <- PeakDensityParam(sampleGroups = xs$class) + od_x <- groupChromPeaks(od_x, param = p) + + xs <- group(xs, method = "density") + + expect_equal(unname(groupval(xs, value = "into")), + unname(featureValues(od_x, value = "into"))) + expect_equal(unname(groupval(xs, method = "maxint", value = "into")), + unname(featureValues(od_x, method = "maxint", value = "into"))) + ## Checking errors + expect_error(featureValues(od_x, value = "bla")) +}) + +test_that("groupChromPeaks,XCMSnExp,PeakDensityParam works", { + od_x <- faahko_xod + xs <- faahko_xs + ## Check error if no features were found. issue #273 + pdp <- PeakDensityParam(sampleGroups = xs$class, minSamples = 30) + expect_warning(groupChromPeaks(od_x, param = pdp), "Unable to group any chromatographic peaks.") + + fdp <- PeakDensityParam(sampleGroups = xs$class) + od_x <- groupChromPeaks(od_x, param = fdp) + xs <- group(xs, method = "density") + expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) + fg <- featureDefinitions(od_x) + fg <- S4Vectors::as.matrix(fg[, !(colnames(fg) %in% c("peakidx", "ms_level"))]) + rownames(fg) <- NULL + expect_equal(xs@groups, fg) + expect_true(length(processHistory(od_x)) == 2) + ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] + expect_equal(processParam(ph), fdp) + expect_equal(rownames(featureDefinitions(od_x)), + .featureIDs(nrow(featureDefinitions(od_x)))) + + fdp2 <- PeakDensityParam(sampleGroups = xs$class, binSize = 2, + minFraction = 0.8) + od_x <- groupChromPeaks(od_x, param = fdp2) + xs <- group(xs, method = "density", minfrac = 0.8, mzwid = 2) + expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) + fg <- featureDefinitions(od_x) + fg <- S4Vectors::as.matrix(fg[, !(colnames(fg) %in% c("peakidx", "ms_level"))]) + rownames(fg) <- NULL + expect_equal(xs@groups, fg) + expect_true(length(processHistory(od_x)) == 2) + ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] + expect_equal(processParam(ph), fdp2) + expect_equal(rownames(featureDefinitions(od_x)), + .featureIDs(nrow(featureDefinitions(od_x)))) + + pdp <- PeakDensityParam(sampleGroups = xs$class) + res <- groupChromPeaks(od_x, param = pdp) + res_2 <- groupChromPeaks(res, param = pdp) + expect_equal(featureDefinitions(res), featureDefinitions(res_2)) + res_2 <- groupChromPeaks(res, param = pdp, add = TRUE) + expect_true(nrow(featureDefinitions(res_2)) == + 2 * nrow(featureDefinitions(res))) + nr <- nrow(featureDefinitions(res)) + expect_equal(featureDefinitions(res), + featureDefinitions(res_2)[1:nr, ]) + expect_equal(featureDefinitions(res)$mzmed, + featureDefinitions(res_2)$mzmed[(nr + 1):(2 * nr)]) + expect_equal(featureDefinitions(res)$peakidx, + featureDefinitions(res_2)$peakidx[(nr + 1):(2 * nr)]) + + expect_error(groupChromPeaks(od_x, param = pdp, msLevel = 2), "MS level 2") + expect_error(groupChromPeaks(od_x, param = pdp, msLevel = 1:4), + "one MS level at a time") +}) + +test_that("groupPeaks,XCMSnExp,MzClustParam works", { + p <- MzClustParam(sampleGroups = sampclass(fticr_xs)) + fticr_xod2 <- groupChromPeaks(fticr_xod, param = p) + fticr_xs2 <- group(fticr_xs, method = "mzClust") + expect_equal(fticr_xs2@groupidx, featureDefinitions(fticr_xod2)$peakidx) + fg <- featureDefinitions(fticr_xod2) + fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) + rownames(fg) <- NULL + expect_equal(fticr_xs2@groups, fg) + expect_true(length(processHistory(fticr_xod2)) == 2) + ph <- processHistory(fticr_xod2, + type = .PROCSTEP.PEAK.GROUPING)[[1]] + expect_equal(processParam(ph), p) + expect_equal(rownames(featureDefinitions(fticr_xod2)), + .featureIDs(nrow(featureDefinitions(fticr_xod2)))) + p2 <- MzClustParam(sampleGroups = fticr_xs$class, absMz = 1, + minFraction = 0.8) + fticr_xod2 <- groupChromPeaks(fticr_xod, param = p2) + fticr_xs2 <- group(fticr_xs, method = "mzClust", minfrac = 0.8, mzabs = 1) + expect_equal(fticr_xs2@groupidx, featureDefinitions(fticr_xod2)$peakidx) + fg <- featureDefinitions(fticr_xod2) + fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) + rownames(fg) <- NULL + expect_equal(fticr_xs2@groups, fg) + expect_true(length(processHistory(fticr_xod2)) == 2) + ph <- processHistory(fticr_xod2, + type = .PROCSTEP.PEAK.GROUPING)[[1]] + expect_equal(processParam(ph), p2) + expect_equal(rownames(featureDefinitions(fticr_xod2)), + .featureIDs(nrow(featureDefinitions(fticr_xod2)))) +}) + +test_that("groupChromPeaks,XCMSnExp,NearestPeaksParam works", { + od_x <- faahko_xod + xs <- faahko_xs + p <- NearestPeaksParam(sampleGroups = xs$class) + od_x <- groupChromPeaks(od_x, param = p) + xs <- group(xs, method = "nearest") + expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) + fg <- featureDefinitions(od_x) + fg <- S4Vectors::as.matrix(fg[, !(colnames(fg) %in% c("peakidx", "ms_level"))]) + rownames(fg) <- NULL + expect_equal(xs@groups, fg) + expect_true(length(processHistory(od_x)) == 2) + ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] + expect_equal(processParam(ph), p) + expect_equal(rownames(featureDefinitions(od_x)), + .featureIDs(nrow(featureDefinitions(od_x)))) + fdp2 <- NearestPeaksParam(sampleGroups = xs$class, kNN = 3) + od_x <- groupChromPeaks(od_x, param = fdp2) + xs <- group(xs, method = "nearest", kNN = 3) + expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) + fg <- featureDefinitions(od_x) + fg <- S4Vectors::as.matrix(fg[, !(colnames(fg) %in% c("peakidx", "ms_level"))]) + rownames(fg) <- NULL + expect_equal(xs@groups, fg) + expect_true(length(processHistory(od_x)) == 2) + ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] + expect_equal(processParam(ph), fdp2) + expect_equal(rownames(featureDefinitions(od_x)), + .featureIDs(nrow(featureDefinitions(od_x)))) + + expect_error(groupChromPeaks(od_x, param = p, msLevel = 2), "MS level 2") + expect_error(groupChromPeaks(od_x, param = p, msLevel = 1:3), " at a time") + res <- groupChromPeaks(od_x, param = p) + res_2 <- groupChromPeaks(res, param = p) + expect_equal(featureDefinitions(res), featureDefinitions(res_2)) + res_2 <- groupChromPeaks(res, param = p, add = TRUE) + expect_true(nrow(featureDefinitions(res_2)) == + 2 * nrow(featureDefinitions(res))) + nr <- nrow(featureDefinitions(res)) + expect_equal(featureDefinitions(res), featureDefinitions(res)[1:nr, ]) + expect_equal(featureDefinitions(res)$peakidx, + featureDefinitions(res_2)$peakidx[(nr + 1):(2 * nr)]) + expect_equal(featureDefinitions(res)$mzmed, + featureDefinitions(res_2)$mzmed[(nr + 1):(2 * nr)]) +}) + +test_that("fillChromPeaks,XCMSnExp with MSW works", { + p <- MzClustParam() + fticr_xodg <- groupChromPeaks(fticr_xod, param = p) + expect_error(res <- fillChromPeaks(fticr_xod)) + res <- fillChromPeaks(fticr_xodg) + + ## Got a signal for all of em. + expect_true(!any(is.na(featureValues(res)))) + ## 1) Compare with what I get for xcmsSet. + tmp_x <- fticr_xs + tmp_x <- group(tmp_x, method = "mzClust") + tmp_x <- fillPeaks(tmp_x, method = "MSW") + ## Compare + expect_equal(unname(groupval(tmp_x)), + unname(featureValues(res, value = "index"))) + expect_equal(unname(groupval(tmp_x, value = "maxo")), + unname(featureValues(res, value = "maxo"))) + expect_equal(unname(groupval(tmp_x, value = "into")), + unname(featureValues(res, value = "into"))) + expect_equal(unname(groupval(tmp_x, value = "mz")), + unname(featureValues(res, value = "mz"))) + expect_equal(unname(groupval(tmp_x, value = "mzmin")), + unname(featureValues(res, value = "mzmin"))) + expect_equal(unname(groupval(tmp_x, value = "mzmax")), + unname(featureValues(res, value = "mzmax"))) + ## OK + ## 2) Check if the fillChromPeaks returns same/similar data than the + ## findChromPeaks does: + fdef <- featureDefinitions(fticr_xodg) + pkArea <- do.call( + rbind, + lapply( + fdef$peakidx, function(z) { + tmp <- chromPeaks(fticr_xodg)[z, c("rtmin", "rtmax", + "mzmin", "mzmax"), + drop = FALSE] + pa <- c(median(tmp[, 1]), median(tmp[, 2]), + median(tmp[, 3]), median(tmp[, 4])) + return(pa) + } + )) + colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax") + pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea, + mzmed = fdef$mzmed) + ## Get peak data for all peaks in the first file + allPks <- .getMSWPeakData(filterFile(fticr_xodg, file = 1), + peakArea = pkArea, + sample_idx = 1, + cn = colnames(chromPeaks(fticr_xodg))) + curP <- chromPeaks(res)[chromPeaks(res)[, "sample"] == 1, ] + curP <- curP[order(curP[, "mz"]), ] + expect_equal(unname(allPks[, "mz"]), unname(curP[, "mz"])) + expect_equal(unname(allPks[, "maxo"]), unname(curP[, "maxo"])) + expect_true(cor(allPks[, "into"], curP[, "into"]) > 0.99) ## Not exactly the + ## same but highly similar. +}) diff --git a/tests/testthat/test_old_BiocParallel.R b/longtests/testthat/test_old_BiocParallel.R similarity index 100% rename from tests/testthat/test_old_BiocParallel.R rename to longtests/testthat/test_old_BiocParallel.R diff --git a/tests/testthat/test_old_absentPresent.R b/longtests/testthat/test_old_absentPresent.R similarity index 100% rename from tests/testthat/test_old_absentPresent.R rename to longtests/testthat/test_old_absentPresent.R diff --git a/tests/testthat/test_old_functions-IO.R b/longtests/testthat/test_old_functions-IO.R similarity index 100% rename from tests/testthat/test_old_functions-IO.R rename to longtests/testthat/test_old_functions-IO.R diff --git a/tests/testthat/test_old_functions-xcmsRaw.R b/longtests/testthat/test_old_functions-xcmsRaw.R similarity index 98% rename from tests/testthat/test_old_functions-xcmsRaw.R rename to longtests/testthat/test_old_functions-xcmsRaw.R index ad7a271fd..21fdbbd20 100644 --- a/tests/testthat/test_old_functions-xcmsRaw.R +++ b/longtests/testthat/test_old_functions-xcmsRaw.R @@ -1,11 +1,11 @@ test_that("xcmsRaw on MS1 asking for MS2 doesn't fail", { - filename <- system.file('microtofq/MM14.mzdata', package = "msdata") + filename <- system.file('microtofq/MM14.mzML', package = "msdata") ## This file has no MS/MS data at all, but should not fail expect_warning(x1 <- xcmsRaw(filename, includeMSn=TRUE, profstep = 0)) }) test_that("xcmsRaw with multiple MS levels works", { - filename <- system.file('iontrap/extracted.mzData', package = "msdata") + filename <- system.file('iontrap/extracted.mzML', package = "msdata") x1 <- xcmsRaw(filename, includeMSn=TRUE, profstep = 0) expect_warning(x2 <- xcmsRaw(filename, includeMSn=TRUE, mslevel=2, profstep = 0)) diff --git a/tests/testthat/test_old_functions-xcmsSet.R b/longtests/testthat/test_old_functions-xcmsSet.R similarity index 95% rename from tests/testthat/test_old_functions-xcmsSet.R rename to longtests/testthat/test_old_functions-xcmsSet.R index 1be8331f9..d95ca3041 100644 --- a/tests/testthat/test_old_functions-xcmsSet.R +++ b/longtests/testthat/test_old_functions-xcmsSet.R @@ -17,7 +17,7 @@ test_that(".getPeaks_xxx functions works", { pks_n <- .getPeaks_new(faahko_xr_1, peakrange = pks_range, step = 0.3) expect_equal(pks_o, pks_n) expect_true(sum(pks_o[, "into"] != pks_tmp[, "into"]) > 0) - + ## Change profile generation settings. tmp <- deepCopy(faahko_xr_1) tmp@profmethod <- "binlin" @@ -29,7 +29,7 @@ test_that(".getPeaks_xxx functions works", { expect_true(cor(pks_o[, "into"], pks_n[, "into"]) > 0.999) expect_true(sum(pks_o[, "into"] != pks_tmp[, "into"]) > 0) pks_tmp <- pks_o - + ## Change profile generation settings. tmp@profmethod <- "binlinbase" expect_warning(pks_o <- .getPeaks_orig(tmp, peakrange = pks_range, @@ -47,13 +47,13 @@ test_that(".getPeaks_xxx functions works", { }) test_that("xcmsSet can handle MS2 data", { - filename <- system.file('iontrap/extracted.mzData', package = "msdata") - expect_warning(xs2 <- xcmsSet(filename, snthresh = 4, mslevel = 2)) + filename <- system.file('iontrap/extracted.mzML', package = "msdata") + expect_equal(xcmsSet(filename, snthresh = 4, mslevel = 2)@mslevel, 2) }) test_that("xcmsSet works with MS2... again", { - filename <- system.file('iontrap/extracted.mzData', package = "msdata") - expect_warning(xs2 <- xcmsSet(filename, method="centWave", mslevel = 2)) + filename <- system.file('iontrap/extracted.mzML', package = "msdata") + expect_equal(xcmsSet(filename, snthresh = 4, mslevel = 2)@mslevel, 2) }) test_that("phenoDataFromPaths and others don't fail", { @@ -70,7 +70,7 @@ test_that("phenoDataFromPaths and others don't fail", { ##xcms::phenoData(xs) <- pd ## https://stat.ethz.ch/pipermail/r-devel/2008-April/049184.html xs <- xcms::`phenoData<-`(xs, pd) - + xsg <- group(xs) pd <- phenoDataFromPaths(files) @@ -262,7 +262,7 @@ test_that(".getProcessHistory works", { }) test_that("xcmsSet centWave works", { - file <- system.file('microtofq/MM14.mzdata', package = "msdata") + file <- system.file('microtofq/MM14.mzML', package = "msdata") xset1 <- xcmsSet(files=file, method="centWave", peakwidth=c(5,12), profparam = list(step = 0)) xset2 <- xcmsSet(files=file, method="centWave", peakwidth=c(5,12), @@ -274,7 +274,7 @@ test_that("xcmsSet centWave works", { }) test_that("xcmsSet matchedFilter works", { - file <- system.file('microtofq/MM14.mzdata', package = "msdata") + file <- system.file('microtofq/MM14.mzML', package = "msdata") xset1 <- xcmsSet(files=file, method="matchedFilter", fwhm=10) xset2 <- xcmsSet(files=file, method="matchedFilter", fwhm=10, scanrange=c(1,112)) @@ -285,7 +285,7 @@ test_that("xcmsSet matchedFilter works", { }) test_that("xcmsSet parallel works", { - file <- system.file('microtofq/MM14.mzdata', package = "msdata") + file <- system.file('microtofq/MM14.mzML', package = "msdata") xset1 <- xcmsSet(files=file, method="centWave", peakwidth=c(5,12), scanrange=c(1,80), profparam = list(step = 0)) xset2 <- xcmsSet(files=file, method="centWave", peakwidth=c(5,12), diff --git a/longtests/testthat/test_old_methods-IO.R b/longtests/testthat/test_old_methods-IO.R new file mode 100644 index 000000000..61fc7f4f3 --- /dev/null +++ b/longtests/testthat/test_old_methods-IO.R @@ -0,0 +1,18 @@ +test_that("write.cdf,xcmsRaw works", { + file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") + xraw <- xcmsRaw(file, profstep = 0) + cdffile <- paste(tempdir(), "ko15.cdf", sep="/") + write.cdf(xraw, cdffile) + xrawCopy <- xcmsRaw(cdffile) + expect_true(all(xraw@env$mz == xrawCopy@env$mz)) + expect_true(all(xraw@env$intensity == xrawCopy@env$intensity)) +}) + +test_that("write.mzQuantML,xcmsSet works", { + xsg <- group(faahko) + mzqFile <- paste(tempdir(), "faahKO.mzq.xml", sep="/") + write.mzQuantML(xsg, mzqFile) + v <- verify.mzQuantML(filename=mzqFile) + expect_true(v$status == "0") +}) + diff --git a/tests/testthat/test_old_methods-xcmsRaw.R b/longtests/testthat/test_old_methods-xcmsRaw.R similarity index 99% rename from tests/testthat/test_old_methods-xcmsRaw.R rename to longtests/testthat/test_old_methods-xcmsRaw.R index 484a761d7..07b53e244 100644 --- a/tests/testthat/test_old_methods-xcmsRaw.R +++ b/longtests/testthat/test_old_methods-xcmsRaw.R @@ -85,11 +85,11 @@ test_that("findPeaks.addPredictedIsotopeFeatures,xcmsRaw works", { expect_equal(p1, p1_2) p2_2 <- findPeaks.addPredictedIsotopeFeatures( object = xr, xcmsPeaks = p1_2, noise = 10000) - options(originalCentWave = TRUE) + options(originalCentWave = TRUE) }) test_that("findPeaks,xcmsRaw massifquant works", { - file <- system.file('microtofq/MM14.mzdata', package = "msdata") + file <- system.file('microtofq/MM14.mzML', package = "msdata") xraw <- xcmsRaw(file, profstep = 0) p <- findPeaks(xraw, method = "massifquant") expect_equal(nrow(p), 114) diff --git a/tests/testthat/test_old_methods-xcmsSet.R b/longtests/testthat/test_old_methods-xcmsSet.R similarity index 98% rename from tests/testthat/test_old_methods-xcmsSet.R rename to longtests/testthat/test_old_methods-xcmsSet.R index 0b7551750..1d26319c2 100644 --- a/tests/testthat/test_old_methods-xcmsSet.R +++ b/longtests/testthat/test_old_methods-xcmsSet.R @@ -102,7 +102,7 @@ test_that("getEIC,xcmsSet works after retcor", { test_that("getXcmsRaw,xcmsSet works", { xset <- faahko_grouped_retcor_filled - + ## get the first as raw data file. xr <- getXcmsRaw(xset, sampleidx = 1) ## apply the rt correction @@ -170,8 +170,8 @@ test_that("getXcmsRaw,xcmsSet works, issue #44", { test_that("group,GroupDensity doesn't fail with OnePeak", { xs <- faahko p <- peaks(xs) - peaks(xs) <- p[1,,drop=FALSE] - g <- group(xs, minsamp=1, minfrac=0.001, method="density") + peaks(xs) <- p[1, , drop = FALSE] + g <- group(xs, minsamp = 1, minfrac = 0.001, method = "density") }) test_that("group,xcmsSet Nearest works", { @@ -275,6 +275,10 @@ test_that("[,xcmsSet works", { } } + res <- faahko[, integer()] + expect_true(is(res, "xcmsSet")) + expect_true(nrow(res@peaks) == 0) + xset <- faahko idx <- 8 xsub <- xset[, idx] diff --git a/longtests/testthat/test_old_xcmsSource.R b/longtests/testthat/test_old_xcmsSource.R new file mode 100644 index 000000000..54cd46cca --- /dev/null +++ b/longtests/testthat/test_old_xcmsSource.R @@ -0,0 +1,23 @@ +test_that("xcmsSource works", { + mz_file <- system.file("microtofq/MM8.mzML", package = "msdata") + src <- xcms:::xcmsSource(mz_file) + expect_true(is(src, "xcmsFileSource")) + tmp <- loadRaw(src) + expect_equal(names(tmp), c("rt", "acquisitionNum", "tic", "scanindex", + "mz", "intensity", "polarity")) + + cdf_file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") + src <- xcms:::xcmsSource(cdf_file) + expect_true(is(src, "xcmsFileSource")) + tmp <- loadRaw(src) + expect_equal(names(tmp), c("rt", "acquisitionNum", "tic", "scanindex", + "mz", "intensity", "polarity")) + + ## MSn: + mzmlpath <- system.file("iontrap", package = "msdata") + mzmlfiles <- list.files(mzmlpath, pattern="extracted.mzML", + recursive = TRUE, full.names = TRUE) + src <- xcms:::xcmsSource(mzmlfiles[1]) + tmp <- loadRaw(src, includeMSn = TRUE) + +}) diff --git a/tests/testthat/test_specDist.R b/longtests/testthat/test_specDist.R similarity index 100% rename from tests/testthat/test_specDist.R rename to longtests/testthat/test_specDist.R diff --git a/man/AutoLockMass-methods.Rd b/man/AutoLockMass-methods.Rd index 5f559bedd..a8e6fbf3c 100644 --- a/man/AutoLockMass-methods.Rd +++ b/man/AutoLockMass-methods.Rd @@ -23,20 +23,22 @@ } \author{Paul Benton, \email{hpaul.benton08@imperial.ac.uk}} \examples{ - \dontrun{library(xcms) - library(faahKO) ## These files do not have this problem to correct for but just for an example - cdfpath <- system.file("cdf", package = "faahKO") - cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) - xr<-xcmsRaw(cdffiles[1]) - xr - ##Lets assume that the lockmass starts at 1 and is every 100 scans - lockMass<-xcms:::makeacqNum(xr, freq=100, start=1) - ## these are equalvent - lockmass2<-AutoLockMass(xr) - all((lockmass == lockmass2) == TRUE) +\dontrun{library(xcms) + library(faahKO) + ## These files do not have this problem + ## to correct for but just for an example + cdfpath <- system.file("cdf", package = "faahKO") + cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) + xr<-xcmsRaw(cdffiles[1]) + xr + ##Lets assume that the lockmass starts at 1 and is every 100 scans + lockMass<-xcms:::makeacqNum(xr, freq=100, start=1) + ## these are equalvent + lockmass2<-AutoLockMass(xr) + all((lockmass == lockmass2) == TRUE) - ob<-stitch(xr, lockMass) - } + ob<-stitch(xr, lockMass) +} } \keyword{methods} diff --git a/man/GenericParam.Rd b/man/GenericParam.Rd index fff3ea249..c3965f70d 100644 --- a/man/GenericParam.Rd +++ b/man/GenericParam.Rd @@ -1,23 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/functions-Params.R, -% R/methods-Params.R +% Please edit documentation in R/DataClasses.R, R/functions-Params.R \docType{class} \name{GenericParam-class} \alias{GenericParam-class} \alias{GenericParam} -\alias{show,GenericParam-method} +\alias{Param} +\alias{class:Param} +\alias{Param-class} \title{Generic parameter class} \usage{ GenericParam(fun = character(), args = list()) - -\S4method{show}{GenericParam}(object) } \arguments{ \item{fun}{\code{character} representing the name of the function.} \item{args}{\code{list} (ideally named) with the arguments to the function.} - -\item{object}{\code{GenericParam} object.} } \value{ The \code{GenericParam} function returns a \code{GenericParam} @@ -39,8 +36,6 @@ The \code{GenericParam} class allows to store generic parameter \item{\code{args}}{\code{list} (ideally named) with the arguments to the function.} - -\item{\code{.__classVersion__}}{the version of the class.} }} \examples{ diff --git a/man/XCMSnExp-class.Rd b/man/XCMSnExp-class.Rd index 0f5c98300..27b561b24 100644 --- a/man/XCMSnExp-class.Rd +++ b/man/XCMSnExp-class.Rd @@ -1,11 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/DataClasses.R, R/functions-ProcessHistory.R, -% R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R +% R/functions-XCMSnExp.R, R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R \docType{class} \name{XCMSnExp-class} \alias{XCMSnExp-class} \alias{XCMSnExp} +\alias{updateObject,XCMSnExp-method} \alias{processHistoryTypes} +\alias{hasFilledChromPeaks} +\alias{featureArea} \alias{profMat,OnDiskMSnExp-method} \alias{show,XCMSnExp-method} \alias{show,MsFeatureData-method} @@ -19,6 +22,7 @@ \alias{hasChromPeaks,XCMSnExp-method} \alias{hasChromPeaks} \alias{hasChromPeaks,MsFeatureData-method} +\alias{hasFilledChromPeaks,XCMSnExp-method} \alias{adjustedRtime,XCMSnExp-method} \alias{adjustedRtime} \alias{adjustedRtime,MsFeatureData-method} @@ -34,9 +38,13 @@ \alias{chromPeaks,XCMSnExp-method} \alias{chromPeaks} \alias{chromPeaks,MsFeatureData-method} +\alias{chromPeakData,MsFeatureData-method} +\alias{chromPeakData} \alias{chromPeaks<-,XCMSnExp-method} \alias{chromPeaks<-} \alias{chromPeaks<-,MsFeatureData-method} +\alias{chromPeakData<-,MsFeatureData-method} +\alias{chromPeakData<-} \alias{rtime,XCMSnExp-method} \alias{mz,XCMSnExp-method} \alias{intensity,XCMSnExp-method} @@ -55,74 +63,131 @@ \alias{setAs} \alias{profMat,XCMSnExp-method} \alias{findChromPeaks,XCMSnExp,Param-method} +\alias{faahko_sub} \alias{dropFilledChromPeaks,XCMSnExp-method} \alias{dropFilledChromPeaks} \alias{spectrapply,XCMSnExp-method} \alias{c.XCMSnExp} +\alias{chromPeakData,XCMSnExp-method} +\alias{chromPeakData<-,XCMSnExp-method} +\alias{plot,XCMSnExp,missing-method} \title{Data container storing xcms preprocessing results} \usage{ processHistoryTypes() -\S4method{profMat}{OnDiskMSnExp}(object, method = "bin", step = 0.1, - baselevel = NULL, basespace = NULL, mzrange. = NULL, fileIndex, - ...) +hasFilledChromPeaks(object) + +featureArea( + object, + mzmin = median, + mzmax = median, + rtmin = median, + rtmax = median, + msLevel = unique(msLevel(object)), + features = character() +) + +\S4method{profMat}{OnDiskMSnExp}( + object, + method = "bin", + step = 0.1, + baselevel = NULL, + basespace = NULL, + mzrange. = NULL, + fileIndex, + ... +) \S4method{show}{XCMSnExp}(object) \S4method{hasAdjustedRtime}{XCMSnExp}(object) -\S4method{hasFeatures}{XCMSnExp}(object) +\S4method{hasFeatures}{XCMSnExp}(object, msLevel = integer()) -\S4method{hasChromPeaks}{XCMSnExp}(object) +\S4method{hasChromPeaks}{XCMSnExp}(object, msLevel = integer()) + +\S4method{hasFilledChromPeaks}{XCMSnExp}(object) \S4method{adjustedRtime}{XCMSnExp}(object, bySample = FALSE) \S4method{adjustedRtime}{XCMSnExp}(object) <- value -\S4method{featureDefinitions}{XCMSnExp}(object, mz = numeric(), - rt = numeric(), ppm = 0, type = c("any", "within", "apex_within")) +\S4method{featureDefinitions}{XCMSnExp}( + object, + mz = numeric(), + rt = numeric(), + ppm = 0, + type = c("any", "within", "apex_within"), + msLevel = integer() +) \S4method{featureDefinitions}{XCMSnExp}(object) <- value -\S4method{chromPeaks}{XCMSnExp}(object, bySample = FALSE, - rt = numeric(), mz = numeric(), ppm = 0, type = c("any", - "within", "apex_within")) +\S4method{chromPeaks}{XCMSnExp}( + object, + bySample = FALSE, + rt = numeric(), + mz = numeric(), + ppm = 0, + msLevel = integer(), + type = c("any", "within", "apex_within"), + isFilledColumn = FALSE +) \S4method{chromPeaks}{XCMSnExp}(object) <- value -\S4method{rtime}{XCMSnExp}(object, bySample = FALSE, - adjusted = hasAdjustedRtime(object)) +\S4method{rtime}{XCMSnExp}(object, bySample = FALSE, adjusted = hasAdjustedRtime(object)) \S4method{mz}{XCMSnExp}(object, bySample = FALSE, BPPARAM = bpparam()) -\S4method{intensity}{XCMSnExp}(object, bySample = FALSE, - BPPARAM = bpparam()) +\S4method{intensity}{XCMSnExp}(object, bySample = FALSE, BPPARAM = bpparam()) -\S4method{spectra}{XCMSnExp}(object, bySample = FALSE, - adjusted = hasAdjustedRtime(object), BPPARAM = bpparam()) +\S4method{spectra}{XCMSnExp}( + object, + bySample = FALSE, + adjusted = hasAdjustedRtime(object), + BPPARAM = bpparam() +) \S4method{processHistory}{XCMSnExp}(object, fileIndex, type, msLevel) \S4method{dropChromPeaks}{XCMSnExp}(object, keepAdjustedRtime = FALSE) -\S4method{dropFeatureDefinitions}{XCMSnExp}(object, - keepAdjustedRtime = FALSE, dropLastN = -1) +\S4method{dropFeatureDefinitions}{XCMSnExp}(object, keepAdjustedRtime = FALSE, dropLastN = -1) \S4method{dropAdjustedRtime}{XCMSnExp}(object) -\S4method{profMat}{XCMSnExp}(object, method = "bin", step = 0.1, - baselevel = NULL, basespace = NULL, mzrange. = NULL, fileIndex, - ...) - -\S4method{findChromPeaks}{XCMSnExp,Param}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) +\S4method{profMat}{XCMSnExp}( + object, + method = "bin", + step = 0.1, + baselevel = NULL, + basespace = NULL, + mzrange. = NULL, + fileIndex, + ... +) + +\S4method{findChromPeaks}{XCMSnExp,Param}( + object, + param, + BPPARAM = bpparam(), + return.type = "XCMSnExp", + msLevel = 1L, + add = FALSE +) \S4method{dropFilledChromPeaks}{XCMSnExp}(object) -\S4method{spectrapply}{XCMSnExp}(object, FUN = NULL, - BPPARAM = bpparam(), ...) +\S4method{spectrapply}{XCMSnExp}(object, FUN = NULL, BPPARAM = bpparam(), ...) \method{c}{XCMSnExp}(...) + +\S4method{chromPeakData}{XCMSnExp}(object) + +\S4method{chromPeakData}{XCMSnExp}(object) <- value + +\S4method{plot}{XCMSnExp,missing}(x, y, type = c("spectra", "XIC"), peakCol = "#ff000060", ...) } \arguments{ \item{object}{For \code{adjustedRtime}, \code{featureDefinitions}, @@ -130,6 +195,26 @@ processHistoryTypes() \code{hasChromPeaks} either a \code{MsFeatureData} or a \code{XCMSnExp} object, for all other methods a \code{XCMSnExp} object.} +\item{mzmin}{for \code{featureArea}: \code{function} to be applied to values +in the \code{"mzmin"} column of all chromatographic peaks of a feature +to define the lower m/z value of the feature area. +Defaults to \code{median}.} + +\item{mzmax}{for \code{featureArea}: \code{function} same as \code{mzmin} +but for the \code{"mzmax"} column.} + +\item{rtmin}{for \code{featureArea}: \code{function} same as \code{mzmin} +but for the \code{"rtmin"} column.} + +\item{rtmax}{for \code{featureArea}: \code{function} same as \code{mzmin} +but for the \code{"rtmax"} column.} + +\item{msLevel}{\code{integer} specifying the MS level(s) for which identified +chromatographic peaks should be returned.} + +\item{features}{for \code{featureArea}: IDs of features for which the area +should be extracted.} + \item{method}{The profile matrix generation method. Allowed are \code{"bin"}, \code{"binlin"}, \code{"binlinbase"} and \code{"intlin"}. See details section for more information.} @@ -156,7 +241,7 @@ details.} \item{mzrange.}{Optional numeric(2) manually specifying the mz value range to be used for binnind. If not provided, the whole mz value range is used.} -\item{fileIndex}{For \code{processHistory}: optional \code{numeric} +\item{fileIndex}{For \code{processHistory}: optional \code{integer} specifying the index of the files/samples for which the \code{\link{ProcessHistory}} objects should be retrieved.} @@ -193,20 +278,22 @@ returned.} type. Use the \code{processHistoryTypes} to list all supported values. For \code{chromPeaks}: \code{character} specifying which peaks to return if \code{rt} or \code{mz} are defined. For \code{type = "any"} all -chromatographic peaks partially overlapping the range defined by +chromatographic peaks partially overlapping the range defined by \code{mz} and/or \code{rt} are returned, \code{type = "within"} returns only peaks completely within the region and \code{type = "apex_within"} peaks for which the peak's apex is within the region.} +\item{isFilledColumn}{\code{logical(1)} whether a column \code{"is_filled"} +is included in the returned \code{"matrix"} providing the information +if a peak was filled in. Alternatively, this information would be +provided by the \code{chromPeakData} data frame.} + \item{adjusted}{logical(1) whether adjusted or raw (i.e. the original retention times reported in the files) should be returned.} \item{BPPARAM}{Parameter class for parallel processing. See \code{\link{bpparam}}.} -\item{msLevel}{\code{integer(1)} defining the MS level on which the peak -detection should be performed. Defaults to \code{msLevel = 1}.} - \item{keepAdjustedRtime}{For \code{dropFeatureDefinitions,XCMSnExp}: \code{logical(1)} defining whether eventually present retention time adjustment should not be dropped. By default dropping feature definitions @@ -228,8 +315,21 @@ chromatographic peak detection algorithm.} return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} +\item{add}{For \code{findChromPeaks}: if newly identified chromatographic +peaks should be added to the peak matrix with the already identified +chromatographic peaks. By default (\code{add = FALSE}) previous +peak detection results will be removed.} + \item{FUN}{For \code{spectrapply}: a function that should be applied to each spectrum in the object.} + +\item{x}{For \code{plot}: \code{XCMSnExp} object.} + +\item{y}{For \code{plot}: not used.} + +\item{peakCol}{For \code{plot}: the color that should be used to indicate +identified chromatographic peaks (only in combination with +\code{type = "XIC"} and if chromatographic peaks are present).} } \value{ For \code{profMat}: a \code{list} with a the profile matrix @@ -253,6 +353,9 @@ Column \code{"peakidx"} contains a \code{list} with indices of chromatographic peaks (rows) in the matrix returned by the \code{chromPeaks} method that belong to that feature group. The method returns \code{NULL} if no feature definitions are present. +\code{featureDefinitions} supports also parameters \code{mz}, \code{rt}, +\code{ppm} and \code{type} to return only features within certain ranges (see +description of \code{chromPeaks} for details). For \code{chromPeaks}: if \code{bySample = FALSE} a \code{matrix} (each row being a chromatographic peak, rownames representing unique IDs of the peaks) @@ -261,18 +364,17 @@ with at least the following columns: scans/retention times), \code{"mzmin"} (minimal mz value), \code{"mzmax"} (maximal mz value), -\code{"rt"} (retention time for the peak apex), +\code{"rt"} (retention time of the peak apex), \code{"rtmin"} (minimal retention time), \code{"rtmax"} (maximal retention time), \code{"into"} (integrated, original, intensity of the peak), \code{"maxo"} (maximum intentity of the peak), \code{"sample"} (sample index in which the peak was identified) and -\code{"is_filled"} defining whether the chromatographic peak was -identified by the peak picking algorithm (\code{0}) or was added by the -\code{fillChromPeaks} method (\code{1}). Depending on the employed peak detection algorithm and the -\code{verboseColumns} parameter of it additional columns might be -returned. For \code{bySample = TRUE} the chronatographic peaks are +\code{verboseColumns} parameter of it, additional columns might be +returned. If parameter \code{isFilledColumn} was set to \code{TRUE} a column +named \code{"is_filled"} is also returned. +For \code{bySample = TRUE} the chromatographic peaks are returned as a \code{list} of matrices, each containing the chromatographic peaks of a specific sample. For samples in which no peaks were detected a matrix with 0 rows is returned. @@ -305,7 +407,7 @@ The \code{XCMSnExp} object is a container for the results of a G/LC-MS data preprocessing that comprises chromatographic peak detection, alignment and correspondence. These results can be accessed with the \code{chromPeaks}, \code{adjustedRtime} and \code{featureDefinitions} functions; see below -(after the Usage, Arguments, Value and Slots sections) for more details). +(after the Usage, Arguments, Value and Slots sections) for more details). Along with the results, the object contains the processing history that allows to track each processing step along with the used settings. This can be extracted with the \code{\link{processHistory}} method. @@ -319,12 +421,27 @@ objects using the \code{as} method (see examples below). All preprocessing results will be passed along to the resulting \code{xcmsSet} object. -General functions for \code{XCMSnExp} objects are: +General functions for \code{XCMSnExp} objects are (see further below for +specific function to handle chromatographic peak data, alignment and +correspondence results): \code{processHistoryTypes} returns the available \emph{types} of process histories. These can be passed with argument \code{type} to the \code{processHistory} method to extract specific process step(s). +\code{hasFilledChromPeaks}: whether filled-in peaks are present or not. + +\code{featureArea} extracts the m/z - retention time region for each feature. +This area is defined by the m/z - retention time regions of all +chromatographic peaks associated with a feature. Parameters \code{mzmin}, +\code{mzmax}, \code{rtmin} and \code{rtmax} allow to define functions how +the corresponding value is calculated from the individual values (such as +the \code{"rtmin"}) of all chromatographic peaks of that feature. By default +the median \code{"rtmin"}, \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"} +is reported. Parameter \code{features} allows to provide feature IDs for +which the area should be extracted. By default it is extracted for all +features. + \code{profMat}: creates a \emph{profile matrix}, which is a n x m matrix, n (rows) representing equally spaced m/z values (bins) and m (columns) the retention time of the corresponding scans. Each cell @@ -341,6 +458,9 @@ results (i.e. features). \code{hasChromPeaks}: whether the object contains peak detection results. +\code{hasFilledChromPeaks}: whether the object contains any filled-in +chromatographic peaks. + \code{adjustedRtime},\code{adjustedRtime<-}: extract/set adjusted retention times. \code{adjustedRtime<-} should not be called manually, it is called internally by the @@ -373,14 +493,24 @@ arguments \code{rt}, \code{mz}, \code{ppm} and \code{type} allow to extract only chromatographic peaks overlapping the defined retention time and/or m/z ranges. Argument \code{type} allows to define how \emph{overlapping} is determined: for \code{type == "any"} (the default), all peaks that are even -partially overlapping the region are returned, for \code{type == "within"} -the full peak has to be within the region and for -\code{type == "apex_within"} the peak's apex position (highest signal of the -peak) has to be within the region. +partially overlapping the region are returned (i.e. for which either +\code{"mzmin"} or \code{"mzmax"} of the \code{chromPeaks} or +\code{featureDefinitions} matrix are within the provided m/z range), for +\code{type == "within"} the full peak has to be within the region (i.e. +both \code{"mzmin"} and \code{"mzmax"} have to be within the m/z range) and +for \code{type == "apex_within"} the peak's apex position (highest signal +of the peak) has to be within the region (i.e. the peak's or features m/z +has to be within the m/z range). See description of the return value for details on the returned matrix. Users usually don't have to use the \code{chromPeaks<-} method directly as detected chromatographic peaks are added to the object by the -\code{\link{findChromPeaks}} method. +\code{\link{findChromPeaks}} method. Also, \code{chromPeaks<-} will replace +any existing \code{chromPeakData}. + +\code{chromPeakData} and \code{chromPeakData<-} allow to get or set arbitrary +chromatographic peak annotations. These are returned or ar returned as a +\code{DataFrame}. Note that the number of rows and the rownames of the +\code{DataFrame} have to match those of \code{chromPeaks}. \code{rtime}: extracts the retention time for each scan. The \code{bySample} parameter allows to return the values grouped @@ -412,7 +542,7 @@ If the \code{XCMSnExp} object contains adjusted retention times, these are returned by default in the \code{Spectrum} objects (can be overwritten by setting \code{adjusted = FALSE}). -\code{processHistory}: returns a \code{list} with +\code{processHistory}: returns a \code{list} of \code{\link{ProcessHistory}} objects (or objects inheriting from this base class) representing the individual processing steps that have been performed, eventually along with their settings (\code{Param} parameter @@ -449,9 +579,12 @@ adjustment. All related process history steps are removed too. \code{findChromPeaks} performs chromatographic peak detection on the provided \code{XCMSnExp} objects. For more details see the method -for \code{\linkS4class{XCMSnExp}}. Note that the \code{findChromPeaks} -method for \code{XCMSnExp} objects removes previously identified -chromatographic peaks and aligned features. Previous alignment (retention +for \code{\linkS4class{XCMSnExp}}. +Note that by default (with parameter \code{add = FALSE}) previous peak +detection results are removed. Use \code{add = TRUE} to perform a second +round of peak detection and add the newly identified peaks to the previous +peak detection results. Correspondence results (features) are always removed +prior to peak detection. Previous alignment (retention time adjustment) results are kept, i.e. chromatographic peak detection is performed using adjusted retention times if the data was first aligned using e.g. obiwarp (\code{\link{adjustRtime-obiwarp}}). @@ -468,6 +601,11 @@ results. If no function is specified the function simply returns the \code{XCMSnExp} objects can be combined with the \code{c} function. This combines identified chromatographic peaks and the objects' pheno data but discards alignment results or feature definitions. + +\code{plot} plots the spectrum data (see \code{\link{plot}} for +\code{\link{MSnExp}} objects in the \code{MSnbase} package for more details. +For \code{type = "XIC"}, identified chromatographic peaks will be indicated +as rectangles with border color \code{peakCol}. } \section{Slots}{ @@ -574,44 +712,44 @@ help page). \item \code{\link{featureSummary}} perform a simple summary of the defined features (see respective help page). -\item \code{link{overlappingFeatures}} identify features that are +\item \code{\link{overlappingFeatures}} identify features that are overlapping or close in the m/z - rt space (see respective help page). +\item \code{\link{quantify}} extract feature intensities and put them, along +with feature definitions and phenodata information, into a +\code{\link{SummarizedExperiment}}. See help page for details. } } \examples{ -## Loading the data from 2 files of the faahKO package. -library(faahKO) -od <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), - system.file("cdf/KO/ko16.CDF", package = "faahKO")), - mode = "onDisk") -## Now we perform a chromatographic peak detection on this data set using the -## matched filter method. We are tuning the settings such that it performs -## faster. -mfp <- MatchedFilterParam(binSize = 6) -xod <- findChromPeaks(od, param = mfp) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) ## The results from the peak detection are now stored in the XCMSnExp ## object -xod +faahko_sub ## The detected peaks can be accessed with the chromPeaks method. -head(chromPeaks(xod)) +head(chromPeaks(faahko_sub)) ## The settings of the chromatographic peak detection can be accessed with ## the processHistory method -processHistory(xod) +processHistory(faahko_sub) ## Also the parameter class for the peak detection can be accessed -processParam(processHistory(xod)[[1]]) +processParam(processHistory(faahko_sub)[[1]]) ## The XCMSnExp inherits all methods from the pSet and OnDiskMSnExp classes ## defined in Bioconductor's MSnbase package. To access the (raw) retention ## time for each spectrum we can use the rtime method. Setting bySample = TRUE ## would cause the retention times to be grouped by sample -head(rtime(xod)) +head(rtime(faahko_sub)) ## Similarly it is possible to extract the mz values or the intensity values ## using the mz and intensity method, respectively, also with the option to @@ -620,15 +758,15 @@ head(rtime(xod)) ## spectra method which returns Spectrum objects containing all raw data. ## Note that all these methods read the information from the original input ## files and subsequently apply eventual data processing steps to them. -mzs <- mz(xod, bySample = TRUE) +mzs <- mz(faahko_sub, bySample = TRUE) length(mzs) lengths(mzs) ## The full data could also be read using the spectra data, which returns ## a list of Spectrum object containing the mz, intensity and rt values. -## spctr <- spectra(xod) +## spctr <- spectra(faahko_sub) ## To get all spectra of the first file we can split them by file -## head(split(spctr, fromFile(xod))[[1]]) +## head(split(spctr, fromFile(faahko_sub))[[1]]) ############ ## Filtering @@ -638,19 +776,17 @@ lengths(mzs) ## retention time correction and peak grouping results) will be dropped. ## Below we filter the XCMSnExp object by file to extract the results for ## only the second file. -xod_2 <- filterFile(xod, file = 2) +xod_2 <- filterFile(faahko_sub, file = 2) xod_2 ## Now the objects contains only the idenfified peaks for the second file head(chromPeaks(xod_2)) -head(chromPeaks(xod)[chromPeaks(xod)[, "sample"] == 2, ]) - ########## ## Coercing to an xcmsSet object ## ## We can also coerce the XCMSnExp object into an xcmsSet object: -xs <- as(xod, "xcmsSet") +xs <- as(faahko_sub, "xcmsSet") head(peaks(xs)) } \seealso{ @@ -674,6 +810,16 @@ head(peaks(xs)) \code{\link{featureSummary}} to calculate basic feature summaries. + \code{\link{featureChromatograms}} to extract chromatograms for each + feature. + + \code{\link{chromPeakSpectra}} to extract MS2 spectra with the m/z of + the precursor ion within the m/z range of a peak and a retention time + within its retention time range. + + \code{\link{featureSpectra}} to extract MS2 spectra associated with + identified features. + \code{\link{fillChromPeaks}} for the method to fill-in eventually missing chromatographic peaks for a feature in some samples. } diff --git a/man/XCMSnExp-filter-methods.Rd b/man/XCMSnExp-filter-methods.Rd index a82f89e8a..3b150b777 100644 --- a/man/XCMSnExp-filter-methods.Rd +++ b/man/XCMSnExp-filter-methods.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/functions-XCMSnExp.R, R/methods-XCMSnExp.R -\docType{methods} \name{filterFeatureDefinitions} \alias{filterFeatureDefinitions} \alias{[,XCMSnExp,ANY,ANY,ANY-method} +\alias{XCMSnExp-filter} \alias{[[,XCMSnExp,ANY,ANY-method} \alias{filterMsLevel,XCMSnExp-method} \alias{filterFile,XCMSnExp-method} -\alias{XCMSnExp-filter} \alias{filterMz,XCMSnExp-method} \alias{filterRt,XCMSnExp-method} \alias{split,XCMSnExp,ANY-method} +\alias{filterChromPeaks,XCMSnExp-method} \title{XCMSnExp filtering and subsetting} \usage{ filterFeatureDefinitions(x, features) @@ -19,20 +19,30 @@ filterFeatureDefinitions(x, features) \S4method{[[}{XCMSnExp,ANY,ANY}(x, i, j, drop = FALSE) -\S4method{filterMsLevel}{XCMSnExp}(object, msLevel., - keepAdjustedRtime = hasAdjustedRtime(object)) +\S4method{filterMsLevel}{XCMSnExp}(object, msLevel., keepAdjustedRtime = hasAdjustedRtime(object)) -\S4method{filterFile}{XCMSnExp}(object, file, keepAdjustedRtime = FALSE) +\S4method{filterFile}{XCMSnExp}( + object, + file, + keepAdjustedRtime = hasAdjustedRtime(object), + keepFeatures = FALSE +) \S4method{filterMz}{XCMSnExp}(object, mz, msLevel., ...) -\S4method{filterRt}{XCMSnExp}(object, rt, msLevel., - adjusted = hasAdjustedRtime(object)) +\S4method{filterRt}{XCMSnExp}(object, rt, msLevel., adjusted = hasAdjustedRtime(object)) \S4method{split}{XCMSnExp,ANY}(x, f, drop = FALSE, ...) + +\S4method{filterChromPeaks}{XCMSnExp}( + object, + keep = rep(TRUE, nrow(chromPeaks(object))), + method = "keep", + ... +) } \arguments{ -\item{x}{For \code{[} and \code{[[}: an \code{\link{XCMSnExp}} object.} +\item{x}{For \code{[} and \code{[[}: an \code{XCMSnExp} object.} \item{features}{For \code{filterFeatureDefinitions}: either a \code{integer} specifying the indices of the features (rows) to keep, a \code{logical} @@ -49,15 +59,14 @@ For \code{[[}: a single integer or character.} \item{drop}{For \code{[} and \code{[[}: not supported.} -\item{object}{A \code{\link{XCMSnExp}} object.} +\item{object}{A \link{XCMSnExp} object.} -\item{msLevel.}{For \code{filterMz}, \code{filterRt}, \code{numeric(1)} +\item{msLevel.}{For \code{filterMz}, \code{filterRt}: \code{numeric} defining the MS level(s) to which operations should be applied or to -which the object should be subsetted. See \code{\link{filterMz}} -for more details} +which the object should be subsetted.} \item{keepAdjustedRtime}{For \code{filterFile}, \code{filterMsLevel}, -\code{[} \code{split}: \code{logical(1)} defining whether the adjusted +\code{[}, \code{split}: \code{logical(1)} defining whether the adjusted retention times should be kept, even if e.g. features are being removed (and the retention time correction was performed on these features).} @@ -66,6 +75,11 @@ within the object to subset the object by file or \code{character} specifying the file names to sub set. The indices are expected to be increasingly ordered, if not they are ordered internally.} +\item{keepFeatures}{For \code{filterFile}: \code{logical(1)} whether +correspondence results (feature definitions) should be kept or dropped. +Defaults to \code{keepFeatures = FALSE} hence feature definitions are removed +from the returned object by default.} + \item{mz}{For \code{filterMz}: \code{numeric(2)} defining the lower and upper mz value for the filtering.} @@ -79,82 +93,83 @@ For \code{spectra}: whether the retention times in the individual \code{Spectrum} objects should be the adjusted or raw retention times.} \item{f}{For \code{split} a vector of length equal to the length of x -defining how \code{x} will be splitted. It is converted internally to +defining how \code{x} should be splitted. It is converted internally to a \code{factor}.} + +\item{keep}{For \code{filterChromPeaks}: \code{logical}, \code{integer} or \code{character} +defining which chromatographic peaks should be retained.} + +\item{method}{For \code{filterChromPeaks}: \code{character(1)} allowing to specify the +method by which chromatographic peaks should be filtered. Currently only +\code{method = "keep"} is supported (i.e. specify with parameter \code{keep} which +chromatographic peaks should be retained).} } \value{ -All methods return an \code{\link{XCMSnExp}} object. +All methods return an \link{XCMSnExp} object. } \description{ -\code{filterFeatureDefinitions} allows to subset the feature definitions of -an \code{XCMSnExp} object. Which feature definitions should be kept can be -specified with the \code{features} argument that can be a \code{logical}, -\code{integer} or \code{character} vector. The function returns the -\code{XCMSnExp} with the reduced \code{featureDefinitions} data frame. - -The \code{[} method allows to subset a \code{\link{XCMSnExp}} -object by spectra. Be aware that the \code{[} method removes all -preprocessing results, except adjusted retention times if +The methods listed on this page allow to filter and subset \link{XCMSnExp} +objects. Most of them are inherited from the \link{OnDiskMSnExp} object defined +in the \code{MSnbase} package and have been adapted for \code{XCMSnExp} to enable +correct subsetting of preprocessing results. +\itemize{ +\item \code{[}: subset a \code{XCMSnExp} object by spectra. Be aware that this removes +\strong{all} preprocessing results, except adjusted retention times if \code{keepAdjustedRtime = TRUE} is passed to the method. - -\code{[[} extracts a single \code{\link{Spectrum}} -object from an \code{XCMSnExp}. The reported retention time is the -adjusted retention time if alignment has been performed on \code{x}. - -\code{filterMsLevel}: reduces the \code{\link{XCMSnExp}} -object to spectra of the specified MS level(s). See -\code{\link{filterMsLevel}} documentation for details and -examples. Presently, if \code{msLevel.} is provided, the function -removes identified chromatographic peaks and correspondence results -while keeping adjusted retention times by default (if present). The -latter can be changed setting \code{keepAdjustedRtime = FALSE}. - -The methods listed on this page allow to filter and subset -\code{\link{XCMSnExp}} objects. Most of them are inherited from the -\code{\link{OnDiskMSnExp}} object and have been adapted for -\code{\link{XCMSnExp}} to enable subsetting also on the preprocessing -results. - -\code{filterFile}: allows to reduce the -\code{\link{XCMSnExp}} to data from only certain files. Identified -chromatographic peaks for these files are retained while all eventually -present features (peak grouping information) are dropped. By default -also adjusted retention times are removed (if present). This can be -overwritten by setting \code{keepAdjustedRtime = TRUE}. - -\code{filterMz}: filters the data set based on the -provided mz value range. All chromatographic peaks and features (grouped -peaks) falling completely within the provided mz value range are retained -(if their minimal mz value is \code{>= mz[1]} and the maximal mz value -\code{<= mz[2]}. Adjusted retention times, if present, are not altered by -the filtering. - -\code{filterRt}: filters the data set based on the -provided retention time range. All chromatographic peaks and features -(grouped peaks) the specified retention time window are retained (i.e. if -the retention time corresponding to the peak's apex is within the +\item \code{[[}: extracts a single \code{Spectrum} object (defined in \code{MSnbase}). The +reported retention time is the adjusted retention time if alignment has +been performed. +\item \code{filterChromPeaks}: subset the \code{chromPeaks} \code{matrix} in \code{object}. Parameter +\code{method} allows to specify how the chromatographic peaks should be +filtered. Currently, only \code{method = "keep"} is supported which allows to +specify chromatographic peaks to keep with parameter \code{keep} (i.e. provide +a \code{logical}, \code{integer} or \code{character} defining which chromatographic peaks +to keep). Feature definitions (if present) are updated correspondingly. +\item \code{filterFeatureDefinitions}: allows to subset the feature definitions of +an \code{XCMSnExp} object. Parameter \code{features} allow to define which features +to keep. It can be a \code{logical}, \code{integer} (index of features to keep) or +\code{character} (feature IDs) vector. +\item \code{filterFile}: allows to reduce the \code{XCMSnExp} to data from only selected +files. Identified chromatographic peaks for these files are retained while +correspondence results (feature definitions) are removed by default. To +force keeping feature definitions use \code{keepFeatures = TRUE}. Adjusted +retention times (if present) are retained by default if present. Use +\code{keepAdjustedRtime = FALSE} to drop them. +\item \code{filterMsLevel}: reduces the \code{XCMSnExp} object to spectra of the +specified MS level(s). Chromatographic peaks and identified features are +also subsetted to the respective MS level. See also the \code{filterMsLevel} +documentation in \code{MSnbase} for details and examples. +\item \code{filterMz}: filters the data set based on the provided m/z value range. +All chromatographic peaks and features (grouped peaks) falling +\strong{completely} within the provided mz value range are retained +(i.e. if their minimal m/z value is \verb{>= mz[1]} and the maximal m/z value +\verb{<= mz[2]}. Adjusted retention times, if present, are kept. +\item \code{filterRt}: filters the data set based on the provided retention time +range. All chromatographic peaks and features (grouped peaks) +\strong{completely} within the specified retention time window are retained +(i.e. if the retention time corresponding to the peak's apex is within the specified rt range). If retention time correction has been performed, the method will by default filter the object by adjusted retention times. The argument \code{adjusted} allows to specify manually whether filtering -should be performed by raw or adjusted retention times. Filtering by +should be performed on raw or adjusted retention times. Filtering by retention time does not drop any preprocessing results nor does it remove or change alignment results (i.e. adjusted retention times). The method returns an empty object if no spectrum or feature is within the specified retention time range. - -\code{split} splits an \code{XCMSnExp} object into a \code{list} -of \code{XCMSnExp} objects based on the provided parameter \code{f}. -Note that by default all pre-processing results are removed by the -splitting, except adjusted retention times, if the optional argument -\code{keepAdjustedRtime = TRUE} is provided. +\item \code{split}: splits an \code{XCMSnExp} object into a \code{list} of \code{XCMSnExp} objects +based on the provided parameter \code{f}. Note that by default all +pre-processing results are removed by the splitting, except adjusted +retention times, if the optional argument \code{keepAdjustedRtime = TRUE} is +provided. +} } \details{ All subsetting methods try to ensure that the returned data is -consistent. Correspondence results for example are removed if the data -set is sub-setted by file, since the correspondence results are dependent -on the files on which correspondence was performed. Thus, some filter -and sub-setting methods drop some of the preprocessing results. An -exception are the adjusted retention times: most subsetting methods +consistent. Correspondence results for example are removed by default if the +data set is sub-setted by file, since the correspondence results are +dependent on the files on which correspondence was performed. This can be +changed by setting \code{keepFeatures = TRUE}. +For adjusted retention times, most subsetting methods support the argument \code{keepAdjustedRtime} (even the \code{[} method) that forces the adjusted retention times to be retained even if the default would be to drop them. @@ -169,73 +184,70 @@ the object. Note also that most of the filtering methods, and also the subsetting operations \code{[} drop all or selected preprocessing results. To consolidate the alignment results, i.e. ensure that adjusted retention -times are always preserved, use the \code{\link{applyAdjustedRtime}} +times are always preserved, use the \code{\link[=applyAdjustedRtime]{applyAdjustedRtime()}} function on the object that contains the alignment results. This replaces the raw retention times with the adjusted ones. } \examples{ -## Load some of the files from the faahKO package. -library(faahKO) -fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), - system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO")) -## Read the files -od <- readMSData(fs, mode = "onDisk") +## Loading a test data set with identified chromatographic peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") -## Perform peak detection on them using the matched filter algorithm. Note -## that we use a large value for binSize to reduce the runtime of the -## example code. -mfp <- MatchedFilterParam(binSize = 5) -xod <- findChromPeaks(od, param = mfp) +## Disable parallel processing for this example +register(SerialParam()) ## Subset the dataset to the first and third file. -xod_sub <- filterFile(xod, file = c(1, 3)) +xod_sub <- filterFile(faahko_sub, file = c(1, 3)) ## The number of chromatographic peaks per file for the full object -table(chromPeaks(xod)[, "sample"]) +table(chromPeaks(faahko_sub)[, "sample"]) ## The number of chromatographic peaks per file for the subset table(chromPeaks(xod_sub)[, "sample"]) -basename(fileNames(xod)) +basename(fileNames(faahko_sub)) basename(fileNames(xod_sub)) ## Filter on mz values; chromatographic peaks and features within the ## mz range are retained (as well as adjusted retention times). -xod_sub <- filterMz(xod, mz = c(300, 400)) +xod_sub <- filterMz(faahko_sub, mz = c(300, 400)) head(chromPeaks(xod_sub)) nrow(chromPeaks(xod_sub)) -nrow(chromPeaks(xod)) +nrow(chromPeaks(faahko_sub)) ## Filter on rt values. All chromatographic peaks and features within the ## retention time range are retained. Filtering is performed by default on ## adjusted retention times, if present. -xod_sub <- filterRt(xod, rt = c(2700, 2900)) +xod_sub <- filterRt(faahko_sub, rt = c(2700, 2900)) range(rtime(xod_sub)) head(chromPeaks(xod_sub)) range(chromPeaks(xod_sub)[, "rt"]) -nrow(chromPeaks(xod)) +nrow(chromPeaks(faahko_sub)) nrow(chromPeaks(xod_sub)) ## Extract a single Spectrum -xod[[4]] +faahko_sub[[4]] ## Subsetting using [ removes all preprocessing results - using ## keepAdjustedRtime = TRUE would keep adjusted retention times, if present. -xod_sub <- xod[fromFile(xod) == 1] +xod_sub <- faahko_sub[fromFile(faahko_sub) == 1] xod_sub ## Using split does also remove preprocessing results, but it supports the ## optional parameter keepAdjustedRtime. ## Split the object into a list of XCMSnExp objects, one per file -xod_list <- split(xod, f = fromFile(xod)) +xod_list <- split(faahko_sub, f = fromFile(faahko_sub)) xod_list } \seealso{ -\code{\link{XCMSnExp}} for base class documentation. +\link{XCMSnExp} for base class documentation. + +\code{\link[=XChromatograms]{XChromatograms()}} for similar filter functions on +\code{XChromatograms} objects. } \author{ Johannes Rainer diff --git a/man/XCMSnExp-inherited-methods.Rd b/man/XCMSnExp-inherited-methods.Rd index 8892cb56c..edcc74d88 100644 --- a/man/XCMSnExp-inherited-methods.Rd +++ b/man/XCMSnExp-inherited-methods.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-XCMSnExp.R -\docType{methods} \name{bin,XCMSnExp-method} \alias{bin,XCMSnExp-method} \alias{clean,XCMSnExp-method} @@ -11,26 +10,34 @@ \alias{smooth,XCMSnExp-method} \title{XCMSnExp data manipulation methods inherited from MSnbase} \usage{ -\S4method{bin}{XCMSnExp}(object, binSize = 1L, msLevel.) +\S4method{bin}{XCMSnExp}(x, binSize = 1L, msLevel.) -\S4method{clean}{XCMSnExp}(object, all = FALSE, verbose = FALSE, - msLevel.) +\S4method{clean}{XCMSnExp}(object, all = FALSE, verbose = FALSE, msLevel.) \S4method{filterAcquisitionNum}{XCMSnExp}(object, n, file) \S4method{normalize}{XCMSnExp}(object, method = c("max", "sum"), ...) -\S4method{pickPeaks}{XCMSnExp}(object, halfWindowSize = 3L, - method = c("MAD", "SuperSmoother"), SNR = 0L, ...) - -\S4method{removePeaks}{XCMSnExp}(object, t = "min", verbose = FALSE, - msLevel.) - -\S4method{smooth}{XCMSnExp}(x, method = c("SavitzkyGolay", - "MovingAverage"), halfWindowSize = 2L, verbose = FALSE, ...) +\S4method{pickPeaks}{XCMSnExp}( + object, + halfWindowSize = 3L, + method = c("MAD", "SuperSmoother"), + SNR = 0L, + ... +) + +\S4method{removePeaks}{XCMSnExp}(object, t = "min", verbose = FALSE, msLevel.) + +\S4method{smooth}{XCMSnExp}( + x, + method = c("SavitzkyGolay", "MovingAverage"), + halfWindowSize = 2L, + verbose = FALSE, + ... +) } \arguments{ -\item{object}{\code{\link{XCMSnExp}} or \code{\link{OnDiskMSnExp}} +\item{x}{\code{\link{XCMSnExp}} or \code{\link{OnDiskMSnExp}} object.} \item{binSize}{\code{numeric(1)} defining the size of a bin (in Dalton).} @@ -40,6 +47,9 @@ object.} to which operations should be applied or to which the object should be subsetted.} +\item{object}{\code{\link{XCMSnExp}} or \code{\link{OnDiskMSnExp}} +object.} + \item{all}{For \code{clean}: \code{logical(1)}, if \code{TRUE} all zeros are removed.} @@ -50,7 +60,7 @@ displayed.} acquisition numbers of the spectra to which the data set should be sub-setted.} -\item{file}{For \code{filterAcquisitionNum}: +\item{file}{For \code{filterAcquisitionNum}: \code{integer} defining the file index within the object to subset the object by file.} @@ -77,9 +87,6 @@ documentation for details.} \item{t}{For \code{removePeaks}: either a \code{numeric(1)} or \code{"min"} defining the threshold (method) to be used. See \code{\link{removePeaks}} for details.} - -\item{x}{\code{\link{XCMSnExp}} or \code{\link{OnDiskMSnExp}} -object.} } \value{ For all methods: a \code{XCMSnExp} object. diff --git a/man/XCMSnExp-peak-grouping-results.Rd b/man/XCMSnExp-peak-grouping-results.Rd index 61972e8bc..56f16a843 100644 --- a/man/XCMSnExp-peak-grouping-results.Rd +++ b/man/XCMSnExp-peak-grouping-results.Rd @@ -1,19 +1,30 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-XCMSnExp.R -\docType{methods} -\name{featureValues,XCMSnExp-method} +\name{quantify,XCMSnExp-method} +\alias{quantify,XCMSnExp-method} \alias{featureValues,XCMSnExp-method} \alias{featureValues} \title{Accessing mz-rt feature data values} \usage{ -\S4method{featureValues}{XCMSnExp}(object, method = c("medret", "maxint", - "sum"), value = "index", intensity = "into", filled = TRUE, - missing = NA) +\S4method{quantify}{XCMSnExp}(object, ...) + +\S4method{featureValues}{XCMSnExp}( + object, + method = c("medret", "maxint", "sum"), + value = "into", + intensity = "into", + filled = TRUE, + missing = NA, + msLevel = integer() +) } \arguments{ \item{object}{A \code{\link{XCMSnExp}} object providing the feature definitions.} +\item{...}{For \code{quantify}: additional parameters to be passed on to the +\code{\link{featureValues}} method.} + \item{method}{\code{character} specifying the method to resolve multi-peak mappings within the same sample, i.e. to define the \emph{representative} peak for a feature in samples where more than @@ -24,11 +35,10 @@ If \code{"maxint"}: select the peak yielding the largest signal. If \code{"maxo"}.} \item{value}{\code{character} specifying the name of the column in -\code{chromPeaks(object)} that should be returned or \code{"index"} (the -default) to return the index of the peak in the -\code{chromPeaks(object)} matrix corresponding to the -\emph{representative} peak for the feature in the respective sample. -To return the integrated peak area use \code{value = "into"}.} +\code{chromPeaks(object)} that should be returned. Defaults to +\code{"into"} in which case the integrated peak area is returned. To +get the index of the peak in the \code{chromPeaks(object)} matrix use +\code{"index"}.} \item{intensity}{\code{character} specifying the name of the column in the \code{chromPeaks(objects)} matrix containing the intensity value of the @@ -44,6 +54,10 @@ is returned in the matrix for the respective peak. See \code{NA} (the default), a \code{numeric} or \code{missing = "rowmin_half"}. The latter replaces any \code{NA} with half of the row's minimal (non-missing) value.} + +\item{msLevel}{for `featureValues`: `integer` defining the MS level(s) for +which feature values should be returned. By default, values for features +defined for all MS levels are returned.} } \value{ For \code{featureValues}: a \code{matrix} with @@ -53,6 +67,9 @@ of the features matches the order found in the \code{matrix} are the same than those of the \code{featureDefinitions} \code{DataFrame}. \code{NA} is reported for features without corresponding chromatographic peak in the respective sample(s). + +For \code{quantify}: a \code{\link{SummarizedExperiment}} representing +the preprocessing results. } \description{ \code{featureValues,XCMSnExp} : extract a \code{matrix} for @@ -62,6 +79,18 @@ Parameter \code{value} allows to define which column from the chromatographic peaks from the same sample can be assigned to a feature. Parameter \code{method} allows to specify the method to be used in such cases to chose from which of the peaks the value should be returned. +Parameter `msLevel` allows to choose a specific MS level for which feature +values should be returned (given that features have been defined for that MS +level). + +\code{quantify,XCMSnExp}: return the preprocessing results as an +\code{\link{SummarizedExperiment}} object containing the feature abundances +as assay matrix, the feature definitions (returned by +\code{\link{featureDefinitions}}) as \code{rowData} and the phenotype +information as \code{colData}. This is an ideal container for further +processing of the data. Internally, the \code{\link{featureValues}} method +is used to extract the feature abundances, parameters for that method can +be passed to \code{quantify} with \code{...}. } \note{ This method is equivalent to the \code{\link{groupval}} for @@ -71,10 +100,16 @@ after a call to \code{fillPeaks}. } \seealso{ \code{\link{XCMSnExp}} for information on the data object. + \code{\link{featureDefinitions}} to extract the \code{DataFrame} with the feature definitions. + +\code{\link{featureChromatograms}} to extract ion chromatograms for each +feature. + \code{\link{hasFeatures}} to evaluate whether the \code{\link{XCMSnExp}} provides feature definitions. + \code{\link{groupval}} for the equivalent method on \code{xcmsSet} objects. } \author{ diff --git a/man/XChromatogram.Rd b/man/XChromatogram.Rd new file mode 100644 index 000000000..46d11c57d --- /dev/null +++ b/man/XChromatogram.Rd @@ -0,0 +1,750 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XChromatograms.R, +% R/functions-XChromatogram.R, R/methods-XChromatogram.R, +% R/methods-XChromatograms.R +\name{XChromatograms} +\alias{XChromatograms} +\alias{XChromatogram} +\alias{XChromatogram-class} +\alias{XChromatograms-class} +\alias{coerce,MChromatograms,XChromatograms-method} +\alias{show,XChromatogram-method} +\alias{chromPeaks,XChromatogram-method} +\alias{filterChromPeaks} +\alias{chromPeaks<-,XChromatogram-method} +\alias{plot,XChromatogram,ANY-method} +\alias{filterMz,XChromatogram-method} +\alias{filterRt,XChromatogram-method} +\alias{hasChromPeaks,XChromatogram-method} +\alias{dropFilledChromPeaks,XChromatogram-method} +\alias{chromPeakData,XChromatogram-method} +\alias{chromPeakData<-,XChromatogram-method} +\alias{refineChromPeaks,XChromatogram,MergeNeighboringPeaksParam-method} +\alias{filterChromPeaks,XChromatogram-method} +\alias{transformIntensity,XChromatogram-method} +\alias{show,XChromatograms-method} +\alias{hasChromPeaks,XChromatograms-method} +\alias{hasFilledChromPeaks,XChromatograms-method} +\alias{chromPeaks,XChromatograms-method} +\alias{chromPeakData,XChromatograms-method} +\alias{filterMz,XChromatograms-method} +\alias{filterRt,XChromatograms-method} +\alias{plot,XChromatograms,ANY-method} +\alias{processHistory,XChromatograms-method} +\alias{hasFeatures,XChromatograms-method} +\alias{dropFeatureDefinitions,XChromatograms-method} +\alias{groupChromPeaks,XChromatograms,PeakDensityParam-method} +\alias{featureDefinitions,XChromatograms-method} +\alias{[,XChromatograms,ANY,ANY,ANY-method} +\alias{featureValues,XChromatograms-method} +\alias{plotChromPeakDensity,XChromatograms-method} +\alias{dropFilledChromPeaks,XChromatograms-method} +\alias{refineChromPeaks,XChromatograms,MergeNeighboringPeaksParam-method} +\alias{filterChromPeaks,XChromatograms-method} +\alias{transformIntensity,XChromatograms-method} +\title{Containers for chromatographic and peak detection data} +\usage{ +XChromatograms(data, phenoData, featureData, chromPeaks, chromPeakData, ...) + +XChromatogram( + rtime = numeric(), + intensity = numeric(), + mz = c(NA_real_, NA_real_), + filterMz = c(NA_real_, NA_real_), + precursorMz = c(NA_real_, NA_real_), + productMz = c(NA_real_, NA_real_), + fromFile = integer(), + aggregationFun = character(), + msLevel = 1L, + chromPeaks, + chromPeakData +) + +\S4method{show}{XChromatogram}(object) + +\S4method{chromPeaks}{XChromatogram}( + object, + rt = numeric(), + mz = numeric(), + ppm = 0, + type = c("any", "within", "apex_within"), + msLevel +) + +\S4method{chromPeaks}{XChromatogram}(object) <- value + +\S4method{plot}{XChromatogram,ANY}( + x, + col = "#00000060", + lty = 1, + type = "l", + xlab = "retention time", + ylab = "intensity", + main = NULL, + peakType = c("polygon", "point", "rectangle", "none"), + peakCol = "#00000060", + peakBg = "#00000020", + peakPch = 1, + ... +) + +\S4method{filterMz}{XChromatogram}(object, mz, ...) + +\S4method{filterRt}{XChromatogram}(object, rt, ...) + +\S4method{hasChromPeaks}{XChromatogram}(object) + +\S4method{dropFilledChromPeaks}{XChromatogram}(object) + +\S4method{chromPeakData}{XChromatogram}(object) + +\S4method{chromPeakData}{XChromatogram}(object) <- value + +\S4method{refineChromPeaks}{XChromatogram,MergeNeighboringPeaksParam}(object, param = MergeNeighboringPeaksParam()) + +\S4method{filterChromPeaks}{XChromatogram}(object, method = c("keepTop"), ...) + +\S4method{transformIntensity}{XChromatogram}(object, FUN = identity) + +\S4method{show}{XChromatograms}(object) + +\S4method{hasChromPeaks}{XChromatograms}(object) + +\S4method{hasFilledChromPeaks}{XChromatograms}(object) + +\S4method{chromPeaks}{XChromatograms}( + object, + rt = numeric(), + mz = numeric(), + ppm = 0, + type = c("any", "within", "apex_within"), + msLevel +) + +\S4method{chromPeakData}{XChromatograms}(object) + +\S4method{filterMz}{XChromatograms}(object, mz, ...) + +\S4method{filterRt}{XChromatograms}(object, rt, ...) + +\S4method{plot}{XChromatograms,ANY}( + x, + col = "#00000060", + lty = 1, + type = "l", + xlab = "retention time", + ylab = "intensity", + main = NULL, + peakType = c("polygon", "point", "rectangle", "none"), + peakCol = "#00000060", + peakBg = "#00000020", + peakPch = 1, + ... +) + +\S4method{processHistory}{XChromatograms}(object, fileIndex, type) + +\S4method{hasFeatures}{XChromatograms}(object, ...) + +\S4method{dropFeatureDefinitions}{XChromatograms}(object, ...) + +\S4method{groupChromPeaks}{XChromatograms,PeakDensityParam}(object, param) + +\S4method{featureDefinitions}{XChromatograms}( + object, + mz = numeric(), + rt = numeric(), + ppm = 0, + type = c("any", "within", "apex_within") +) + +\S4method{[}{XChromatograms,ANY,ANY,ANY}(x, i, j, drop = TRUE) + +\S4method{featureValues}{XChromatograms}( + object, + method = c("medret", "maxint", "sum"), + value = "into", + intensity = "into", + missing = NA, + ... +) + +\S4method{plotChromPeakDensity}{XChromatograms}( + object, + param, + col = "#00000060", + xlab = "retention time", + main = NULL, + peakType = c("polygon", "point", "rectangle", "none"), + peakCol = "#00000060", + peakBg = "#00000020", + peakPch = 1, + simulate = TRUE, + ... +) + +\S4method{dropFilledChromPeaks}{XChromatograms}(object) + +\S4method{refineChromPeaks}{XChromatograms,MergeNeighboringPeaksParam}(object, param = MergeNeighboringPeaksParam()) + +\S4method{filterChromPeaks}{XChromatograms}(object, method = c("keepTop"), ...) + +\S4method{transformIntensity}{XChromatograms}(object, FUN = identity) +} +\arguments{ +\item{data}{For \code{XChromatograms}: \code{list} of \code{Chromatogram} or +\code{XChromatogram} objects.} + +\item{phenoData}{For \code{XChromatograms}: either a \code{data.frame}, +\code{AnnotatedDataFrame} or \code{NAnnotatedDataFrame} describing the +phenotypical information of the samples.} + +\item{featureData}{For \code{XChromatograms}: either a \code{data.frame} or +\code{AnnotatedDataFrame} with additional information for each row of +chromatograms.} + +\item{chromPeaks}{For \code{XChromatogram}: \code{matrix} with required columns +\code{"rt"}, \code{"rtmin"}, \code{"rtmax"}, \code{"into"}, \code{"maxo"} and \code{"sn"}. +For \code{XChromatograms}: \code{list}, same length than \code{data}, with the +chromatographic peaks for each chromatogram. Each element has to be +a \code{matrix}, the ordering has to match the order of the chromatograms +in \code{data}.} + +\item{chromPeakData}{For \code{XChromatogram}: \code{DataFrame} with optional +additional annotations for each chromatographic peak. The number of rows +has to match the number of chromatographic peaks.} + +\item{...}{For \code{filterChromPeaks}: additional parameters defining how to +filter chromatographic peaks. See function description below for details.} + +\item{rtime}{For \code{XChromatogram}: \code{numeric} with the retention times +(length has to be equal to the length of \code{intensity}).} + +\item{intensity}{For \code{XChromatogram}: \code{numeric} with the intensity values +(length has to be equal to the length of \code{rtime}). + +\if{html}{\out{
}}\preformatted{For `featureValues`: `character(1)` specifying the name +of the column in `chromPeaks(object)` containing the intensity value +of the peak that should be used for the `method = "maxint"` conflict +resolution if. +}\if{html}{\out{
}}} + +\item{mz}{For \code{XChromatogram}: \code{numeric(2)} representing the m/z value +range (min, max) on which the chromatogram was created. This is +supposed to contain the \emph{real} range of m/z values in contrast +to the \code{filterMz} below. +For \code{chromPeaks} and \code{featureDefinitions}: \code{numeric(2)} defining the +m/z range for which chromatographic peaks or features should be returned. +For \code{filterMz}: \code{numeric(2)} defining the m/z range for which +chromatographic peaks should be retained.#'} + +\item{filterMz}{For \code{XChromatogram}: \code{numeric(2)} representing the m/z +value range (min, max) that was used to filter the original object +on m/z dimension. If not applicable use \code{filterMz = c(0, 0)}.} + +\item{precursorMz}{For \code{XChromatogram}: \code{numeric(2)} for SRM/MRM transitions. +Represents the mz of the precursor ion. See details for more information.} + +\item{productMz}{For \code{XChromatogram}: \code{numeric(2)} for SRM/MRM transitions. +Represents the mz of the product. See details for more information.} + +\item{fromFile}{For \code{XChromatogram}: \code{integer(1)} the index of the file +within the \code{OnDiskMSnExp} or \code{MSnExp} object from which the chromatogram +was extracted.} + +\item{aggregationFun}{For \code{XChromatogram}: \code{character(1)} specifying the +function that was used to aggregate intensity values for the same +retention time across the m/z range.} + +\item{msLevel}{For \code{XChromatogram}: \code{integer} with the MS level from which +the chromatogram was extracted. +For \code{chromPeaks} and \code{chromPeakData}: extract chromatographic peaks of a +certain MS level.} + +\item{object}{An \code{XChromatogram} or \code{XChromatograms} object.} + +\item{rt}{For \code{chromPeaks} and \code{featureDefinitions}: \code{numeric(2)} defining +the retention time range for which chromatographic peaks or features +should be returned. +For \code{filterRt}: \code{numeric(2)} defining the retention time range to +reduce \code{object} to.} + +\item{ppm}{For \code{chromPeaks} and \code{featureDefinitions}: \code{numeric(1)} defining +a ppm to expand the provided m/z range.} + +\item{type}{For \code{chromPeaks} and \code{featureDefinitions}: \code{character(1)} +defining which peaks or features to return if \code{rt} or \code{mz} is provided: +\code{"any"} (default) return all peaks that are even +partially overlapping with \code{rt}, \code{"within"} return peaks that are +completely within \code{rt} and \code{"apex_within"} return peaks which apex +is within \code{rt}. + +\if{html}{\out{
}}\preformatted{For `plot`: what type of plot should be used for the +chromatogram (such as `"l"` for lines, `"p"` for points etc), see help +of [plot()] in the `graphics` package for more details. +For `processHistory`: restrict returned processing steps to specific +types. Use [processHistoryTypes()] to list all supported values. +}\if{html}{\out{
}}} + +\item{value}{For \verb{chromPeaks<-}: a numeric \code{matrix} with required columns +\code{"rt"}, \code{"rtmin"}, \code{"rtmax"}, \code{"into"} and \code{"maxo"}. + +\if{html}{\out{
}}\preformatted{For `featureValues`: `character(1)` specifying the name of the column in +`chromPeaks(object)` that should be returned or `"index"` (default) to +return the index of the peak associated with the feature in each sample. +To return the integrated peak area instead of the index use +`value = "into"`. +}\if{html}{\out{
}}} + +\item{x}{For \code{plot}: an \code{XChromatogram} or \code{XChromatograms} object.} + +\item{col}{For \code{plot}: the color to be used to draw the chromatogram.} + +\item{lty}{For \code{plot} and \code{plotChromPeakDensity}: the line type.} + +\item{xlab}{For \code{plot} and \code{plotChromPeakDensity}: the x axis label.} + +\item{ylab}{For \code{plot}: the y axis label.} + +\item{main}{For \code{plot} and \code{plotChromPeakDensity}: an optional title for +the plot.} + +\item{peakType}{For \code{plot} and \code{plotChromPeakDensity}: +\code{character(1)} defining how (and if) identified chromatographic peak +within the chromatogram should be plotted. Options +are \code{"polygon"} (default): draw the peak borders with the \code{peakCol} color +and fill the peak area with the \code{peakBg} color, \code{"point"}: indicate the +peak's apex with a point, \code{"rectangle"}: draw a rectangle around the +identified peak and \code{"none"}: don't draw peaks.} + +\item{peakCol}{For \code{plot} and \code{plotChromPeakDensity}: the foreground color +for the peaks. For \code{peakType = "polygon"} and \code{peakType = "rectangle"} +this is the color for the border. Use \code{NA} to not use a foreground +color. This should either be a single color or a vector of colors with +the same length than \code{chromPeaks(x)} has rows.} + +\item{peakBg}{For \code{plot} and \code{plotChromPeakDensity}: the background color +for the peaks. For \code{peakType = "polygon"} and \code{peakType = "rectangle"} +the peak are or rectangle will be filled with this color. Use \code{NA} to +skip. This should be either a single color or a vector of colors with +the same length than \code{chromPeaks(x)} has rows.} + +\item{peakPch}{For \code{plot} and \code{plotChromPeakDensity}: the point character +to be used for \code{peakType = "point"}. See \code{\link[=plot]{plot()}} in the \code{graphics} +package for more details.} + +\item{param}{For \code{groupChromPeaks} and \code{plotChromPeakDensity}: a +\code{\link[=PeakDensityParam]{PeakDensityParam()}} object with the settings for the \emph{peak density} +correspondence analysis algorithm.} + +\item{method}{For \code{featureValues}: \code{character(1)} specifying the method to +resolve multi-peak mappings within the sample sample, i.e. to select +the \emph{representative} peak for a feature for which more than one peak +was assigned in one sample. Options are \code{"medret"} (default): select the +peak closest to the median retention time of the feature, \code{"maxint"}: +select the peak with the largest signal and \code{"sum"}: sum the values +of all peaks (only if \code{value} is \code{"into"} or \code{"maxo"}). +For \code{filterChromPeaks}: \code{character(1)} defining the method that should +be used to filter chromatographic peaks. See help on \code{filterChromPeaks} +below for details.} + +\item{FUN}{For \code{transformIntensity}: a function to transform the intensity +values of \code{object}.} + +\item{fileIndex}{For \code{processHistory}: optional \code{integer} specifying the +index of the files/samples for which the \link{ProcessHistory} objects should +be returned.} + +\item{i}{For \code{[}: \code{integer} with the row indices to subset the +\code{XChromatograms} object.} + +\item{j}{For \code{[}: \code{integer} with the column indices to subset the +\code{XChromatograms} object.} + +\item{drop}{For \code{[}: \code{logical(1)} whether the dimensionality should be +dropped (if possible). Defaults to \code{drop = TRUE}, thus, if length of \code{i} +and \code{j} is 1 a \code{XChromatogram} is returned. Note that \code{drop} is ignored +if length of \code{i} or \code{j} is larger than 1, thus a \code{XChromatograms} is +returned.} + +\item{missing}{For \code{featureValues}: how missing values should be reported. +Allowed values are \code{NA} (default), a \code{numeric(1)} to replace \code{NA}s with +that value or \code{missing = "rowmin_half"} to replace \code{NA}s with half +of the row's minimal (non-missing) value.} + +\item{simulate}{For \code{plotChromPeakDensity}: \code{logical(1)} whether a +correspondence analysis should be \emph{simulated} based on the available +data and the provided \code{\link[=PeakDensityParam]{PeakDensityParam()}} \code{param} argument. See +section \emph{Correspondence analysis} for details.} +} +\value{ +See help of the individual functions. +} +\description{ +The \code{XChromatogram} object allows to store chromatographic data (e.g. +an extracted ion chromatogram) along with identified chromatographic peaks +within that data. The object inherits all functions from the \code{\link[=Chromatogram]{Chromatogram()}} +object in the \code{MSnbase} package. + +Multiple \code{XChromatogram} objects can be stored in a \code{XChromatograms} object. +This class extends \code{\link[=MChromatograms]{MChromatograms()}} from the \code{MSnbase} package and allows +thus to arrange chromatograms in a matrix-like structure, columns +representing samples and rows m/z-retention time ranges. + +All functions are described (grouped into topic-related sections) after the +\strong{Arguments} section. +} +\note{ +Highlighting the peak area(s) in an \code{XChromatogram} or \code{XChromatograms} +object (\code{plot} with \code{peakType = "polygon"}) draws a polygon representing +the displayed chromatogram from the peak's minimal retention time to the +maximal retention time. If the \code{XChromatograms} was extracted from an +\code{\link[=XCMSnExp]{XCMSnExp()}} object with the \code{\link[=chromatogram]{chromatogram()}} function this might not +represent the actual identified peak area if the m/z range that was +used to extract the chromatogram was larger than the peak's m/z. +} +\section{Creation of objects}{ + + +Objects can be created with the contructor function \code{XChromatogram} and +\code{XChromatograms}, respectively. Also, they can be coerced from +\link{Chromatogram} or \code{\link[=MChromatograms]{MChromatograms()}} objects using +\code{as(object, "XChromatogram")} or \code{as(object, "XChromatograms")}. +} + +\section{Filtering and subsetting}{ + + +Besides classical subsetting with \code{[} specific filter operations on +\code{\link[=MChromatograms]{MChromatograms()}} and \code{XChromatograms} objects are available. See +\code{\link[=filterColumnsIntensityAbove]{filterColumnsIntensityAbove()}} for more details. + + +\itemize{ +\item \code{[} allows to subset a \code{XChromatograms} object by row (\code{i}) and column +(\code{j}), with \code{i} and \code{j} being of type \code{integer}. The \code{featureDefinitions} +will also be subsetted accordingly and the \code{peakidx} column updated. +\item \code{filterMz} filters the chromatographic peaks within an \code{XChromatogram} or +\code{XChromatograms}, if a column \code{"mz"} is present in the \code{chromPeaks} matrix. +This would be the case if the \code{XChromatogram} was extracted from an +\code{\link[=XCMSnExp]{XCMSnExp()}} object with the \code{\link[=chromatogram]{chromatogram()}} function. All +chromatographic peaks with their m/z within the m/z range defined by \code{mz} +will be retained. Also feature definitions (if present) will be subset +accordingly. The function returns a filtered \code{XChromatogram} or +\code{XChromatograms} object. +\item \code{filterRt} filters chromatogram(s) by the provided retention time range. +All eventually present chromatographic peaks with their apex within the +retention time range specified with \code{rt} will be retained. Also feature +definitions, if present, will be filtered accordingly. The function +returns a filtered \code{XChromatogram} or \code{XChromatograms} object. +} +} + +\section{Accessing data}{ + + +See also help of \link{Chromatogram} in the \code{MSnbase} package for general +information and data access. The methods listed here are specific for +\code{XChromatogram} and \code{XChromatograms} objects. +\itemize{ +\item \code{chromPeaks}, \verb{chromPeaks<-}: extract or set the matrix with the +chromatographic peak definitions. Parameter \code{rt} allows to specify a +retention time range for which peaks should be returned along with +parameter \code{type} that defines how \emph{overlapping} is defined (parameter +description for details). For \code{XChromatogram} objects the function returns +a \code{matrix} with columns \code{"rt"} (retention time of the peak apex), +\code{"rtmin"} (the lower peak boundary), \code{"rtmax"} (the upper peak boundary), +\code{"into"} (the ingegrated peak signal/area of the peak), \code{"maxo"} (the +maximum instensity of the peak and \code{"sn"} (the signal to noise ratio). +Note that, depending on the peak detection algorithm, the matrix may +contain additional columns. +For \code{XChromatograms} objects the \code{matrix} contains also columns \code{"row"} +and \code{"column"} specifying in which chromatogram of \code{object} the peak was +identified. Chromatographic peaks are ordered by row. +\item \code{chromPeakData}, \verb{chromPeakData<-}: extract or set the \code{\link[=DataFrame]{DataFrame()}} with +optional chromatographic peak annotations. +\item \code{hasChromPeaks}: infer whether a \code{XChromatogram} (or \code{XChromatograms}) +has chromatographic peaks. For \code{XChromatogram}: returns a \code{logical(1)}, +for \code{XChromatograms}: returns a \code{matrix}, same dimensions than \code{object} +with either \code{TRUE} or \code{FALSE} if chromatographic peaks are available in +the chromatogram at the respective position. +\item \code{hasFilledChromPeaks}: whether a \code{XChromatogram} (or a \code{XChromatogram} in +a \code{XChromatograms}) has filled-in chromatographic peaks. +For \code{XChromatogram}: returns a \code{logical(1)}, +for \code{XChromatograms}: returns a \code{matrix}, same dimensions than \code{object} +with either \code{TRUE} or \code{FALSE} if chromatographic peaks are available in +the chromatogram at the respective position. +\item \code{dropFilledChromPeaks}: removes filled-in chromatographic peaks. See +\code{\link[=dropFilledChromPeaks]{dropFilledChromPeaks()}} help for \code{\link[=XCMSnExp]{XCMSnExp()}} objects for more +information. +\item \code{hasFeatures}: for \code{XChromatograms} objects only: if correspondence +analysis has been performed and m/z-rt feature definitions are present. +Returns a \code{logical(1)}. +\item \code{dropFeatureDefinitions}: for \code{XChrmomatograms} objects only: delete any +correspondence analysis results (and related process history). +\item \code{featureDefinitions}: for \code{XChromatograms} objects only. Extract the +results from the correspondence analysis (performed with +\code{groupChromPeaks}). Returns a \code{DataFrame} with the properties of the +defined m/z-rt features: their m/z and retention time range. Columns +\code{peakidx} and \code{row} contain the index of the chromatographic peaks in the +\code{chromPeaks} matrix associated with the feature and the row in the +\code{XChromatograms} object in which the feature was defined. Similar to the +\code{chromPeaks} method it is possible to filter the returned feature matrix +with the \code{mz}, \code{rt} and \code{ppm} parameters. +\item \code{featureValues}: for \code{XChromatograms} objects only. Extract the abundance +estimates for the individuals features. Note that by default (with +parameter \code{value = "index"} a \code{matrix} of indices of the peaks in the +\code{chromPeaks} matrix associated to the feature is returned. To extract the +integrated peak area use \code{value = "into"}. The function returns a \code{matrix} +with one row per feature (in \code{featureDefinitions}) and each column being +a sample (i.e. column of \code{object}). For features without a peak associated +in a certain sample \code{NA} is returned. This can be changed with the +\code{missing} argument of the function. +\item \code{filterChromPeaks}: \emph{filters} chromatographic peaks in \code{object} depending +on parameter \code{method} and method-specific parameters passed as additional +arguments with \code{...}. Available methods are: +\itemize{ +\item \code{method = "keepTop"}: keep top \code{n} (default \code{n = 1L}) peaks in each +chromatogram ordered by column \code{order} (defaults to \code{order = "maxo"}). +Parameter \code{decreasing} (default \code{decreasing = TRUE}) can be used to +order peaks in descending (\code{decreasing = TRUE}) or ascending ( +\code{decreasing = FALSE}) order to keep the top \code{n} peaks with largest or +smallest values, respectively. +} +\item \code{processHistory}: returns a \code{list} of \link{ProcessHistory} objects representing +the individual performed processing steps. Optional parameters \code{type} and +\code{fileIndex} allow to further specify which processing steps to return. +} +} + +\section{Manipulating data}{ + +\itemize{ +\item \code{transformIntensity}: transforms the intensity values of the chromatograms +with provided function \code{FUN}. See \code{\link[=transformIntensity]{transformIntensity()}} in the \code{MSnbase} +package for details. For \code{XChromatogram} and \code{XChromatograms} in addition +to the intensity values also columns \code{"into"} and \code{"maxo"} in the object's +\code{chromPeaks} matrix are transformed by the same function. +} +} + +\section{Plotting and visualizing}{ + +\itemize{ +\item \code{plot} draws the chromatogram and highlights in addition any +chromatographic peaks present in the \code{XChromatogram} or \code{XChromatograms} +(unless \code{peakType = "none"} was specified). To draw peaks in different +colors a vector of color definitions with length equal to +\code{nrow(chromPeaks(x))} has to be submitted with \code{peakCol} and/or \code{peakBg} +defining one color for each peak (in the order as peaks are in +\verb{chromPeaks(x))}. For base peak chromatograms or total ion chromatograms +it might be better to set \code{peakType = "none"} to avoid generating busy +plots. +\item \code{plotChromPeakDensity}: visualize \emph{peak density}-based correspondence +analysis results. See section \emph{Correspondence analysis} for more details. +} +} + +\section{Chromatographic peak detection}{ + + +See \link{findChromPeaks-Chromatogram-CentWaveParam} for information. + +After chromatographic peak detection it is also possible to \emph{refine} +identified chromatographic peaks with the \code{refineChromPeaks} method (e.g. to +reduce peak detection artifacts). Currently, only peak refinement using the +\emph{merge neighboring peaks} method is available (see +\code{\link[=MergeNeighboringPeaksParam]{MergeNeighboringPeaksParam()}} for a detailed description of the approach. +} + +\section{Correspondence analysis}{ + + +Identified chromatographic peaks in an \code{XChromatograms} object can be grouped +into \emph{features} with the \code{groupChromPeaks} function. Currently, such a +correspondence analysis can be performed with the \emph{peak density} method +(see \link{groupChromPeaks} for more details) specifying the algorithm settings +with a \code{\link[=PeakDensityParam]{PeakDensityParam()}} object. A correspondence analysis is performed +separately for each row in the \code{XChromatograms} object grouping +chromatographic peaks across samples (columns). + +The analysis results are stored in the returned \code{XChromatograms} object +and can be accessed with the \code{featureDefinitions} method which returns a +\code{DataFrame} with one row for each feature. Column \code{"row"} specifies in +which row of the \code{XChromatograms} object the feature was identified. + +The \code{plotChromPeakDensity} method can be used to visualize \emph{peak density} +correspondence results, or to \emph{simulate} a peak density correspondence +analysis on chromatographic data. The resulting plot consists of two panels, +the upper panel showing the chromatographic data as well as the identified +chromatographic peaks, the lower panel the distribution of peaks (the peak +density) along the retention time axis. This plot shows each peak as a point +with it's peak's retention time on the x-axis, and the sample in which it +was found on the y-axis. The distribution of peaks along the retention time +axis is visualized with a density estimate. Grouped chromatographic peaks +are indicated with grey shaded rectangles. Parameter \code{simulate} allows to +define whether the correspondence analysis should be simulated ( +\code{simulate=TRUE}, based on the available data and the provided +\code{\link[=PeakDensityParam]{PeakDensityParam()}} parameter class) or not (\code{simulate=FALSE}). For the +latter it is assumed that a correspondence analysis has been performed with +the \emph{peak density} method on the \code{object}. +See examples below. + +Abundance estimates for each feature can be extracted with the +\code{featureValues} function using parameter \code{value = "into"} to extract the +integrated peak area for each feature. The result is a \code{matrix}, columns +being samples and rows features. +} + +\examples{ + +## ---- Creation of XChromatograms ---- +## +## Create a XChromatograms from Chromatogram objects +dta <- list(Chromatogram(rtime = 1:7, c(3, 4, 6, 12, 8, 3, 2)), + Chromatogram(1:10, c(4, 6, 3, 4, 7, 13, 43, 34, 23, 9))) + +## Create an XChromatograms without peak data +xchrs <- XChromatograms(dta) + +## Create an XChromatograms with peaks data +pks <- list(matrix(c(4, 2, 5, 30, 12, NA), nrow = 1, + dimnames = list(NULL, c("rt", "rtmin", "rtmax", "into", "maxo", "sn"))), + NULL) +xchrs <- XChromatograms(dta, chromPeaks = pks) + +## Create an XChromatograms from XChromatogram objects +dta <- lapply(dta, as, "XChromatogram") +chromPeaks(dta[[1]]) <- pks[[1]] + +xchrs <- XChromatograms(dta, nrow = 1) + +hasChromPeaks(xchrs) + +## Loading a test data set with identified chromatographic peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Subset the dataset to the first and third file. +xod_sub <- filterFile(faahko_sub, file = c(1, 3)) + +od <- as(xod_sub, "OnDiskMSnExp") + +## Extract chromatograms for a m/z - retention time slice +chrs <- chromatogram(od, mz = 344, rt = c(2500, 3500)) +chrs + +## --------------------------------------------------- ## +## Chromatographic peak detection ## +## --------------------------------------------------- ## +## Perform peak detection using CentWave +xchrs <- findChromPeaks(chrs, param = CentWaveParam()) +xchrs + +## Do we have chromatographic peaks? +hasChromPeaks(xchrs) + +## Process history +processHistory(xchrs) + +## The chromatographic peaks, columns "row" and "column" provide information +## in which sample the peak was identified. +chromPeaks(xchrs) + +## Spectifically extract chromatographic peaks for one sample/chromatogram +chromPeaks(xchrs[1, 2]) + +## Plot the results +plot(xchrs) + +## Plot the results using a different color for each sample +sample_colors <- c("#ff000040", "#00ff0040", "#0000ff40") +cols <- sample_colors[chromPeaks(xchrs)[, "column"]] +plot(xchrs, col = sample_colors, peakBg = cols) + +## Indicate the peaks with a rectangle +plot(xchrs, col = sample_colors, peakCol = cols, peakType = "rectangle", + peakBg = NA) + +## --------------------------------------------------- ## +## Correspondence analysis ## +## --------------------------------------------------- ## +## Group chromatographic peaks across samples +prm <- PeakDensityParam(sampleGroup = rep(1, 2)) +res <- groupChromPeaks(xchrs, param = prm) + +hasFeatures(res) +featureDefinitions(res) + +## Plot the correspondence results. Use simulate = FALSE to show the +## actual results. Grouped chromatographic peaks are indicated with +## grey shaded rectangles. +plotChromPeakDensity(res, simulate = FALSE) + +## Simulate a correspondence analysis based on different settings. Larger +## bw will increase the smoothing of the density estimate hence grouping +## chromatographic peaks that are more apart on the retention time axis. +prm <- PeakDensityParam(sampleGroup = rep(1, 3), bw = 60) +plotChromPeakDensity(res, param = prm) + +## Delete the identified feature definitions +res <- dropFeatureDefinitions(res) +hasFeatures(res) + +## Create a XChromatogram object +pks <- matrix(nrow = 1, ncol = 6) +colnames(pks) <- c("rt", "rtmin", "rtmax", "into", "maxo", "sn") +pks[, "rtmin"] <- 2 +pks[, "rtmax"] <- 9 +pks[, "rt"] <- 4 +pks[, "maxo"] <- 19 +pks[, "into"] <- 93 + +xchr <- XChromatogram(rtime = 1:10, + intensity = c(4, 8, 14, 19, 18, 12, 9, 8, 5, 2), + chromPeaks = pks) +xchr + +## Add arbitrary peak annotations +df <- DataFrame(peak_id = c("a")) +xchr <- XChromatogram(rtime = 1:10, + intensity = c(4, 8, 14, 19, 18, 12, 9, 8, 5, 2), + chromPeaks = pks, chromPeakData = df) +xchr +chromPeakData(xchr) + +## Extract the chromatographic peaks +chromPeaks(xchr) + +## Plotting of a single XChromatogram object +## o Don't highlight chromatographic peaks +plot(xchr, peakType = "none") + +## o Indicate peaks with a polygon +plot(xchr) + +## Add a second peak to the data. +pks <- rbind(chromPeaks(xchr), c(7, 7, 10, NA, 15, NA)) +chromPeaks(xchr) <- pks + +## Plot the peaks in different colors +plot(xchr, peakCol = c("#ff000080", "#0000ff80"), + peakBg = c("#ff000020", "#0000ff20")) + +## Indicate the peaks as rectangles +plot(xchr, peakCol = c("#ff000060", "#0000ff60"), peakBg = NA, + peakType = "rectangle") + +## Filter the XChromatogram by retention time +xchr_sub <- filterRt(xchr, rt = c(4, 6)) +xchr_sub +plot(xchr_sub) +} +\seealso{ +\link[=findChromPeaks-Chromatogram-CentWaveParam]{findChromPeaks-centWave} for peak +detection on \code{\link[=MChromatograms]{MChromatograms()}} objects. +} +\author{ +Johannes Rainer +} diff --git a/man/adjustRtime-obiwarp.Rd b/man/adjustRtime-obiwarp.Rd index 719d3ce06..ba449dd51 100644 --- a/man/adjustRtime-obiwarp.Rd +++ b/man/adjustRtime-obiwarp.Rd @@ -7,7 +7,6 @@ \alias{ObiwarpParam-class} \alias{ObiwarpParam} \alias{adjustRtime,OnDiskMSnExp,ObiwarpParam-method} -\alias{show,ObiwarpParam-method} \alias{binSize,ObiwarpParam-method} \alias{binSize<-,ObiwarpParam-method} \alias{centerSample,ObiwarpParam-method} @@ -46,18 +45,29 @@ \alias{initPenalty} \alias{initPenalty<-,ObiwarpParam-method} \alias{initPenalty<-} +\alias{subset,ObiwarpParam-method} +\alias{subset<-,ObiwarpParam-method} +\alias{subsetAdjust,ObiwarpParam-method} +\alias{subsetAdjust<-,ObiwarpParam-method} \alias{adjustRtime,XCMSnExp,ObiwarpParam-method} \title{Align retention times across samples using Obiwarp} \usage{ -ObiwarpParam(binSize = 1, centerSample = integer(), response = 1L, - distFun = "cor_opt", gapInit = numeric(), gapExtend = numeric(), - factorDiag = 2, factorGap = 1, localAlignment = FALSE, - initPenalty = 0) - -\S4method{adjustRtime}{OnDiskMSnExp,ObiwarpParam}(object, param, - msLevel = 1L) - -\S4method{show}{ObiwarpParam}(object) +ObiwarpParam( + binSize = 1, + centerSample = integer(), + response = 1L, + distFun = "cor_opt", + gapInit = numeric(), + gapExtend = numeric(), + factorDiag = 2, + factorGap = 1, + localAlignment = FALSE, + initPenalty = 0, + subset = integer(), + subsetAdjust = c("average", "previous") +) + +\S4method{adjustRtime}{OnDiskMSnExp,ObiwarpParam}(object, param, msLevel = 1L) \S4method{binSize}{ObiwarpParam}(object) @@ -99,8 +109,15 @@ ObiwarpParam(binSize = 1, centerSample = integer(), response = 1L, \S4method{initPenalty}{ObiwarpParam}(object) <- value -\S4method{adjustRtime}{XCMSnExp,ObiwarpParam}(object, param, - msLevel = 1L) +\S4method{subset}{ObiwarpParam}(x) + +\S4method{subset}{ObiwarpParam}(object) <- value + +\S4method{subsetAdjust}{ObiwarpParam}(object) + +\S4method{subsetAdjust}{ObiwarpParam}(object) <- value + +\S4method{adjustRtime}{XCMSnExp,ObiwarpParam}(object, param, msLevel = 1L) } \arguments{ \item{binSize}{\code{numeric(1)} defining the bin size (in mz dimension) @@ -109,7 +126,9 @@ parameter in \code{\link{profile-matrix}} documentation for more details.} \item{centerSample}{\code{integer(1)} defining the index of the center sample in the experiment. It defaults to -\code{floor(median(1:length(fileNames(object))))}.} +\code{floor(median(1:length(fileNames(object))))}. Note that if +\code{subset} is used, the index passed with \code{centerSample} is +within these subset samples.} \item{response}{\code{numeric(1)} defining the \emph{responsiveness} of warping with \code{response = 0} giving linear warping on start and end @@ -146,6 +165,16 @@ performed instead of the default global alignment.} \item{initPenalty}{\code{numeric(1)} defining the penalty for initiating an alignment (for local alignment only).} +\item{subset}{\code{integer} with the indices of samples within the +experiment on which the alignment models should be estimated. Samples +not part of the subset are adjusted based on the closest subset sample. +See description above for more details.} + +\item{subsetAdjust}{\code{character} specifying the method with which +non-subset samples should be adjusted. Supported options are +\code{"previous"} and \code{"average"} (default). See description above +for more information.} + \item{object}{For \code{adjustRtime}: an \code{\link{XCMSnExp}} object. For all other methods: a \code{ObiwarpParam} object.} @@ -157,6 +186,8 @@ the alignment method.} time should be performed.} \item{value}{The value for the slot.} + +\item{x}{a \code{PeakGroupsParam} object.} } \value{ The \code{ObiwarpParam} function returns a @@ -178,11 +209,37 @@ the adjusted retention times per spectra (in the same order than } \description{ This method performs retention time adjustment using the - Obiwarp method [Prince 2006]. It is based on the code at - \url{http://obi-warp.sourceforge.net} but supports alignment of multiple - samples by aligning each against a \emph{center} sample. The alignment is - performed directly on the \code{\link{profile-matrix}} and can hence be - performed independently of the peak detection or peak grouping. +Obiwarp method [Prince 2006]. It is based on the code at +\url{http://obi-warp.sourceforge.net} but supports alignment of multiple +samples by aligning each against a \emph{center} sample. The alignment is +performed directly on the \code{\link{profile-matrix}} and can hence be +performed independently of the peak detection or peak grouping. + +It is also possible to exclude certain samples within an experiment from +the estimation of the alignment models. The parameter \code{subset} +allows to define the indices of samples within \code{object} that should +be aligned. Samples not part of this \code{subset} are left out in the +estimation of the alignment models, but their retention times are +subsequently adjusted based on the alignment results of the closest sample +in \code{subset} (close in terms of position within the \code{object}). +Alignment could thus be performed on only \emph{real} samples leaving out +e.g. blanks, which are then in turn adjusted based on the closest real +sample. Here it is up to the user to ensure that the samples within +\code{object} are ordered correctly (e.g. by injection index). + +How the non-subset samples are adjusted bases also on the parameter +\code{subsetAdjust}: with \code{subsetAdjust = "previous"}, each non-subset +sample is adjusted based on the closest previous subset sample which results +in most cases with adjusted retention times of the non-subset sample being +identical to the subset sample on which the adjustment bases. The second, +default, option is to use \code{subsetAdjust = "average"} in which case +each non subset sample is adjusted based on the average retention time +adjustment from the previous and following subset sample. For the average +a weighted mean is used with weights being the inverse of the distance of +the non-subset sample to the subset samples used for alignment. + +See also section \emph{Alignment of experiments including blanks} in the +\emph{xcms} vignette for an example. The \code{ObiwarpParam} class allows to specify all settings for the retention time adjustment based on the \emph{obiwarp} @@ -219,6 +276,12 @@ The \code{ObiwarpParam} class allows to specify all \code{initPenalty},\code{initPenalty<-}: getter and setter for the \code{initPenalty} slot of the object. +\code{subset},\code{subset<-}: getter and + setter for the \code{subset} slot of the object. + +\code{subsetAdjust},\code{subsetAdjust<-}: getter and + setter for the \code{subsetAdjust} slot of the object. + \code{adjustRtime,XCMSnExp,ObiwarpParam}: performs retention time correction/alignment based on the total mz-rt data using the \emph{obiwarp} method. @@ -226,16 +289,15 @@ data using the \emph{obiwarp} method. \section{Slots}{ \describe{ -\item{\code{.__classVersion__,binSize,centerSample,response,distFun,gapInit,gapExtend,factorDiag,factorGap,localAlignment,initPenalty}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{binSize,centerSample,response,distFun,gapInit,gapExtend,factorDiag,factorGap,localAlignment,initPenalty,subset,subsetAdjust}}{See +corresponding parameter above.} }} \note{ These methods and classes are part of the updated and modernized - \code{xcms} user interface which will eventually replace the - \code{\link{retcor}} methods. All of the settings to the alignment - algorithm can be passed with a \code{ObiwarpParam} object. +\code{xcms} user interface which will eventually replace the +\code{\link{retcor}} methods. All of the settings to the alignment +algorithm can be passed with a \code{ObiwarpParam} object. Alignment using obiwarp is performed on the retention time of spectra of on MS level. Retention times for spectra of other MS levels are @@ -247,16 +309,17 @@ all peak grouping (correspondence) results and any previous retention time adjustment results to be dropped. } \examples{ -library(faahKO) -library(MSnbase) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) -## Reading 2 of the KO samples -raw_data <- readMSData(fls[1:2], mode = "onDisk") +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) -## Perform retention time correction on the OnDiskMSnExp: -res <- adjustRtime(raw_data, param = ObiwarpParam()) +## Perform retention time correction: +res <- adjustRtime(faahko_sub, param = ObiwarpParam()) ## As a result we get a numeric vector with the adjusted retention times for ## all spectra. @@ -264,25 +327,8 @@ head(res) ## We can split this by file to get the adjusted retention times for each ## file -resL <- split(res, fromFile(raw_data)) - -############################## -## Perform retention time correction on an XCMSnExp: -## -## Perform first the chromatographic peak detection using the matchedFilter -## method. -mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -res <- findChromPeaks(raw_data, param = mfp) - -## Performing the retention time adjustment using obiwarp. -res_2 <- adjustRtime(res, param = ObiwarpParam()) - -head(rtime(res_2)) -head(rtime(raw_data)) +resL <- split(res, fromFile(res)) -## Also the retention times of the detected peaks were adjusted. -tail(chromPeaks(res)) -tail(chromPeaks(res_2)) } \references{ John T. Prince and Edward M. Marcotte. "Chromatographic Alignment of @@ -300,8 +346,9 @@ ESI-LC-MS Proteomic Data Sets by Ordered Bijective Interpolated Warping" \code{\link{XCMSnExp}} for the object containing the results of the alignment. -Other retention time correction methods: \code{\link{adjustRtime-peakGroups}}, - \code{\link{adjustRtime}} +Other retention time correction methods: +\code{\link{adjustRtime-peakGroups}}, +\code{\link{adjustRtime}()} } \author{ Colin Smith, Johannes Rainer diff --git a/man/adjustRtime-peakGroups.Rd b/man/adjustRtime-peakGroups.Rd index 777fc93f2..0fade0be5 100644 --- a/man/adjustRtime-peakGroups.Rd +++ b/man/adjustRtime-peakGroups.Rd @@ -7,7 +7,6 @@ \alias{PeakGroupsParam-class} \alias{PeakGroupsParam} \alias{adjustRtimePeakGroups} -\alias{show,PeakGroupsParam-method} \alias{minFraction,PeakGroupsParam-method} \alias{minFraction<-,PeakGroupsParam-method} \alias{extraPeaks,PeakGroupsParam-method} @@ -30,17 +29,30 @@ \alias{peakGroupsMatrix} \alias{peakGroupsMatrix<-,PeakGroupsParam-method} \alias{peakGroupsMatrix<-} +\alias{subset,PeakGroupsParam-method} +\alias{subset} +\alias{subset<-,PeakGroupsParam-method} +\alias{subset<-} +\alias{subsetAdjust,PeakGroupsParam-method} +\alias{subsetAdjust} +\alias{subsetAdjust<-,PeakGroupsParam-method} +\alias{subsetAdjust<-} \alias{adjustRtime,XCMSnExp,PeakGroupsParam-method} \title{Retention time correction based on alignment of house keeping peak groups} \usage{ -PeakGroupsParam(minFraction = 0.9, extraPeaks = 1, smooth = "loess", - span = 0.2, family = "gaussian", peakGroupsMatrix = matrix(nrow = - 0, ncol = 0)) - -adjustRtimePeakGroups(object, param = PeakGroupsParam()) - -\S4method{show}{PeakGroupsParam}(object) +PeakGroupsParam( + minFraction = 0.9, + extraPeaks = 1, + smooth = "loess", + span = 0.2, + family = "gaussian", + peakGroupsMatrix = matrix(nrow = 0, ncol = 0), + subset = integer(), + subsetAdjust = c("average", "previous") +) + +adjustRtimePeakGroups(object, param = PeakGroupsParam(), msLevel = 1L) \S4method{minFraction}{PeakGroupsParam}(object) @@ -66,7 +78,15 @@ adjustRtimePeakGroups(object, param = PeakGroupsParam()) \S4method{peakGroupsMatrix}{PeakGroupsParam}(object) <- value -\S4method{adjustRtime}{XCMSnExp,PeakGroupsParam}(object, param) +\S4method{subset}{PeakGroupsParam}(x) + +\S4method{subset}{PeakGroupsParam}(object) <- value + +\S4method{subsetAdjust}{PeakGroupsParam}(object) + +\S4method{subsetAdjust}{PeakGroupsParam}(object) <- value + +\S4method{adjustRtime}{XCMSnExp,PeakGroupsParam}(object, param, msLevel = 1L) } \arguments{ \item{minFraction}{\code{numeric(1)} between 0 and 1 defining the minimum @@ -74,7 +94,11 @@ required fraction of samples in which peaks for the peak group were identified. Peak groups passing this criteria will aligned across samples and retention times of individual spectra will be adjusted based on this alignment. For \code{minFraction = 1} the peak group -has to contain peaks in all samples of the experiment.} +has to contain peaks in all samples of the experiment. Note that if +\code{subset} is provided, the specified fraction is relative to the +defined subset of samples and not to the total number of samples within +the experiment (i.e. a peak has to be present in the specified +proportion of subset samples).} \item{extraPeaks}{\code{numeric(1)} defining the maximal number of additional peaks for all samples to be assigned to a peak group (i.e. @@ -101,6 +125,16 @@ the peak groups on which the alignment should be performed. Each column represents a sample, each row a feature/peak group. Such a matrix is for example returned by the \code{\link{adjustRtimePeakGroups}} method.} +\item{subset}{\code{integer} with the indices of samples within the +experiment on which the alignment models should be estimated. Samples +not part of the subset are adjusted based on the closest subset sample. +See description above for more details.} + +\item{subsetAdjust}{\code{character} specifying the method with which +non-subset samples should be adjusted. Supported options are +\code{"previous"} and \code{"average"} (default). See description above +for more information.} + \item{object}{For \code{adjustRtime}: an \code{\link{XCMSnExp}} object containing the results from a previous chromatographic peak detection (see \code{\link{findChromPeaks}}) and alignment analysis (see @@ -111,6 +145,9 @@ for example returned by the \code{\link{adjustRtimePeakGroups}} method.} \item{param}{A \code{PeakGroupsParam} object containing all settings for the retention time correction method..} +\item{msLevel}{\code{integer(1)} specifying the MS level. Currently only MS +level 1 is supported.} + \item{value}{The value for the slot.} \item{x}{a \code{PeakGroupsParam} object.} @@ -135,13 +172,39 @@ object. } \description{ This method performs retention time adjustment based on the - alignment of chromatographic peak groups present in all/most samples - (hence corresponding to house keeping compounds). First the retention - time deviation of these peak groups is described by fitting either a - polynomial (\code{smooth = "loess"}) or a linear ( - \code{smooth = "linear"}) model to the data points. These models are - subsequently used to adjust the retention time of each spectrum in - each sample. +alignment of chromatographic peak groups present in all/most samples +(hence corresponding to house keeping compounds). First the retention +time deviation of these peak groups is described by fitting either a +polynomial (\code{smooth = "loess"}) or a linear ( +\code{smooth = "linear"}) model to the data points. These models are +subsequently used to adjust the retention time of each spectrum in +each sample. + +It is also possible to exclude certain samples within an experiment from +the estimation of the alignment models. The parameter \code{subset} +allows to define the indices of samples within \code{object} that should +be aligned. Samples not part of this \code{subset} are left out in the +estimation of the alignment models, but their retention times are +subsequently adjusted based on the alignment results of the closest sample +in \code{subset} (close in terms of position within the \code{object}). +Alignment could thus be performed on only \emph{real} samples leaving out +e.g. blanks, which are then in turn adjusted based on the closest real +sample. Here it is up to the user to ensure that the samples within +\code{object} are ordered correctly (e.g. by injection index). + +How the non-subset samples are adjusted bases also on the parameter +\code{subsetAdjust}: with \code{subsetAdjust = "previous"}, each non-subset +sample is adjusted based on the closest previous subset sample which results +in most cases with adjusted retention times of the non-subset sample being +identical to the subset sample on which the adjustment bases. The second, +default, option is to use \code{subsetAdjust = "average"} in which case +each non subset sample is adjusted based on the average retention time +adjustment from the previous and following subset sample. For the average +a weighted mean is used with weights being the inverse of the distance of +the non-subset sample to the subset samples used for alignment. + +See also section \emph{Alignment of experiments including blanks} in the +\emph{xcms} vignette for an example. The \code{PeakGroupsParam} class allows to specify all settings for the retention time adjustment based on \emph{house keeping} @@ -170,6 +233,12 @@ selected for alignment/retention time correction. \code{peakGroupsMatrix},\code{peakGroupsMatrix<-}: getter and setter for the \code{peakGroupsMatrix} slot of the object. +\code{subset},\code{subset<-}: getter and + setter for the \code{subset} slot of the object. + +\code{subsetAdjust},\code{subsetAdjust<-}: getter and + setter for the \code{subsetAdjust} slot of the object. + \code{adjustRtime,XCMSnExp,PeakGroupsParam}: performs retention time correction based on the alignment of peak groups (features) found in all/most samples. The correction function identified @@ -180,22 +249,20 @@ adjusted. \section{Slots}{ \describe{ -\item{\code{.__classVersion__,minFraction,extraPeaks,smooth,span,family,peakGroupsMatrix}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{minFraction,extraPeaks,smooth,span,family,peakGroupsMatrix,subset,subsetAdjust}}{See corresponding parameter above.} }} \note{ These methods and classes are part of the updated and modernized - \code{xcms} user interface which will eventually replace the - \code{\link{group}} methods. All of the settings to the alignment - algorithm can be passed with a \code{PeakGroupsParam} object. +\code{xcms} user interface which will eventually replace the +\code{\link{group}} methods. All of the settings to the alignment +algorithm can be passed with a \code{PeakGroupsParam} object. - The matrix with the (raw) retention times of the peak groups used - in the alignment is added to the \code{peakGroupsMatrix} slot of the - \code{PeakGroupsParam} object that is stored into the corresponding - \emph{process history step} (see \code{\link{processHistory}} for how - to access the process history). +The matrix with the (raw) retention times of the peak groups used +in the alignment is added to the \code{peakGroupsMatrix} slot of the +\code{PeakGroupsParam} object that is stored into the corresponding +\emph{process history step} (see \code{\link{processHistory}} for how +to access the process history). \code{adjustRtimePeakGroups} is supposed to be called \emph{before} the sample alignment, but after a correspondence (peak grouping). @@ -212,30 +279,21 @@ re-adjusts adjusted retention times to ensure them being in the same order than the raw (original) retention times. } \examples{ -############################## -## Chromatographic peak detection and grouping. -## -## Below we perform first a peak detection (using the matchedFilter -## method) on some of the test files from the faahKO package followed by -## a peak grouping. -library(faahKO) -library(xcms) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) - -## Reading 2 of the KO samples -raw_data <- readMSData(fls[1:2], mode = "onDisk") - -## Perform the peak detection using the matchedFilter method. -mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -res <- findChromPeaks(raw_data, param = mfp) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +res <- faahko_sub + +## Disable parallel processing for this example +register(SerialParam()) head(chromPeaks(res)) ## The number of peaks identified per sample: table(chromPeaks(res)[, "sample"]) ## Performing the peak grouping using the "peak density" method. -p <- PeakDensityParam(sampleGroups = c(1, 1)) +p <- PeakDensityParam(sampleGroups = c(1, 1, 1)) res <- groupChromPeaks(res, param = p) ## Perform the retention time adjustment using peak groups found in both @@ -254,7 +312,7 @@ points(x = pkGrps[, 2], y = rep(2, nrow(pkGrps))) segments(x0 = pkGrps[, 1], x1 = pkGrps[, 2], y0 = rep(1, nrow(pkGrps)), y1 = rep(2, nrow(pkGrps))) grid() -axis(side = 2, at = c(1, 2), labels = colnames(pkGrps)) +axis(side = 2, at = c(1, 2, 3), labels = colnames(pkGrps)) ## Next we perform the alignment. res <- adjustRtime(res, param = fgp) @@ -263,15 +321,13 @@ res <- adjustRtime(res, param = fgp) hasFeatures(res) ## Plot the raw against the adjusted retention times. -plot(rtime(raw_data), rtime(res), pch = 16, cex = 0.25, col = fromFile(res)) +plot(rtime(res, adjusted = FALSE), + rtime(res), pch = 16, cex = 0.25, col = fromFile(res)) ## Adjusterd retention times can be accessed using ## rtime(object, adjusted = TRUE) and adjustedRtime all.equal(rtime(res), adjustedRtime(res)) -## To get the raw, unadjusted retention times: -all.equal(rtime(res, adjusted = FALSE), rtime(raw_data)) - ## To extract the retention times grouped by sample/file: rts <- rtime(res, bySample = TRUE) } @@ -290,8 +346,9 @@ The \code{\link{do_adjustRtime_peakGroups}} core \code{\link{XCMSnExp}} for the object containing the results of the alignment. -Other retention time correction methods: \code{\link{adjustRtime-obiwarp}}, - \code{\link{adjustRtime}} +Other retention time correction methods: +\code{\link{adjustRtime-obiwarp}}, +\code{\link{adjustRtime}()} } \author{ Colin Smith, Johannes Rainer diff --git a/man/adjustRtime.Rd b/man/adjustRtime.Rd index ac6002324..76add615c 100644 --- a/man/adjustRtime.Rd +++ b/man/adjustRtime.Rd @@ -20,12 +20,13 @@ The \code{adjustRtime} method(s) perform retention time } } \seealso{ -\code{\link{retcor}} for the \emph{old} retention time correction +\code{\link{retcor}} for the \emph{old} retention time correction methods. \code{\link{plotAdjustedRtime}} for visualization of alignment results. -Other retention time correction methods: \code{\link{adjustRtime-obiwarp}}, - \code{\link{adjustRtime-peakGroups}} +Other retention time correction methods: +\code{\link{adjustRtime-obiwarp}}, +\code{\link{adjustRtime-peakGroups}} } \author{ Johannes Rainer diff --git a/man/applyAdjustedRtime.Rd b/man/applyAdjustedRtime.Rd index 95ae57e7f..c716b5dd3 100644 --- a/man/applyAdjustedRtime.Rd +++ b/man/applyAdjustedRtime.Rd @@ -32,17 +32,16 @@ the \code{\link[=processHistory]{processHistory()}} of the \code{object} to ensu history is preserved. } \examples{ -## Load test data -files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), - system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO")) -od <- readMSData(files, mode = "onDisk") +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") -## Apply obiwarp retention time adjustment. We have to convert the -## OnDiskMSnExp first to an XCMSnExp -xod <- as(od, "XCMSnExp") -xod <- adjustRtime(xod, param = ObiwarpParam()) +## Disable parallel processing for this example +register(SerialParam()) + +xod <- adjustRtime(faahko_sub, param = ObiwarpParam()) hasAdjustedRtime(xod) @@ -53,7 +52,7 @@ xod <- applyAdjustedRtime(xod) hasAdjustedRtime(xod) ## Raw retention times have been replaced with adjusted retention times -plot(split(rtime(od), fromFile(od))[[1]] - +plot(split(rtime(faahko_sub), fromFile(faahko_sub))[[1]] - split(rtime(xod), fromFile(xod))[[1]], type = "l") ## And the process history still contains the settings for the alignment @@ -61,12 +60,14 @@ processHistory(xod) } \seealso{ \code{\link[=adjustRtime]{adjustRtime()}} for the function to perform the alignment (retention -time correction).\preformatted{[adjustedRtime()] for the method to extract adjusted retention times from +time correction). + +\if{html}{\out{
}}\preformatted{[adjustedRtime()] for the method to extract adjusted retention times from an [XCMSnExp] object. [dropAdjustedRtime] for the method to delete alignment results and to restore the raw retention times. -} +}\if{html}{\out{
}} } \author{ Johannes Rainer diff --git a/man/binYonX.Rd b/man/binYonX.Rd index 7fdd227dc..6d6d6db16 100644 --- a/man/binYonX.Rd +++ b/man/binYonX.Rd @@ -4,10 +4,23 @@ \alias{binYonX} \title{Aggregate values in y for bins defined on x} \usage{ -binYonX(x, y, breaks, nBins, binSize, binFromX, binToX, fromIdx = 1L, - toIdx = length(x), method = "max", baseValue, - sortedX = !is.unsorted(x), shiftByHalfBinSize = FALSE, - returnIndex = FALSE, returnX = TRUE) +binYonX( + x, + y, + breaks, + nBins, + binSize, + binFromX, + binToX, + fromIdx = 1L, + toIdx = length(x), + method = "max", + baseValue, + sortedX = !is.unsorted(x), + shiftByHalfBinSize = FALSE, + returnIndex = FALSE, + returnX = TRUE +) } \arguments{ \item{x}{Numeric vector to be used for binning.} diff --git a/man/breaks_on_binSize.Rd b/man/breaks_on_binSize.Rd index 8f2c13ea4..27861be2c 100644 --- a/man/breaks_on_binSize.Rd +++ b/man/breaks_on_binSize.Rd @@ -48,7 +48,8 @@ seq(1, 10, by = 0.51) \seealso{ \code{\link{binYonX}} for a binning function. -Other functions to define bins: \code{\link{breaks_on_nBins}} +Other functions to define bins: +\code{\link{breaks_on_nBins}()} } \author{ Johannes Rainer diff --git a/man/breaks_on_nBins.Rd b/man/breaks_on_nBins.Rd index ff7b9ec10..240340d78 100644 --- a/man/breaks_on_nBins.Rd +++ b/man/breaks_on_nBins.Rd @@ -40,7 +40,8 @@ breaks_on_nBins(3, 20, nBins = 20, shiftByHalfBinSize = TRUE) \seealso{ \code{\link{binYonX}} for a binning function. -Other functions to define bins: \code{\link{breaks_on_binSize}} +Other functions to define bins: +\code{\link{breaks_on_binSize}()} } \author{ Johannes Rainer diff --git a/man/calibrate-calibrant-mass.Rd b/man/calibrate-calibrant-mass.Rd index 5ea91325a..ef97d13b7 100644 --- a/man/calibrate-calibrant-mass.Rd +++ b/man/calibrate-calibrant-mass.Rd @@ -10,8 +10,13 @@ \alias{calibrate,XCMSnExp-method} \title{Calibrant mass based calibration of chromatgraphic peaks} \usage{ -CalibrantMassParam(mz = list(), mzabs = 1e-04, mzppm = 5, - neighbors = 3, method = "linear") +CalibrantMassParam( + mz = list(), + mzabs = 1e-04, + mzppm = 5, + neighbors = 3, + method = "linear" +) isCalibrated(object) @@ -71,7 +76,7 @@ their mz values are replaced with the provided mz values. The mz values of all other peaks are either globally shifted (for \code{method = "shift"} or estimated by a linear model through all calibrants. Peaks are considered close to a calibrant mz if the difference between -the calibrant and its mz is \code{<= mzabs + mz * mzppm /1e6}. +the calibrant and its mz is \verb{<= mzabs + mz * mzppm /1e6}. \strong{Adjustment methods}: adjustment function/factor is estimated using the difference between calibrant and peak mz values only for peaks diff --git a/man/chromPeakSpectra.Rd b/man/chromPeakSpectra.Rd new file mode 100644 index 000000000..fa6da8c50 --- /dev/null +++ b/man/chromPeakSpectra.Rd @@ -0,0 +1,147 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{chromPeakSpectra} +\alias{chromPeakSpectra} +\title{Extract spectra associated with chromatographic peaks} +\usage{ +chromPeakSpectra( + x, + msLevel = 2L, + expandRt = 0, + expandMz = 0, + ppm = 0, + method = c("all", "closest_rt", "closest_mz", "signal", "largest_tic", "largest_bpi"), + skipFilled = FALSE, + return.type = c("MSpectra", "Spectra", "list", "List"), + peaks = character() +) +} +\arguments{ +\item{x}{\link{XCMSnExp} object with identified chromatographic peaks.} + +\item{msLevel}{\code{integer(1)} defining whether MS1 or MS2 spectra should be +returned. \code{msLevel = 1} is currently only supported for \code{return.type} +being \code{"Spectra"} or \code{"List"}.} + +\item{expandRt}{\code{numeric(1)} to expand the retention time range of each +peak by a constant value on each side.} + +\item{expandMz}{\code{numeric(1)} to expand the m/z range of each peak by a +constant value on each side.} + +\item{ppm}{\code{numeric(1)} to expand the m/z range of each peak (on each side) +by a value dependent on the peak's m/z.} + +\item{method}{\code{character(1)} specifying which spectra to include in the +result. Defaults to \code{method = "all"}. See function description for +details.} + +\item{skipFilled}{\code{logical(1)} whether spectra for filled-in peaks should +be reported or not.} + +\item{return.type}{\code{character(1)} defining the result type. Defaults to +\code{return.type = "MSpectra"} but \code{return.type = "Spectra"} or +\code{return.type = "List"} are preferred. See below for more information.} + +\item{peaks}{\code{character}, \code{logical} or \code{integer} allowing to specify a +subset of chromatographic peaks in \code{chromPeaks} for which spectra should +be returned (providing either their ID, a logical vector same length +than \code{nrow(chromPeaks(x))} or their index in \code{chromPeaks(x)}). This +parameter overrides \code{skipFilled} and is only supported for \code{return.type} +being either \code{"Spectra"} or \code{"List"}.} +} +\value{ +parameter \code{return.type} allow to specify the type of the returned object: +\itemize{ +\item \code{return.type = "MSpectra"}: a \link{MSpectra} object with elements being +\linkS4class{Spectrum} objects. The result objects contains all spectra +for all peaks. Metadata column \code{"peak_id"} provides the ID of the +respective peak (i.e. its rowname in \code{\link[=chromPeaks]{chromPeaks()}}). +\item \code{return.type = "Spectra"}: a \code{Spectra} object (defined in the \code{Spectra} +package). The result contains all spectra for all peaks. Metadata column +\code{"peak_id"} provides the ID of the respective peak (i.e. its rowname in +\code{\link[=chromPeaks]{chromPeaks()}} and \code{"peak_index"} its index in the object's \code{chromPeaks} +matrix. +\item \code{return.type = "list"}: \code{list} of \code{list}s that are either of length +0 or contain \linkS4class{Spectrum2} object(s) within the m/z-rt range. The +length of the list matches the number of peaks. +\item \code{return.type = "List"}: \code{List} of length equal to the number of +chromatographic peaks is returned with elements being either \code{NULL} (no +spectrum found) or a \code{Spectra} object. +} +} +\description{ +Extract (MS1 or MS2) spectra from an \link{XCMSnExp} object for each identified +chromatographic peak. The function returns by default spectra for +chromatographic peaks of \strong{all} MS levels, but parameter \code{peaks} allows to +restrict the result to selected chromatographic peaks. +For \code{msLevel = 1L} (only supported for \code{return.type = "Spectra"} or +\code{return.type = "List"}) MS1 spectra within the retention time boundaries +(in the file in which the peak was detected) are returned. For +\code{msLevel = 2L} MS2 spectra are returned for a chromatographic +peak if their precursor m/z is within the retention time and m/z range of +the chromatographic peak. Parameter \code{method} allows to define whether all +or a single spectrum should be returned: +\itemize{ +\item \code{method = "all"}: (default): return all spectra for each peak. +\item \code{method = "closest_rt"}: return the spectrum with the retention time +closest to the peak's retention time (at apex). +\item \code{method = "closest_mz"}: return the spectrum with the precursor m/z +closest to the peaks's m/z (at apex); only supported for \code{msLevel = 2L}. +\item \code{method = "signal"}: return the spectrum with the sum of intensities most +similar to the peak's apex signal (\code{"maxo"}); only supported for +\code{msLevel = 2L}. +\item \code{method = "largest_tic"}: return the spectrum with the largest total +signal (sum of peaks intensities). +\item \code{method = "largest_bpi"}: return the spectrum with the largest peak +intensity (maximal peak intensity). +} + +Parameter \code{return.type} allows to specify the \emph{type} of the result object. +Please use \code{return.type = "Spectra"} or \code{return.type = "List"}, +\code{return.type = "list"} or the default \code{return.type = "MSpectra"} will be +deprecated (also, they do not support extracting MS1 spectra). + +See also the \emph{LC-MS/MS data analysis} vignette for more details and examples. +} +\examples{ + +## Read a file with DDA LC-MS/MS data +fl <- system.file("TripleTOF-SWATH/PestMix1_DDA.mzML", package = "msdata") +dda <- readMSData(fl, mode = "onDisk") + +## Subset the object to reduce runtime of the example +dda <- filterRt(dda, c(200, 400)) + +## Perform MS1 peak detection +dda <- findChromPeaks(dda, CentWaveParam(peakwidth = c(5, 15), prefilter = c(5, 1000))) + +## Load the required Spectra package and return all MS2 spectro for each +## chromatographic peaks as a Spectra object +ms2_sps <- chromPeakSpectra(dda, return.type = "Spectra") +ms2_sps + +## columns peak_id or peak_index assign spectra to the chromatographic peaks +ms2_sps$peak_id +ms2_sps$peak_index +chromPeaks(dda) + +## Alternatively, return the result as a List of Spectra objects. This list +## is parallel to chromPeaks hence the mapping between chromatographic peaks +## and MS2 spectra is easier. +ms2_sps <- chromPeakSpectra(dda, return.type = "List") +ms2_sps[[1L]] +length(ms2_sps) + +## In addition to MS2 spectra we could also return the MS1 spectrum for each +## chromatographic peak which is closest to the peak's apex position. +ms1_sps <- chromPeakSpectra(dda, msLevel = 1L, method = "closest_rt", + return.type = "Spectra") +ms1_sps + +## Parameter peaks would allow to extract spectra for specific peaks only +chromPeakSpectra(dda, msLevel = 1L, method = "closest_rt", peaks = c(3, 5)) +} +\author{ +Johannes Rainer +} diff --git a/man/chromatogram-method.Rd b/man/chromatogram-method.Rd index beab84634..65a034c70 100644 --- a/man/chromatogram-method.Rd +++ b/man/chromatogram-method.Rd @@ -1,134 +1,136 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-XCMSnExp.R -\docType{methods} \name{chromatogram,XCMSnExp-method} \alias{chromatogram,XCMSnExp-method} \alias{chromatogram} \title{Extracting chromatograms} \usage{ -\S4method{chromatogram}{XCMSnExp}(object, rt, mz, aggregationFun = "sum", - missing = NA_real_, msLevel = 1L, BPPARAM = bpparam(), - adjustedRtime = hasAdjustedRtime(object)) +\S4method{chromatogram}{XCMSnExp}( + object, + rt, + mz, + aggregationFun = "sum", + missing = NA_real_, + msLevel = 1L, + BPPARAM = bpparam(), + adjustedRtime = hasAdjustedRtime(object), + filled = FALSE, + include = c("apex_within", "any", "none") +) } \arguments{ -\item{object}{Either a \code{\link{OnDiskMSnExp}} or -\code{\link{XCMSnExp}} object from which the chromatograms should be -extracted.} +\item{object}{Either a \link{OnDiskMSnExp} or \link{XCMSnExp} object from which the +chromatograms should be extracted.} \item{rt}{\code{numeric(2)} or two-column \code{matrix} defining the lower and upper boundary for the retention time range(s). If not specified, -the full retention time range of the original data will be used. -It is also possible to submit a \code{numeric(1)} in which case -\code{range} is called on it to transform it to a \code{numeric(2)}.} +the full retention time range of the original data will be used.} \item{mz}{\code{numeric(2)} or two-column \code{matrix} defining the lower and upper mz value for the MS data slice(s). If not specified, the -chromatograms will be calculated on the full mz range. -It is also possible to submit a \code{numeric(1)} in which case -\code{range} is called on it to transform it to a \code{numeric(2)}.} +chromatograms will be calculated on the full mz range.} -\item{aggregationFun}{\code{character} specifying the function to be used to +\item{aggregationFun}{\code{character(1)} specifying the function to be used to aggregate intensity values across the mz value range for the same -retention time. Allowed values are \code{"sum"}, \code{"max"}, +retention time. Allowed values are \code{"sum"} (the default), \code{"max"}, \code{"mean"} and \code{"min"}.} \item{missing}{\code{numeric(1)} allowing to specify the intensity value to be used if for a given retention time no signal was measured within the mz range of the corresponding scan. Defaults to \code{NA_real_} (see also Details and Notes sections below). Use \code{missing = 0} to resemble the -behaviour of the \code{getEIC} from the \code{old} user interface.} +behaviour of the \code{getEIC} from the \emph{old} user interface.} -\item{msLevel}{\code{integer} specifying the MS level from which the +\item{msLevel}{\code{integer(1)} specifying the MS level from which the chromatogram should be extracted. Defaults to \code{msLevel = 1L}.} \item{BPPARAM}{Parallelisation backend to be used, which will depend on the architecture. Default is \code{BiocParallel::bparam()}.} -\item{adjustedRtime}{For \code{chromatogram,XCMSnExp}: whether the +\item{adjustedRtime}{For \verb{chromatogram,XCMSnExp}: whether the adjusted (\code{adjustedRtime = TRUE}) or raw retention times (\code{adjustedRtime = FALSE}) should be used for filtering and returned -in the resulting \code{\link{Chromatogram}} object. Adjusted +in the resulting \link{MChromatograms} object. Adjusted retention times are used by default if available.} + +\item{filled}{\code{logical(1)} whether filled-in peaks should also be +returned. Defaults to \code{filled = FALSE}, i.e. returns only detected +chromatographic peaks in the result object.} + +\item{include}{\code{character(1)} defining which chromatographic peaks should be +returned. Supported are \code{include = "apex_within"} (the default) which +returns chromatographic peaks that have their apex within the \code{mz} \code{rt} +range, \code{include = "any"} to return all chromatographic peaks which +m/z and rt ranges overlap the \code{mz} and \code{rt} or \code{include = "none"} to +not include any chromatographic peaks.} } \value{ -\code{chromatogram} returns a \code{\link{Chromatograms}} object with +\code{chromatogram} returns a \link{XChromatograms} object with the number of columns corresponding to the number of files in \code{object} and number of rows the number of specified ranges (i.e. number of rows of matrices provided with arguments \code{mz} and/or -\code{rt}). +\code{rt}). All chromatographic peaks with their apex position within the +m/z and retention time range are also retained as well as all feature +definitions for these peaks. } \description{ -\code{chromatogram}: the method allows to extract -chromatograms from \code{\link{OnDiskMSnExp}} and -\code{\link{XCMSnExp}} objects. See also the -\code{\link{chromatogram}} implementation for -\code{\link{OnDiskMSnExp}} in the \code{MSnbase} package. +\code{chromatogram}: extract chromatographic data (such as an extracted ion +chromatogram, a base peak chromatogram or total ion chromatogram) from +an \link{OnDiskMSnExp} or \link{XCMSnExp} objects. See also the help page of the +\code{chromatogram} function in the \code{MSnbase} package. } \details{ -Arguments \code{rt} and \code{mz} allow to specify the MS -data slice from which the chromatogram should be extracted. +Arguments \code{rt} and \code{mz} allow to specify the MS data slice (i.e. the m/z +range and retention time window) from which the chromatogram should be +extracted. These parameters can be either a \code{numeric} of length 2 with the +lower and upper limit, or a \code{matrix} with two columns with the lower and +upper limits to extract multiple EICs at once. The parameter \code{aggregationSum} allows to specify the function to be -used to aggregate the intensities across the mz range for the same +used to aggregate the intensities across the m/z range for the same retention time. Setting \code{aggregationFun = "sum"} would e.g. allow -to calculate the \emph{total ion chromatogram} (TIC), -\code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC). -The length of the extracted \code{\link{Chromatogram}} object, -i.e. the number of available data points, corresponds to the number of -scans/spectra measured in the specified retention time range. If in a -specific scan (for a give retention time) no signal was measured in the -specified mz range, a \code{NA_real_} is reported as intensity for the -retention time (see Notes for more information). This can be changed -using the \code{missing} parameter. +to calculate the \strong{total ion chromatogram} (TIC), +\code{aggregationFun = "max"} the \strong{base peak chromatogram} (BPC). + +If for a given retention time no intensity is measured in that spectrum a +\code{NA} intensity value is returned by default. This can be changed with the +parameter \code{missing}, setting \code{missing = 0} would result in a \code{0} intensity +being returned in these cases. } \note{ -\code{\link{Chromatogram}} objects extracted with -\code{chromatogram} -contain \code{NA_real_} values if, for a given retention time, no -signal was measured in the specified mz range. If no spectrum/scan is -present in the defined retention time window a \code{Chromatogram} object -of length 0 is returned. - -For \code{\link{XCMSnExp}} objects, if adjusted retention times are +For \link{XCMSnExp} objects, if adjusted retention times are available, the \code{chromatogram} method will by default report and use these (for the subsetting based on the provided parameter -\code{rt}). This can be overwritten with the parameter -\code{adjustedRtime}. +\code{rt}). This can be changed by setting \code{adjustedRtime = FALSE}. } \examples{ -## Read some files from the faahKO package. -library(xcms) -library(faahKO) -faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), - system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO")) -od <- readMSData(faahko_3_files, mode = "onDisk") +## Load a test data set with identified chromatographic peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) ## Extract the ion chromatogram for one chromatographic peak in the data. -chrs <- chromatogram(od, rt = c(2700, 2900), mz = 335) +chrs <- chromatogram(faahko_sub, rt = c(2700, 2900), mz = 335) chrs -## Plot the chromatogram -plot(rtime(chrs[1, 2]), intensity(chrs[1, 2]), type = "l", xlab = "rtime", - ylab = "intensity", col = "000080") -for(i in c(1, 3)) { - points(rtime(chrs[1, i]), intensity(chrs[1, i]), type = "l", - col = "00000080") -} +## Identified chromatographic peaks +chromPeaks(chrs) -## Plot the chromatogram using the dedicated plot method. +## Plot the chromatogram plot(chrs) ## Extract chromatograms for multiple ranges. mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) -chrs <- chromatogram(od, mz = mzr, rt = rtr) +chrs <- chromatogram(faahko_sub, mz = mzr, rt = rtr) -chrs +chromPeaks(chrs) -## Plot the extracted chromatograms plot(chrs) ## Get access to all chromatograms for the second mz/rt range @@ -138,18 +140,17 @@ chrs[1, ] plot(chrs[1, , drop = FALSE]) } \seealso{ -\code{\link{XCMSnExp}} for the data object. - \code{\link{Chromatogram}} for the object representing - chromatographic data. +\link{XCMSnExp} for the data object. +\link{Chromatogram} for the object representing chromatographic data. - \code{\link{Chromatograms}} for the object allowing to arrange - multiple \code{Chromatogram} objects. +\if{html}{\out{
}}\preformatted{[XChromatograms] for the object allowing to arrange +multiple [XChromatogram] objects. - \code{\link{plot}} to plot a \code{Chromatogram} or - \code{Chromatograms} objects. +[plot] to plot a [XChromatogram] or [MChromatograms] objects. - \code{\link{as}} (\code{as(x, "data.frame")}) in \code{MSnbase} - for a method to extract the MS data as \code{data.frame}. +`as` (`as(x, "data.frame")`) in `MSnbase` for a method to extract +the MS data as `data.frame`. +}\if{html}{\out{
}} } \author{ Johannes Rainer diff --git a/man/chromatographic-peak-detection.Rd b/man/chromatographic-peak-detection.Rd index 07162efe6..71da06c9a 100644 --- a/man/chromatographic-peak-detection.Rd +++ b/man/chromatographic-peak-detection.Rd @@ -21,7 +21,7 @@ The \code{findChromPeaks} methods perform the chromatographic \item{matchedFilter}{peak detection in chromatographic space. See \code{\link{matchedFilter}} for more details.} - \item{massifquant}{peak detection using the Kalman filter-based + \item{massifquant}{peak detection using the Kalman filter-based method. See \code{\link{massifquant}} for more details.} \item{MSW}{single-spectrum non-chromatography MS data peak detection. @@ -39,11 +39,18 @@ The \code{findChromPeaks} methods perform the chromatographic \code{\link{highlightChromPeaks}} to highlight identified chromatographic peaks in an extracted ion chromatogram plot. -Other peak detection methods: \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, - \code{\link{findChromPeaks-centWave}}, - \code{\link{findChromPeaks-massifquant}}, - \code{\link{findChromPeaks-matchedFilter}}, - \code{\link{findPeaks-MSW}} + \code{\link{refineChromPeaks}} for methods to refine or clean identified + chromatographic peaks. + + \code{\link{manualChromPeaks}} to manually add/define chromatographic + peaks. + +Other peak detection methods: +\code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, +\code{\link{findChromPeaks-centWave}}, +\code{\link{findChromPeaks-massifquant}}, +\code{\link{findChromPeaks-matchedFilter}}, +\code{\link{findPeaks-MSW}} } \author{ Johannes Rainer diff --git a/man/correlate-Chromatogram.Rd b/man/correlate-Chromatogram.Rd new file mode 100644 index 000000000..0bd641072 --- /dev/null +++ b/man/correlate-Chromatogram.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-Chromatogram.R, +% R/methods-MChromatograms.R +\name{correlate,Chromatogram,Chromatogram-method} +\alias{correlate,Chromatogram,Chromatogram-method} +\alias{correlate} +\alias{correlate,MChromatograms,missing-method} +\alias{correlate,MChromatograms,MChromatograms-method} +\title{Correlate chromatograms} +\usage{ +\S4method{correlate}{Chromatogram,Chromatogram}( + x, + y, + use = "pairwise.complete.obs", + method = c("pearson", "kendall", "spearman"), + align = c("closest", "approx"), + ... +) + +\S4method{correlate}{MChromatograms,missing}( + x, + y = NULL, + use = "pairwise.complete.obs", + method = c("pearson", "kendall", "spearman"), + align = c("closest", "approx"), + ... +) + +\S4method{correlate}{MChromatograms,MChromatograms}( + x, + y = NULL, + use = "pairwise.complete.obs", + method = c("pearson", "kendall", "spearman"), + align = c("closest", "approx"), + ... +) +} +\arguments{ +\item{x}{\code{\link[=Chromatogram]{Chromatogram()}} or \code{\link[=MChromatograms]{MChromatograms()}} object.} + +\item{y}{\code{\link[=Chromatogram]{Chromatogram()}} or \code{\link[=MChromatograms]{MChromatograms()}} object.} + +\item{use}{\code{character(1)} passed to the \code{cor} function. See \code{\link[=cor]{cor()}} for +details.} + +\item{method}{\code{character(1)} passed to the \code{cor} function. See \code{\link[=cor]{cor()}} for +details.} + +\item{align}{\code{character(1)} defining the alignment method to be used. See +help on \code{alignRt} in \code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} for details. The value of +this parameter is passed to the \code{method} parameter of \code{alignRt}.} + +\item{...}{optional parameters passed along to the \code{alignRt} method such as +\code{tolerance} that, if set to \code{0} requires the retention times to be +identical.} +} +\value{ +\code{numeric(1)} or \code{matrix} (if called on \code{MChromatograms} objects) +with the correlation coefficient. If a \code{matrix} is returned, the rows +represent the chromatograms in \code{x} and the columns the chromatograms in +\code{y}. +} +\description{ +\strong{For \code{xcms} >= 3.15.3 please use \code{\link[=compareChromatograms]{compareChromatograms()}} instead of +\code{correlate}} + +Correlate intensities of two chromatograms with each other. If the two +\code{Chromatogram} objects have different retention times they are first +\emph{aligned} to match data points in the first to data points in the second +chromatogram. See help on \code{alignRt} in \code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} for more +details. + +If \code{correlate} is called on a single \code{\link[=MChromatograms]{MChromatograms()}} object a pairwise +correlation of each chromatogram with each other is performed and a \code{matrix} +with the correlation coefficients is returned. + +Note that the correlation of two chromatograms depends also on their order, +e.g. \code{correlate(chr1, chr2)} might not be identical to +\code{correlate(chr2, chr1)}. The lower and upper triangular part of the +correlation matrix might thus be different. +} +\examples{ + +chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) +chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) +chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + +chrs <- MChromatograms(list(chr1, chr2, chr3)) + +## Using `compareChromatograms` instead of `correlate`. +compareChromatograms(chr1, chr2) +compareChromatograms(chr2, chr1) + +compareChromatograms(chrs, chrs) +} +\author{ +Michael Witting, Johannes Rainer +} diff --git a/man/diffreport-methods.Rd b/man/diffreport-methods.Rd index 236715d49..4e2850599 100644 --- a/man/diffreport-methods.Rd +++ b/man/diffreport-methods.Rd @@ -73,12 +73,13 @@ \item{missing}{ \code{numeric(1)} defining an optional value for missing values. \code{missing = 0} would e.g. replace all \code{NA} values - in the feature matrix with \code{0}. Note that also a call to + in the feature matrix with \code{0}. Note that also a call to \code{\link{fillPeaks}} results in a feature matrix in which - \code{NA} values are replaced by \code{0}. + \code{NA} values are replaced by \code{0}. } \item{...}{ - optional arguments to be passed to \code{\link{mt.teststat}} + optional arguments to be passed to \code{mt.teststat} from the + \code{multtest} package. } } \details{ @@ -149,7 +150,6 @@ } \seealso{ \code{\link{xcmsSet-class}}, - \code{\link{mt.teststat}}, \code{\link{palette}} } \keyword{methods} diff --git a/man/dirname.Rd b/man/dirname.Rd new file mode 100644 index 000000000..13acadeda --- /dev/null +++ b/man/dirname.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-OnDiskMSnExp.R +\name{dirname} +\alias{dirname} +\alias{dirname,OnDiskMSnExp-method} +\alias{dirname<-,OnDiskMSnExp-method} +\title{Change the file path of an \code{OnDiskMSnExp} object} +\usage{ +\S4method{dirname}{OnDiskMSnExp}(path) + +\S4method{dirname}{OnDiskMSnExp}(path) <- value +} +\arguments{ +\item{path}{\linkS4class{OnDiskMSnExp}.} + +\item{value}{\code{character} of length 1 or length equal to the number of files +defining the new path to the files.} +} +\description{ +\code{dirname} allows to get and set the path to the directory containing the +source files of the \linkS4class{OnDiskMSnExp} (or \linkS4class{XCMSnExp}) object. +} +\author{ +Johannes Rainer +} diff --git a/man/do_adjustRtime_peakGroups.Rd b/man/do_adjustRtime_peakGroups.Rd index 87c7eb591..6c124461d 100644 --- a/man/do_adjustRtime_peakGroups.Rd +++ b/man/do_adjustRtime_peakGroups.Rd @@ -5,10 +5,19 @@ \title{Align spectrum retention times across samples using peak groups found in most samples} \usage{ -do_adjustRtime_peakGroups(peaks, peakIndex, rtime, minFraction = 0.9, - extraPeaks = 1, smooth = c("loess", "linear"), span = 0.2, - family = c("gaussian", "symmetric"), peakGroupsMatrix = matrix(ncol = - 0, nrow = 0)) +do_adjustRtime_peakGroups( + peaks, + peakIndex, + rtime, + minFraction = 0.9, + extraPeaks = 1, + smooth = c("loess", "linear"), + span = 0.2, + family = c("gaussian", "symmetric"), + peakGroupsMatrix = matrix(ncol = 0, nrow = 0), + subset = integer(), + subsetAdjust = c("average", "previous") +) } \arguments{ \item{peaks}{a \code{matrix} or \code{data.frame} with the identified @@ -25,7 +34,11 @@ required fraction of samples in which peaks for the peak group were identified. Peak groups passing this criteria will aligned across samples and retention times of individual spectra will be adjusted based on this alignment. For \code{minFraction = 1} the peak group -has to contain peaks in all samples of the experiment.} +has to contain peaks in all samples of the experiment. Note that if +\code{subset} is provided, the specified fraction is relative to the +defined subset of samples and not to the total number of samples within +the experiment (i.e. a peak has to be present in the specified +proportion of subset samples).} \item{extraPeaks}{\code{numeric(1)} defining the maximal number of additional peaks for all samples to be assigned to a peak group (i.e. @@ -53,6 +66,16 @@ represents a sample, each row a feature/peak group. If not provided, this matrix will be determined depending on parameters \code{minFraction} and \code{extraPeaks}. If provided, \code{minFraction} and \code{extraPeaks} will be ignored.} + +\item{subset}{\code{integer} with the indices of samples within the +experiment on which the alignment models should be estimated. Samples +not part of the subset are adjusted based on the closest subset sample. +See description above for more details.} + +\item{subsetAdjust}{\code{character} specifying the method with which +non-subset samples should be adjusted. Supported options are +\code{"previous"} and \code{"average"} (default). See description above +for more information.} } \value{ A \code{list} with \code{numeric} vectors with the adjusted @@ -60,22 +83,29 @@ A \code{list} with \code{numeric} vectors with the adjusted } \description{ The function performs retention time correction by assessing - the retention time deviation across all samples using peak groups - (features) containg chromatographic peaks present in most/all samples. - The retention time deviation for these features in each sample is - described by fitting either a polynomial (\code{smooth = "loess"}) or - a linear (\code{smooth = "linear"}) model to the data points. The - models are subsequently used to adjust the retention time for each - spectrum in each sample. +the retention time deviation across all samples using peak groups +(features) containg chromatographic peaks present in most/all samples. +The retention time deviation for these features in each sample is +described by fitting either a polynomial (\code{smooth = "loess"}) or +a linear (\code{smooth = "linear"}) model to the data points. The +models are subsequently used to adjust the retention time for each +spectrum in each sample. } \details{ The alignment bases on the presence of compounds that can be found - in all/most samples of an experiment. The retention times of individual - spectra are then adjusted based on the alignment of the features - corresponding to these \emph{house keeping compounds}. The paraneters - \code{minFraction} and \code{extraPeaks} can be used to fine tune which - features should be used for the alignment (i.e. which features - most likely correspond to the above mentioned house keeping compounds). +in all/most samples of an experiment. The retention times of individual +spectra are then adjusted based on the alignment of the features +corresponding to these \emph{house keeping compounds}. The paraneters +\code{minFraction} and \code{extraPeaks} can be used to fine tune which +features should be used for the alignment (i.e. which features +most likely correspond to the above mentioned house keeping compounds). + +Parameter \code{subset} allows to define a subset of samples within the +experiment that should be aligned. All samples not being part of the subset +will be aligned based on the adjustment of the closest sample within the +subset. This allows to e.g. exclude blank samples from the alignment process +with their retention times being still adjusted based on the alignment +results of the \emph{real} samples. } \note{ The method ensures that returned adjusted retention times are diff --git a/man/do_findChromPeaks_centWave.Rd b/man/do_findChromPeaks_centWave.Rd index 6ec43197d..663b557d1 100644 --- a/man/do_findChromPeaks_centWave.Rd +++ b/man/do_findChromPeaks_centWave.Rd @@ -4,12 +4,27 @@ \alias{do_findChromPeaks_centWave} \title{Core API function for centWave peak detection} \usage{ -do_findChromPeaks_centWave(mz, int, scantime, valsPerSpect, ppm = 25, - peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), - mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, - fitgauss = FALSE, noise = 0, verboseColumns = FALSE, - roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL, - sleep = 0) +do_findChromPeaks_centWave( + mz, + int, + scantime, + valsPerSpect, + ppm = 25, + peakwidth = c(20, 50), + snthresh = 10, + prefilter = c(3, 100), + mzCenterFun = "wMean", + integrate = 1, + mzdiff = -0.001, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + roiList = list(), + firstBaselineCheck = TRUE, + roiScales = NULL, + sleep = 0, + extendLengthMSW = FALSE +) } \arguments{ \item{mz}{Numeric vector with the individual m/z values from all scans/ @@ -82,7 +97,10 @@ be represented by a \code{list} of elements or a single row \code{data.frame}.} \item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous -data within regions of interest is checked to be above the first baseline.} +data within regions of interest is checked to be above the first baseline. +In detail, a first rough estimate of the noise is calculated and peak +detection is performed only in regions in which multiple sequential +signals are higher than this first estimated baseline/noise level.} \item{roiScales}{Optional numeric vector with length equal to \code{roiList} defining the scale for each region of interest in \code{roiList} that @@ -93,6 +111,12 @@ iterations. Defaults to \code{sleep = 0}. If \code{> 0} a plot is generated visualizing the identified chromatographic peak. Note: this argument is for backward compatibility only and will be removed in future.} + +\item{extendLengthMSW}{Option to force centWave to use all scales when +running centWave rather than truncating with the EIC length. Uses the "open" +method to extend the EIC to a integer base-2 length prior to being passed to +\code{convolve} rather than the default "reflect" method. See +https://github.com/sneumann/xcms/issues/445 for more information.} } \value{ A matrix, each row representing an identified chromatographic peak, @@ -159,21 +183,26 @@ The \emph{centWave} was designed to work on centroided mode, thus it } \examples{ ## Load the test file -library(faahKO) -fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -xr <- xcmsRaw(fs, profstep = 0) +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Subset to one file and restrict to a certain retention time range +data <- filterRt(filterFile(faahko_sub, 1), c(2500, 3000)) + +## Get m/z and intensity values +mzs <- mz(data) +ints <- intensity(data) -## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave -mzVals <- xr@env$mz -intVals <- xr@env$intensity ## Define the values per spectrum: -valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) +valsPerSpect <- lengths(mzs) -## Calling the function. We're using a large value for noise to speed up -## the call in the example performance - in a real use case we would either +## Calling the function. We're using a large value for noise and prefilter +## to speed up the call in the example - in a real use case we would either ## set the value to a reasonable value or use the default value. -res <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, -scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 10000) +res <- do_findChromPeaks_centWave(mz = unlist(mzs), int = unlist(ints), + scantime = rtime(data), valsPerSpect = valsPerSpect, noise = 10000, + prefilter = c(3, 10000)) head(res) } \references{ @@ -184,10 +213,11 @@ Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly \seealso{ \code{\link{centWave}} for the standard user interface method. -Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, - \code{\link{do_findChromPeaks_massifquant}}, - \code{\link{do_findChromPeaks_matchedFilter}}, - \code{\link{do_findPeaks_MSW}} +Other core peak detection functions: +\code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}()}, +\code{\link{do_findChromPeaks_massifquant}()}, +\code{\link{do_findChromPeaks_matchedFilter}()}, +\code{\link{do_findPeaks_MSW}()} } \author{ Ralf Tautenhahn, Johannes Rainer diff --git a/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd index 89e533b51..0591e8fad 100644 --- a/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd +++ b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd @@ -5,20 +5,53 @@ \alias{do_findChromPeaks_addPredIsoROIs} \title{Core API function for two-step centWave peak detection with isotopes} \usage{ -do_findChromPeaks_centWaveWithPredIsoROIs(mz, int, scantime, valsPerSpect, - ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, - 100), mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, - fitgauss = FALSE, noise = 0, verboseColumns = FALSE, - roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL, - snthreshIsoROIs = 6.25, maxCharge = 3, maxIso = 5, - mzIntervalExtension = TRUE, polarity = "unknown") - -do_findChromPeaks_addPredIsoROIs(mz, int, scantime, valsPerSpect, - ppm = 25, peakwidth = c(20, 50), snthresh = 6.25, - prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1, - mzdiff = -0.001, fitgauss = FALSE, noise = 0, - verboseColumns = FALSE, peaks. = NULL, maxCharge = 3, maxIso = 5, - mzIntervalExtension = TRUE, polarity = "unknown") +do_findChromPeaks_centWaveWithPredIsoROIs( + mz, + int, + scantime, + valsPerSpect, + ppm = 25, + peakwidth = c(20, 50), + snthresh = 10, + prefilter = c(3, 100), + mzCenterFun = "wMean", + integrate = 1, + mzdiff = -0.001, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + roiList = list(), + firstBaselineCheck = TRUE, + roiScales = NULL, + snthreshIsoROIs = 6.25, + maxCharge = 3, + maxIso = 5, + mzIntervalExtension = TRUE, + polarity = "unknown", + extendLengthMSW = FALSE +) + +do_findChromPeaks_addPredIsoROIs( + mz, + int, + scantime, + valsPerSpect, + ppm = 25, + peakwidth = c(20, 50), + snthresh = 6.25, + prefilter = c(3, 100), + mzCenterFun = "wMean", + integrate = 1, + mzdiff = -0.001, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + peaks. = NULL, + maxCharge = 3, + maxIso = 5, + mzIntervalExtension = TRUE, + polarity = "unknown" +) } \arguments{ \item{mz}{Numeric vector with the individual m/z values from all scans/ @@ -95,7 +128,10 @@ be represented by a \code{list} of elements or a single row \code{data.frame}.} \item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous -data within regions of interest is checked to be above the first baseline.} +data within regions of interest is checked to be above the first baseline. +In detail, a first rough estimate of the noise is calculated and peak +detection is performed only in regions in which multiple sequential +signals are higher than this first estimated baseline/noise level.} \item{roiScales}{Optional numeric vector with length equal to \code{roiList} defining the scale for each region of interest in \code{roiList} that @@ -119,6 +155,12 @@ intensity peaks.} Currently not used, but has to be \code{"positive"}, \code{"negative"} or \code{"unknown"} if provided.} +\item{extendLengthMSW}{Option to force centWave to use all scales when +running centWave rather than truncating with the EIC length. Uses the "open" +method to extend the EIC to a integer base-2 length prior to being passed to +\code{convolve} rather than the default "reflect" method. See +https://github.com/sneumann/xcms/issues/445 for more information.} + \item{peaks.}{A matrix or \code{xcmsPeaks} object such as one returned by a call to \code{link{do_findChromPeaks_centWave}} or \code{link{findPeaks.centWave}} (both with \code{verboseColumns = TRUE}) @@ -179,10 +221,11 @@ For more details on the centWave algorithm see \code{\link{centWave}}. } \seealso{ -Other core peak detection functions: \code{\link{do_findChromPeaks_centWave}}, - \code{\link{do_findChromPeaks_massifquant}}, - \code{\link{do_findChromPeaks_matchedFilter}}, - \code{\link{do_findPeaks_MSW}} +Other core peak detection functions: +\code{\link{do_findChromPeaks_centWave}()}, +\code{\link{do_findChromPeaks_massifquant}()}, +\code{\link{do_findChromPeaks_matchedFilter}()}, +\code{\link{do_findPeaks_MSW}()} } \author{ Hendrik Treutler, Johannes Rainer diff --git a/man/do_findChromPeaks_massifquant.Rd b/man/do_findChromPeaks_massifquant.Rd index 7dc14dc02..0f0b4b1d2 100644 --- a/man/do_findChromPeaks_massifquant.Rd +++ b/man/do_findChromPeaks_massifquant.Rd @@ -4,12 +4,27 @@ \alias{do_findChromPeaks_massifquant} \title{Core API function for massifquant peak detection} \usage{ -do_findChromPeaks_massifquant(mz, int, scantime, valsPerSpect, ppm = 10, - peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), - mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, - fitgauss = FALSE, noise = 0, verboseColumns = FALSE, - criticalValue = 1.125, consecMissedLimit = 2, unions = 1, - checkBack = 0, withWave = FALSE) +do_findChromPeaks_massifquant( + mz, + int, + scantime, + valsPerSpect, + ppm = 10, + peakwidth = c(20, 50), + snthresh = 10, + prefilter = c(3, 100), + mzCenterFun = "wMean", + integrate = 1, + mzdiff = -0.001, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + criticalValue = 1.125, + consecMissedLimit = 2, + unions = 1, + checkBack = 0, + withWave = FALSE +) } \arguments{ \item{mz}{Numeric vector with the individual m/z values from all scans/ @@ -157,22 +172,27 @@ This algorithm's performance has been tested rigorously better accuracy. } \examples{ -library(faahKO) -library(xcms) -cdfpath <- system.file("cdf", package = "faahKO") -cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) - -## Read the first file -xraw <- xcmsRaw(cdffiles[1]) -## Extract the required data -mzVals <- xraw@env$mz -intVals <- xraw@env$intensity + +## Load the test file +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Subset to one file and restrict to a certain retention time range +data <- filterRt(filterFile(faahko_sub, 1), c(2500, 3000)) + +## Get m/z and intensity values +mzs <- mz(data) +ints <- intensity(data) + ## Define the values per spectrum: -valsPerSpect <- diff(c(xraw@scanindex, length(mzVals))) +valsPerSpect <- lengths(mzs) -## Perform the peak detection using massifquant -res <- do_findChromPeaks_massifquant(mz = mzVals, int = intVals, -scantime = xraw@scantime, valsPerSpect = valsPerSpect) +## Perform the peak detection using massifquant - setting prefilter to +## a high value to speed up the call for the example +res <- do_findChromPeaks_massifquant(mz = unlist(mzs), int = unlist(ints), + scantime = rtime(data), valsPerSpect = valsPerSpect, + prefilter = c(3, 10000)) head(res) } \references{ @@ -183,10 +203,11 @@ detection" \emph{Bioinformatics} 2014, 30(18):2636-43. \seealso{ \code{\link{massifquant}} for the standard user interface method. -Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, - \code{\link{do_findChromPeaks_centWave}}, - \code{\link{do_findChromPeaks_matchedFilter}}, - \code{\link{do_findPeaks_MSW}} +Other core peak detection functions: +\code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}()}, +\code{\link{do_findChromPeaks_centWave}()}, +\code{\link{do_findChromPeaks_matchedFilter}()}, +\code{\link{do_findPeaks_MSW}()} } \author{ Christopher Conley diff --git a/man/do_findChromPeaks_matchedFilter.Rd b/man/do_findChromPeaks_matchedFilter.Rd index 8ebc402ac..9932925b6 100644 --- a/man/do_findChromPeaks_matchedFilter.Rd +++ b/man/do_findChromPeaks_matchedFilter.Rd @@ -4,10 +4,24 @@ \alias{do_findChromPeaks_matchedFilter} \title{Core API function for matchedFilter peak detection} \usage{ -do_findChromPeaks_matchedFilter(mz, int, scantime, valsPerSpect, - binSize = 0.1, impute = "none", baseValue, distance, fwhm = 30, - sigma = fwhm/2.3548, max = 5, snthresh = 10, steps = 2, - mzdiff = 0.8 - binSize * steps, index = FALSE, sleep = 0) +do_findChromPeaks_matchedFilter( + mz, + int, + scantime, + valsPerSpect, + binSize = 0.1, + impute = "none", + baseValue, + distance, + fwhm = 30, + sigma = fwhm/2.3548, + max = 5, + snthresh = 10, + steps = 2, + mzdiff = 0.8 - binSize * steps, + index = FALSE, + sleep = 0 +) } \arguments{ \item{mz}{Numeric vector with the individual m/z values from all scans/ @@ -120,19 +134,24 @@ This function exposes core peak detection functionality of method). } \examples{ + ## Load the test file -library(faahKO) -fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -xr <- xcmsRaw(fs) +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Subset to one file and restrict to a certain retention time range +data <- filterRt(filterFile(faahko_sub, 1), c(2500, 3000)) + +## Get m/z and intensity values +mzs <- mz(data) +ints <- intensity(data) -## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave -mzVals <- xr@env$mz -intVals <- xr@env$intensity ## Define the values per spectrum: -valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) +valsPerSpect <- lengths(mzs) -res <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, -scantime = xr@scantime, valsPerSpect = valsPerSpect) +res <- do_findChromPeaks_matchedFilter(mz = unlist(mzs), int = unlist(ints), + scantime = rtime(data), valsPerSpect = valsPerSpect) head(res) } \references{ @@ -146,10 +165,11 @@ Profiling Using Nonlinear Peak Alignment, Matching, and Identification" \code{\link{imputeLinInterpol}} for the interpolation of missing values. \code{\link{matchedFilter}} for the standard user interface method. -Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, - \code{\link{do_findChromPeaks_centWave}}, - \code{\link{do_findChromPeaks_massifquant}}, - \code{\link{do_findPeaks_MSW}} +Other core peak detection functions: +\code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}()}, +\code{\link{do_findChromPeaks_centWave}()}, +\code{\link{do_findChromPeaks_massifquant}()}, +\code{\link{do_findPeaks_MSW}()} } \author{ Colin A Smith, Johannes Rainer diff --git a/man/do_findPeaks_MSW.Rd b/man/do_findPeaks_MSW.Rd index ac1302fea..e918b25ba 100644 --- a/man/do_findPeaks_MSW.Rd +++ b/man/do_findPeaks_MSW.Rd @@ -54,10 +54,11 @@ This is a wrapper around the peak picker in Bioconductor's method. \code{\link{peakDetectionCWT}} from the \code{MassSpecWavelet} package. -Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, - \code{\link{do_findChromPeaks_centWave}}, - \code{\link{do_findChromPeaks_massifquant}}, - \code{\link{do_findChromPeaks_matchedFilter}} +Other core peak detection functions: +\code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}()}, +\code{\link{do_findChromPeaks_centWave}()}, +\code{\link{do_findChromPeaks_massifquant}()}, +\code{\link{do_findChromPeaks_matchedFilter}()} } \author{ Joachim Kutzera, Steffen Neumann, Johannes Rainer diff --git a/man/do_groupChromPeaks_density.Rd b/man/do_groupChromPeaks_density.Rd index df96e38ab..b04cf5a45 100644 --- a/man/do_groupChromPeaks_density.Rd +++ b/man/do_groupChromPeaks_density.Rd @@ -5,9 +5,16 @@ \title{Core API function for peak density based chromatographic peak grouping} \usage{ -do_groupChromPeaks_density(peaks, sampleGroups, bw = 30, - minFraction = 0.5, minSamples = 1, binSize = 0.25, - maxFeatures = 50, sleep = 0) +do_groupChromPeaks_density( + peaks, + sampleGroups, + bw = 30, + minFraction = 0.5, + minSamples = 1, + binSize = 0.25, + maxFeatures = 50, + sleep = 0 +) } \arguments{ \item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and @@ -25,7 +32,7 @@ same group).} \item{bw}{\code{numeric(1)} defining the bandwidth (standard deviation ot the smoothing kernel) to be used. This argument is passed to the -\code{\link{density}} method.} +[density() method.} \item{minFraction}{\code{numeric(1)} defining the minimum fraction of samples in at least one sample group in which the peaks have to be present to be @@ -45,34 +52,33 @@ to be identified in a single mz slice.} iterations and plot the result from the current iteration.} } \value{ -A \code{list} with elements \code{"featureDefinitions"} and -\code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row -representing a (mz-rt) feature (i.e. a peak group) with columns: -\describe{ -\item{"mzmed"}{median of the peaks' apex mz values.} -\item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} -\item{"mzmax"}{largest mz value of all peaks' apex within the feature.} -\item{"rtmed"}{the median of the peaks' retention times.} -\item{"rtmin"}{the smallest retention time of the peaks in the group.} -\item{"rtmax"}{the largest retention time of the peaks in the group.} -\item{"npeaks"}{the total number of peaks assigned to the feature. -Note that this number can be larger than the total number of samples, since -multiple peaks from the same sample could be assigned to a feature.} +A \code{data.frame}, each row representing a (mz-rt) feature (i.e. a peak group) +with columns: +\itemize{ +\item \code{"mzmed"}: median of the peaks' apex mz values. +\item \code{"mzmin"}: smallest mz value of all peaks' apex within the feature. +\item \code{"mzmax"}:largest mz value of all peaks' apex within the feature. +\item \code{"rtmed"}: the median of the peaks' retention times. +\item \code{"rtmin"}: the smallest retention time of the peaks in the group. +\item \code{"rtmax"}: the largest retention time of the peaks in the group. +\item \code{"npeaks"}: the total number of peaks assigned to the feature. +\item \code{"peakidx"}: a \code{list} with the indices of all peaks in a feature in the +\code{peaks} input matrix. } -\code{"peakIndex"} is a \code{list} with the indices of all peaks in a -feature in the \code{peaks} input matrix. + +Note that this number can be larger than the total number of samples, since +multiple peaks from the same sample could be assigned to a feature. } \description{ -The \code{do_groupChromPeaks_density} function performs -chromatographic peak grouping based on the density (distribution) of peaks, -found in different samples, along the retention time axis in slices of -overlapping mz ranges. +The \code{do_groupChromPeaks_density} function performs chromatographic peak +grouping based on the density (distribution) of peaks, found in different +samples, along the retention time axis in slices of overlapping mz ranges. } \details{ For overlapping slices along the mz dimension, the function calculates the density distribution of identified peaks along the retention time axis and groups peaks from the same or different samples -that are close to each other. See [Smith 2006] for more details. +that are close to each other. See (Smith 2006) for more details. } \note{ The default settings might not be appropriate for all LC/GC-MS setups, @@ -80,31 +86,33 @@ especially the \code{bw} and \code{binSize} parameter should be adjusted accordingly. } \examples{ -## Load the test data set -library(faahKO) -data(faahko) +## Load the test file +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) ## Extract the matrix with the identified peaks from the xcmsSet: -fts <- peaks(faahko) +pks <- chromPeaks(faahko_sub) ## Perform the peak grouping with default settings: -res <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko)) +res <- do_groupChromPeaks_density(pks, sampleGroups = rep(1, 3)) ## The feature definitions: -head(res$featureDefinitions) - -## The assignment of peaks from the input matrix to the features -head(res$peakIndex) +head(res) } \references{ Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -\emph{Anal. Chem.} 2006, 78:779-787. +Anal. Chem. 2006, 78:779-787. } \seealso{ -Other core peak grouping algorithms: \code{\link{do_groupChromPeaks_nearest}}, - \code{\link{do_groupPeaks_mzClust}} +Other core peak grouping algorithms: +\code{\link{do_groupChromPeaks_nearest}()}, +\code{\link{do_groupPeaks_mzClust}()} } \author{ Colin Smith, Johannes Rainer diff --git a/man/do_groupChromPeaks_nearest.Rd b/man/do_groupChromPeaks_nearest.Rd index 99b0a824d..49e1b60b3 100644 --- a/man/do_groupChromPeaks_nearest.Rd +++ b/man/do_groupChromPeaks_nearest.Rd @@ -5,8 +5,14 @@ \title{Core API function for chromatic peak grouping using a nearest neighbor approach} \usage{ -do_groupChromPeaks_nearest(peaks, sampleGroups, mzVsRtBalance = 10, - absMz = 0.2, absRt = 15, kNN = 10) +do_groupChromPeaks_nearest( + peaks, + sampleGroups, + mzVsRtBalance = 10, + absMz = 0.2, + absRt = 15, + kNN = 10 +) } \arguments{ \item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and @@ -37,31 +43,33 @@ to check.} A \code{list} with elements \code{"featureDefinitions"} and \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row representing an (mz-rt) feature (i.e. peak group) with columns: -\describe{ -\item{"mzmed"}{median of the peaks' apex mz values.} -\item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} -\item{"mzmax"}{largest mz value of all peaks' apex within the feature.} -\item{"rtmed"}{the median of the peaks' retention times.} -\item{"rtmin"}{the smallest retention time of the peaks in the feature.} -\item{"rtmax"}{the largest retention time of the peaks in the feature.} -\item{"npeaks"}{the total number of peaks assigned to the feature.} +\itemize{ +\item \code{"mzmed"}: median of the peaks' apex mz values. +\item \code{"mzmin"}: smallest mz value of all peaks' apex within the feature. +\item \code{"mzmax"}:largest mz value of all peaks' apex within the feature. +\item \code{"rtmed"}: the median of the peaks' retention times. +\item \code{"rtmin"}: the smallest retention time of the peaks in the feature. +\item \code{"rtmax"}: the largest retention time of the peaks in the feature. +\item \code{"npeaks"}: the total number of peaks assigned to the feature. } -\code{"peakIndex"} is a \code{list} with the indices of all peaks in a -feature in the \code{peaks} input matrix. + +\code{"peakIndex"} is a \code{list} with the indices of all peaks in a feature in the +\code{peaks} input matrix. } \description{ -The \code{do_groupChromPeaks_nearest} function groups peaks -across samples by creating a master peak list and assigning corresponding -peaks from all samples to each peak group (i.e. feature). The method is -inspired by the correspondence algorithm of mzMine [Katajamaa 2006]. +The \code{do_groupChromPeaks_nearest} function groups peaks across samples by +creating a master peak list and assigning corresponding peaks from all +samples to each peak group (i.e. feature). The method is inspired by the +correspondence algorithm of mzMine (Katajamaa 2006). } \references{ Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for processing and visualization of mass spectrometry based molecular profile -data. \emph{Bioinformatics} 2006, 22:634-636. +data. Bioinformatics 2006, 22:634-636. } \seealso{ -Other core peak grouping algorithms: \code{\link{do_groupChromPeaks_density}}, - \code{\link{do_groupPeaks_mzClust}} +Other core peak grouping algorithms: +\code{\link{do_groupChromPeaks_density}()}, +\code{\link{do_groupPeaks_mzClust}()} } \concept{core peak grouping algorithms} diff --git a/man/do_groupPeaks_mzClust.Rd b/man/do_groupPeaks_mzClust.Rd index 9b94e0eaf..f3624c58a 100644 --- a/man/do_groupPeaks_mzClust.Rd +++ b/man/do_groupPeaks_mzClust.Rd @@ -4,8 +4,14 @@ \alias{do_groupPeaks_mzClust} \title{Core API function for peak grouping using mzClust} \usage{ -do_groupPeaks_mzClust(peaks, sampleGroups, ppm = 20, absMz = 0, - minFraction = 0.5, minSamples = 1) +do_groupPeaks_mzClust( + peaks, + sampleGroups, + ppm = 20, + absMz = 0, + minFraction = 0.5, + minSamples = 1 +) } \arguments{ \item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and @@ -39,23 +45,24 @@ considered a peak group (feature).} A \code{list} with elements \code{"featureDefinitions"} and \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row representing an (mz-rt) feature (i.e. peak group) with columns: -\describe{ -\item{"mzmed"}{median of the peaks' apex mz values.} -\item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} -\item{"mzmax"}{largest mz value of all peaks' apex within the feature.} -\item{"rtmed"}{always \code{-1}.} -\item{"rtmin"}{always \code{-1}.} -\item{"rtmax"}{always \code{-1}.} -\item{"npeaks"}{the total number of peaks assigned to the feature. -Note that this number can be larger than the total number of samples, since -multiple peaks from the same sample could be assigned to a group.} +\itemize{ +\item \code{"mzmed"}: median of the peaks' apex mz values. +\item \code{"mzmin"}: smallest mz value of all peaks' apex within the feature. +\item \code{"mzmax"}: largest mz value of all peaks' apex within the feature. +\item \code{"rtmed"}: always \code{-1}. +\item \code{"rtmin"}: always \code{-1}. +\item \code{"rtmax"}: always \code{-1}. +\item \code{"npeaks"}: the total number of peaks assigned to the feature. Note that +this number can be larger than the total number of samples, since +multiple peaks from the same sample could be assigned to a group. } -\code{"peakIndex"} is a \code{list} with the indices of all peaks in a -peak group in the \code{peaks} input matrix. + +\code{"peakIndex"} is a \code{list} with the indices of all peaks in a peak group in +the \code{peaks} input matrix. } \description{ -The \code{do_groupPeaks_mzClust} function performs high -resolution correspondence on single spectra samples. +The \code{do_groupPeaks_mzClust} function performs high resolution +correspondence on single spectra samples. } \references{ Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill @@ -64,7 +71,8 @@ development of a heuristic approach for metabolomics}.\cr Metabolomics, Vol. 2, No. 2, 75-83 (2006) } \seealso{ -Other core peak grouping algorithms: \code{\link{do_groupChromPeaks_density}}, - \code{\link{do_groupChromPeaks_nearest}} +Other core peak grouping algorithms: +\code{\link{do_groupChromPeaks_density}()}, +\code{\link{do_groupChromPeaks_nearest}()} } \concept{core peak grouping algorithms} diff --git a/man/estimatePrecursorIntensity.Rd b/man/estimatePrecursorIntensity.Rd new file mode 100644 index 000000000..ed4ebb597 --- /dev/null +++ b/man/estimatePrecursorIntensity.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-OnDiskMSnExp.R +\name{estimatePrecursorIntensity} +\alias{estimatePrecursorIntensity} +\title{Estimate precursor intensity for MS level 2 spectra} +\usage{ +estimatePrecursorIntensity( + x, + ppm = 10, + method = c("previous", "interpolation"), + BPPARAM = bpparam() +) +} +\arguments{ +\item{x}{\code{OnDiskMSnExp} or \code{XCMSnExp} object.} + +\item{ppm}{\code{numeric(1)} defining the maximal acceptable difference (in ppm) +of the precursor m/z and the m/z of the corresponding peak in the MS 1 +scan.} + +\item{method}{\code{character(1)} defining the method how the precursor intensity +should be determined (see description above for details). Defaults to +\code{method = "previous"}.} + +\item{BPPARAM}{parallel processing setup. See \code{\link[=bpparam]{bpparam()}} for details.} +} +\value{ +\code{numeric} with length equal to the number of spectra in \code{x}. \code{NA} is +returned for MS 1 spectra or if no matching peak in a MS 1 scan can be +found for an MS 2 spectrum +} +\description{ +\code{estimatePrecursorIntensity} determines the precursor intensity for a MS 2 +spectrum based on the intensity of the respective signal from the +neighboring MS 1 spectra (i.e. based on the peak with the m/z matching the +precursor m/z of the MS 2 spectrum). Based on parameter \code{method} either the +intensity of the peak from the previous MS 1 scan is used +(\code{method = "previous"}) or an interpolation between the intensity from the +previous and subsequent MS1 scan is used (\code{method = "interpolation"}, which +considers also the retention times of the two MS1 scans and the retention +time of the MS2 spectrum). +} +\author{ +Johannes Rainer +} diff --git a/man/exportMetaboAnalyst.Rd b/man/exportMetaboAnalyst.Rd index 6d2a37da7..93765a2db 100644 --- a/man/exportMetaboAnalyst.Rd +++ b/man/exportMetaboAnalyst.Rd @@ -4,8 +4,15 @@ \alias{exportMetaboAnalyst} \title{Export data for use in MetaboAnalyst} \usage{ -exportMetaboAnalyst(x, file = NULL, label, value = "into", - digits = NULL, ...) +exportMetaboAnalyst( + x, + file = NULL, + label, + value = "into", + digits = NULL, + groupnames = FALSE, + ... +) } \arguments{ \item{x}{\link{XCMSnExp} object with identified chromatographic peaks grouped @@ -25,6 +32,12 @@ feature. See \code{\link[=featureValues]{featureValues()}} for more details.} used for numeric. The default \code{NULL} uses \code{getOption("digits")}. See \code{\link[=format]{format()}} for more information.} +\item{groupnames}{\code{logical(1)} whether row names of the resulting matrix +should be the feature IDs (\code{groupnames = FALSE}; default) or IDs that +are composed of the m/z and retention time of the features (in the +format \verb{MT} (\code{groupnames = TRUE}). See help of the \link{groupnames} +function for details.} + \item{...}{additional parameters to be passed to the \code{\link[=featureValues]{featureValues()}} function.} } @@ -34,7 +47,7 @@ the format supported by MetaboAnalyst. } \description{ Export the feature table for further analysis in the MetaboAnalyst -software (or the \code{MetaboAnalystR} R package. +software (or the \code{MetaboAnalystR} R package). } \author{ Johannes Rainer diff --git a/man/extractMsData-method.Rd b/man/extractMsData-method.Rd index 4ca07aa7f..a413ae3d7 100644 --- a/man/extractMsData-method.Rd +++ b/man/extractMsData-method.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R -\docType{methods} \name{extractMsData,OnDiskMSnExp-method} \alias{extractMsData,OnDiskMSnExp-method} \alias{extractMsData,XCMSnExp-method} @@ -9,8 +8,13 @@ \usage{ \S4method{extractMsData}{OnDiskMSnExp}(object, rt, mz, msLevel = 1L) -\S4method{extractMsData}{XCMSnExp}(object, rt, mz, msLevel = 1L, - adjustedRtime = hasAdjustedRtime(object)) +\S4method{extractMsData}{XCMSnExp}( + object, + rt, + mz, + msLevel = 1L, + adjustedRtime = hasAdjustedRtime(object) +) } \arguments{ \item{object}{A \code{XCMSnExp} or \code{OnDiskMSnExp} object.} @@ -24,7 +28,7 @@ data should be extracted.} should be sub-setted prior to extraction; defaults to \code{msLevel = 1L}.} -\item{adjustedRtime}{(for \code{extractMsData,XCMSnExp}): \code{logical(1)} +\item{adjustedRtime}{(for \verb{extractMsData,XCMSnExp}): \code{logical(1)} specifying if adjusted or raw retention times should be reported. Defaults to adjusted retention times, if these are present in \code{object}.} @@ -49,27 +53,21 @@ values from each file/sample in the provided rt-mz range (or for the full data range if \code{rt} and \code{mz} are not defined). } \examples{ -## Read some files from the test data package. -library(faahKO) -library(xcms) -library(magrittr) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) -raw_data <- readMSData(fls[1:2], mode = "onDisk") -## Extract the full data as a data.frame -ms_all <- as(raw_data, "data.frame") -head(ms_all) -nrow(ms_all) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") -## Read the full MS data for a defined mz-rt region. -res <- raw_data \%>\% - filterRt(rt = c(2700, 2900)) \%>\% - filterMz(mz = c(300, 320)) \%>\% - as("data.frame") +## Disable parallel processing for this example +register(SerialParam()) -head(res) -nrow(res) +## Extract the full MS data for a certain retention time range +## as a data.frame +tmp <- filterRt(faahko_sub, rt = c(2800, 2900)) +ms_all <- as(tmp, "data.frame") +head(ms_all) +nrow(ms_all) } \seealso{ \code{XCMSnExp} for the data object. diff --git a/man/feature-grouping.Rd b/man/feature-grouping.Rd new file mode 100644 index 000000000..4249dd9b6 --- /dev/null +++ b/man/feature-grouping.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-group-features.R +\name{feature-grouping} +\alias{feature-grouping} +\alias{featureGroups,XCMSnExp-method} +\alias{featureGroups<-,XCMSnExp-method} +\title{Compounding of LC-MS features} +\usage{ +\S4method{featureGroups}{XCMSnExp}(object) + +\S4method{featureGroups}{XCMSnExp}(object) <- value +} +\arguments{ +\item{object}{an \code{\link[=XCMSnExp]{XCMSnExp()}} object.} + +\item{value}{for \verb{featureGroups<-}: replacement for the feature groups in +\code{object}. Has to be of length 1 or length equal to the number of features +in \code{object}.} +} +\description{ +Feature \emph{compounding} aims at identifying and grouping LC-MS features +representing different ions or adducts (including isotopes) of the same +originating compound. +The \href{https://bioconductor.org/packages/MsFeatures}{MsFeatures} package +provides a general framework and functionality to group features based on +different properties. The \code{groupFeatures} methods for \linkS4class{XCMSnExp} +objects implemented in \code{xcms} extend these to enable the \emph{compounding} of +LC-MS data. Note that these functions simply define feature groups but don't +actually \emph{aggregate} or combine the features. + +See \code{\link[MsFeatures:groupFeatures]{MsFeatures::groupFeatures()}} for an overview on the general feature +grouping concept as well as details on the individual settings and +parameters. + +The available options for \code{groupFeatures} on \code{xcms} preprocessing results +(i.e. on \code{XCMSnExp} objects after correspondence analysis with +\code{\link[=groupChromPeaks]{groupChromPeaks()}}) are: +\itemize{ +\item Grouping by similar retention times: \code{\link[=groupFeatures-similar-rtime]{groupFeatures-similar-rtime()}}. +\item Grouping by similar feature values across samples: +\code{\link[=AbundanceSimilarityParam]{AbundanceSimilarityParam()}}. +\item Grouping by similar peak shape of extracted ion chromatograms: +\code{\link[=EicSimilarityParam]{EicSimilarityParam()}}. +} + +An ideal workflow grouping features should sequentially perform the above +methods (in the listed order). + +Compounded feature groups can be accessed with the \code{featureGroups} function. +} +\seealso{ +\code{\link[=plotFeatureGroups]{plotFeatureGroups()}} for visualization of grouped features. +} +\author{ +Johannes Rainer, Mar Garcia-Aloy, Vinicius Veri Hernandes +} diff --git a/man/featureChromatograms.Rd b/man/featureChromatograms.Rd new file mode 100644 index 000000000..83a5b2da6 --- /dev/null +++ b/man/featureChromatograms.Rd @@ -0,0 +1,121 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{featureChromatograms} +\alias{featureChromatograms} +\title{Extract ion chromatograms for each feature} +\usage{ +featureChromatograms( + x, + expandRt = 0, + aggregationFun = "max", + features, + include = c("feature_only", "apex_within", "any", "all"), + filled = FALSE, + n = length(fileNames(x)), + value = c("maxo", "into"), + expandMz = 0, + ... +) +} +\arguments{ +\item{x}{\code{XCMSnExp} object with grouped chromatographic peaks.} + +\item{expandRt}{\code{numeric(1)} to expand the retention time range for each +chromatographic peak by a constant value on each side.} + +\item{aggregationFun}{\code{character(1)} specifying the name that should be +used to aggregate intensity values across the m/z value range for +the same retention time. The default \code{"sum"} returns a base peak +chromatogram.} + +\item{features}{\code{integer}, \code{character} or \code{logical} defining a subset of +features for which chromatograms should be returned. Can be the index +of the features in \code{featureDefinitions}, feature IDs (row names of +\code{featureDefinitions}) or a logical vector.} + +\item{include}{\code{character(1)} defining which chromatographic peaks (and +related feature definitions) should be included in the returned +\code{\link[=XChromatograms]{XChromatograms()}}. Defaults to \code{"feature_only"}; See description above +for options and details.} + +\item{filled}{\code{logical(1)} whether filled-in peaks should be included in +the result object. The default is \code{filled = FALSE}, i.e. only detected +peaks are reported.} + +\item{n}{\code{integer(1)} to optionally specify the number of \emph{top n} samples +from which the EIC should be extracted.} + +\item{value}{\code{character(1)} specifying the column to be used to sort the +samples. Can be either \code{"maxo"} (the default) or \code{"into"} to use the +maximal peak intensity or the integrated peak area, respectively.} + +\item{expandMz}{\code{numeric(1)} to expand the m/z range for each chromatographic +peak by a constant value on each side. Be aware that by extending the +m/z range the extracted EIC might \strong{no longer} represent the actual +identified chromatographic peak because intensities of potential +additional mass peaks within each spectra would be aggregated into the +final reported intensity value per spectrum (retention time).} + +\item{...}{optional arguments to be passed along to the \code{\link[=chromatogram]{chromatogram()}} +function.} +} +\value{ +\code{\link[=XChromatograms]{XChromatograms()}} object. +} +\description{ +Extract ion chromatograms for features in an \linkS4class{XCMSnExp} object. The +function returns for each feature its extracted ion chromatogram and all +associated peaks with it. The chromatogram is extracted from the m/z - rt +region including all chromatographic peaks of that features (i.e. based on +the ranges of \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"}, \code{"rtmax"} of all +chromatographic peaks of the feature). + +By default only chromatographic peaks associated with a feature are included +for an extracted ion chromatogram (parameter \code{include = "feature_only"}). By +setting \code{include = "apex_within"} all chromatographic peaks (and eventually +the feature which they are part of - if feature definitions are present) +that have their apex position within the m/z - rt range from which the +chromatogram is extracted are returned too. +With \code{include = "any"} or \code{include = "all"} all chromatographic peaks (and +eventually the feature in which they are present) overlapping the m/z and rt +range will be returned. +} +\note{ +When extracting EICs from only the top \code{n} samples it can happen that one +or more of the features specified with \code{features} are dropped because they +have no detected peak in the \emph{top n} samples. The chance for this to happen +is smaller if \code{x} contains also filled-in peaks (with \code{fillChromPeaks}). +} +\examples{ + +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) + +## Subset the object to a smaller retention time range +xdata <- filterRt(faahko_sub, c(2500, 3500)) + +xdata <- groupChromPeaks(xdata, + param = PeakDensityParam(minFraction = 0.8, sampleGroups = rep(1, 3))) + +## Get the feature definitions +featureDefinitions(xdata) + +## Extract ion chromatograms for the first 3 features. Parameter +## `features` can be either the feature IDs or feature indices. +chrs <- featureChromatograms(xdata, features = 1:3) + +## Plot the XIC for the first feature using different colors for each file +plot(chrs[1, ], col = c("red", "green", "blue")) +} +\seealso{ +\code{\link[=filterColumnsKeepTop]{filterColumnsKeepTop()}} to filter the extracted EICs keeping only +the \emph{top n} columns (samples) with the highest intensity. +} +\author{ +Johannes Rainer +} diff --git a/man/featureSpectra.Rd b/man/featureSpectra.Rd new file mode 100644 index 000000000..e0672ce45 --- /dev/null +++ b/man/featureSpectra.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{featureSpectra} +\alias{featureSpectra} +\title{Extract spectra associated with features} +\usage{ +featureSpectra( + x, + msLevel = 2L, + expandRt = 0, + expandMz = 0, + ppm = 0, + skipFilled = FALSE, + return.type = c("MSpectra", "Spectra", "list", "List"), + features = character(), + ... +) +} +\arguments{ +\item{x}{\link{XCMSnExp} object with feature defitions available.} + +\item{msLevel}{\code{integer(1)} defining whether MS1 or MS2 spectra should be +returned. \code{msLevel = 1} is currently only supported for \code{return.type} +being \code{"Spectra"} or \code{"List"}.} + +\item{expandRt}{\code{numeric(1)} to expand the retention time range of each +peak by a constant value on each side.} + +\item{expandMz}{\code{numeric(1)} to expand the m/z range of each peak by a +constant value on each side.} + +\item{ppm}{\code{numeric(1)} to expand the m/z range of each peak (on each side) +by a value dependent on the peak's m/z.} + +\item{skipFilled}{\code{logical(1)} whether spectra for filled-in peaks should +be reported or not.} + +\item{return.type}{\code{character(1)} defining the result type. Defaults to +\code{return.type = "MSpectra"} but \code{return.type = "Spectra"} or +\code{return.type = "List"} are preferred. See below for more information.} + +\item{features}{\code{character}, \code{logical} or \code{integer} allowing to specify a +subset of features in \code{featureDefinitions} for which spectra should +be returned (providing either their ID, a logical vector same length +than \code{nrow(featureDefinitions(x))} or their index in +\code{featureDefinitions(x)}). This parameter overrides \code{skipFilled} and is +only supported for \code{return.type} being either \code{"Spectra"} or \code{"List"}.} + +\item{...}{additional arguments to be passed along to \code{\link[=chromPeakSpectra]{chromPeakSpectra()}}, +such as \code{method}.} +} +\value{ +parameter \code{return.type} allow to specify the type of the returned object: +\itemize{ +\item \code{return.type = "MSpectra"}: a \link{MSpectra} object with elements being +\linkS4class{Spectrum} objects. The result objects contains all spectra +for all features. Metadata column \code{"feature_id"} provides the ID of the +respective feature (i.e. its rowname in \code{\link[=featureDefinitions]{featureDefinitions()}}). +\item \code{return.type = "Spectra"}: a \code{Spectra} object (defined in the \code{Spectra} +package). The result contains all spectra for all features. Metadata column +\code{"feature_id"} provides the ID of the respective feature (i.e. its rowname +in \code{\link[=featureDefinitions]{featureDefinitions()}}. +\item \code{return.type = "list"}: \code{list} of \code{list}s that are either of length +0 or contain \linkS4class{Spectrum2} object(s) within the m/z-rt range. The +length of the list matches the number of features. +\item \code{return.type = "List"}: \code{List} of length equal to the number of +features with MS level \code{msLevel} is returned with elements being either +\code{NULL} (no spectrum found) or a \code{Spectra} object. +} +} +\description{ +This function returns spectra associated with the identified features in the +input object. By default, spectra are returned for all features (from all +MS levels), but parameter \code{features} allows to specify selected features for +which the result should be returned. +Parameter \code{msLevel} allows to define whether MS level 1 or 2 +spectra should be returned. For \code{msLevel = 1L} all MS1 spectra within the +retention time range of each chromatographic peak (in that respective data +file) associated with a feature are returned. Note that for samples in which +no peak was identified (or even filled-in) no spectra are returned. +For \code{msLevel = 2L} all MS2 +spectra with a retention time within the retention time range and their +precursor m/z within the m/z range of any chromatographic peak of a feature +are returned. See also \code{\link[=chromPeakSpectra]{chromPeakSpectra()}} (used internally to extract +spectra for each chromatographic peak of a feature) for additional +information. + +In contrast to the \code{\link[=chromPeakSpectra]{chromPeakSpectra()}} function, selecting a \code{method} +different than \code{"all"} will not return a single spectrum per feature, but +one spectrum per \strong{chromatographic peak} assigned to the feature. + +Note also that \code{msLevel = 1L} is only supported for \code{return.type = "List"} +or \code{return.type = "Spectra"}. +} +\author{ +Johannes Rainer +} diff --git a/man/featureSummary.Rd b/man/featureSummary.Rd index 04ebd42ed..837a9921f 100644 --- a/man/featureSummary.Rd +++ b/man/featureSummary.Rd @@ -4,7 +4,13 @@ \alias{featureSummary} \title{Simple feature summaries} \usage{ -featureSummary(x, group, perSampleCounts = FALSE, method = "maxint") +featureSummary( + x, + group, + perSampleCounts = FALSE, + method = "maxint", + skipFilled = TRUE +) } \arguments{ \item{x}{`XCMSnExp` object with correspondence results.} @@ -18,6 +24,9 @@ counts per sample should be returned too.} \item{method}{`character` passed to the [featureValues()] function. See respective help page for more information.} + +\item{skipFilled}{`logical(1)` whether filled-in peaks should be excluded +(default) or included in the summary calculation.} } \value{ `matrix` with one row per feature and columns: diff --git a/man/fillChromPeaks.Rd b/man/fillChromPeaks.Rd index 1b74d527e..84792fe8a 100644 --- a/man/fillChromPeaks.Rd +++ b/man/fillChromPeaks.Rd @@ -4,10 +4,11 @@ \docType{class} \name{FillChromPeaksParam-class} \alias{FillChromPeaksParam-class} +\alias{ChromPeakAreaParam-class} \alias{FillChromPeaksParam} \alias{fixedRt} \alias{fixedMz} -\alias{show,FillChromPeaksParam-method} +\alias{ChromPeakAreaParam} \alias{expandMz,FillChromPeaksParam-method} \alias{expandMz} \alias{expandMz<-,FillChromPeaksParam-method} @@ -20,17 +21,28 @@ \alias{ppm<-,FillChromPeaksParam-method} \alias{fillChromPeaks,XCMSnExp,FillChromPeaksParam-method} \alias{fillChromPeaks} +\alias{fillChromPeaks,XCMSnExp,ChromPeakAreaParam-method} \alias{fillChromPeaks,XCMSnExp,missing-method} \title{Integrate areas of missing peaks} \usage{ -FillChromPeaksParam(expandMz = 0, expandRt = 0, ppm = 0, - fixedMz = 0, fixedRt = 0) +FillChromPeaksParam( + expandMz = 0, + expandRt = 0, + ppm = 0, + fixedMz = 0, + fixedRt = 0 +) fixedRt(object) fixedMz(object) -\S4method{show}{FillChromPeaksParam}(object) +ChromPeakAreaParam( + mzmin = function(z) quantile(z, probs = 0.25), + mzmax = function(z) quantile(z, probs = 0.75), + rtmin = function(z) quantile(z, probs = 0.25), + rtmax = function(z) quantile(z, probs = 0.75) +) \S4method{expandMz}{FillChromPeaksParam}(object) @@ -44,51 +56,82 @@ fixedMz(object) \S4method{ppm}{FillChromPeaksParam}(object) <- value -\S4method{fillChromPeaks}{XCMSnExp,FillChromPeaksParam}(object, param, - BPPARAM = bpparam()) +\S4method{fillChromPeaks}{XCMSnExp,FillChromPeaksParam}(object, param, msLevel = 1L, BPPARAM = bpparam()) -\S4method{fillChromPeaks}{XCMSnExp,missing}(object, param, - BPPARAM = bpparam()) +\S4method{fillChromPeaks}{XCMSnExp,ChromPeakAreaParam}(object, param, msLevel = 1L, BPPARAM = bpparam()) + +\S4method{fillChromPeaks}{XCMSnExp,missing}(object, param, BPPARAM = bpparam(), msLevel = 1L) } \arguments{ -\item{expandMz}{\code{numeric(1)} defining the value by which the mz width of -peaks should be expanded. Each peak is expanded in mz direction by -\code{expandMz *} their original mz width. A value of \code{0} means no -expansion, a value of \code{1} grows each peak by 1 * the mz width of -the peak resulting in peakswith twice their original size in mz -direction (expansion by half mz width to both sides).} - -\item{expandRt}{\code{numeric(1)}, same as \code{expandRt} but for the -retention time width.} - -\item{ppm}{\code{numeric(1)} optionally specifying a \emph{ppm} by which the -mz width of the peak region should be expanded. For peaks with an mz -width smaller than \code{mean(c(mzmin, mzmax)) * ppm / 1e6}, the -\code{mzmin} will be replaced by +\item{expandMz}{for \code{FillChromPeaksParam}: \code{numeric(1)} defining the value +by which the mz width of peaks should be expanded. Each peak is expanded +in mz direction by \verb{expandMz *} their original m/z width. A value of +\code{0} means no expansion, a value of \code{1} grows each peak by \verb{1 *} the m/z +width of the peak resulting in peaks with twice their original size in +m/z direction (expansion by half m/z width to both sides).} + +\item{expandRt}{for \code{FillChromPeaksParam}: \code{numeric(1)}, same as \code{expandMz} +but for the retention time width.} + +\item{ppm}{for \code{FillChromPeaksParam}: \code{numeric(1)} optionally specifying a +\emph{ppm} by which the m/z width of the peak region should be expanded. For +peaks with an m/z width smaller than \code{mean(c(mzmin, mzmax)) * ppm / 1e6}, +the \code{mzmin} will be replaced by \code{mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)} -and \code{mzmax} by +\code{mzmax} by \code{mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}. -This is applied before eventually expanding the mz width using the +This is applied before eventually expanding the m/z width using the \code{expandMz} parameter.} -\item{fixedMz}{\code{numeric(1)} defining a constant factor by which the -m/z width of each feature is to be expanded. The m/z width is expanded -on both sides by \code{fixedMz} (i.e. \code{fixedMz} is subtracted -from the lower m/z and added to the upper m/z). This expansion is -applied \emph{after} \code{expandMz} and \code{ppm}.} - -\item{fixedRt}{\code{numeric(1)} defining a constant factor by which the -retention time width of each factor is to be expanded. The rt width is -expanded on both sides by \code{fixedRt} (i.e. \code{fixedRt} is -subtracted from the lower rt and added to the upper rt). This -expansion is applied \emph{after} \code{expandRt}.} - -\item{object}{\code{XCMSnExp} object with identified and grouped -chromatographic peaks.} +\item{fixedMz}{for \code{FillChromPeaksParam}: \code{numeric(1)} defining a constant +factor by which the m/z width of each feature is to be expanded. +The m/z width is expanded on both sides by \code{fixedMz} (i.e. \code{fixedMz} +is subtracted from the lower m/z and added to the upper m/z). This +expansion is applied \emph{after} \code{expandMz} and \code{ppm}.} + +\item{fixedRt}{for \code{FillChromPeaksParam}: \code{numeric(1)} defining a constant +factor by which the retention time width of each factor is to be +expanded. The rt width is expanded on both sides by \code{fixedRt} (i.e. +\code{fixedRt} is subtracted from the lower rt and added to the upper rt). +This expansion is applied \emph{after} \code{expandRt}.} + +\item{object}{\code{XCMSnExp} object with identified and grouped chromatographic +peaks.} + +\item{mzmin}{\code{function} to be applied to values in the \code{"mzmin"} column of all +chromatographic peaks of a feature to define the lower m/z value of the +area from which signal for the feature should be integrated. Defaults to +\code{mzmin = function(z) quantile(z, probs = 0.25)} hence using the 25\% +quantile of all values.} + +\item{mzmax}{\code{function} to be applied to values in the \code{"mzmax"} column of all +chromatographic peaks of a feature to define the upper m/z value of the +area from which signal for the feature should be integrated. Defaults to +\code{mzmax = function(z) quantile(z, probs = 0.75)} hence using the 75\% +quantile of all values.} + +\item{rtmin}{\code{function} to be applied to values in the \code{"rtmin"} column of all +chromatographic peaks of a feature to define the lower rt value of the +area from which signal for the feature should be integrated. Defaults to +\code{rtmin = function(z) quantile(z, probs = 0.25)} hence using the 25\% +quantile of all values.} + +\item{rtmax}{\code{function} to be applied to values in the \code{"rtmax"} column of all +chromatographic peaks of a feature to define the upper rt value of the +area from which signal for the feature should be integrated. Defaults to +\code{rtmax = function(z) quantile(z, probs = 0.75)} hence using the 75\% +quantile of all values.} \item{value}{The value for the slot.} -\item{param}{A \code{FillChromPeaksParam} object with all settings.} +\item{param}{\code{FillChromPeaksParam} or \code{ChromPeakAreaParam} object +defining which approach should be used (see details section).} + +\item{msLevel}{\code{integer(1)} defining the MS level on which peak filling +should be performed (defaults to \code{msLevel = 1L}). Only peak filling +on one MS level at a time is supported, to fill in peaks for MS level 1 +and 2 run first using \code{msLevel = 1} and then (on the returned +result object) again with \code{msLevel = 2}.} \item{BPPARAM}{Parallel processing settings.} } @@ -96,14 +139,10 @@ chromatographic peaks.} The \code{FillChromPeaksParam} function returns a \code{FillChromPeaksParam} object. -A \code{\link{XCMSnExp}} object with previously missing -chromatographic peaks for features filled into its \code{chromPeaks} -matrix. +A \code{XCMSnExp} object with previously missing chromatographic peaks for +features filled into its \code{\link[=chromPeaks]{chromPeaks()}} matrix. } \description{ -The \code{FillChromPeaksParam} object encapsules all settings for -the signal integration for missing peaks. - \code{expandMz},\code{expandMz<-}: getter and setter for the \code{expandMz} slot of the object. @@ -115,41 +154,68 @@ the signal integration for missing peaks. Integrate signal in the mz-rt area of a feature (chromatographic peak group) for samples in which no chromatographic peak for this -feature was identified and add it to the \code{chromPeaks}. Such peaks -will have a value of \code{1} in the \code{"is_filled"} column of the -\code{\link{chromPeaks}} matrix of the object. +feature was identified and add it to the \code{\link[=chromPeaks]{chromPeaks()}} matrix. Such +\emph{filled-in} peaks are indicated with a \code{TRUE} in column \code{"is_filled"} in +the result object's \code{\link[=chromPeakData]{chromPeakData()}} data frame. + +Two different gap-filling approaches are implemented: +\itemize{ +\item \code{param = FillChromPeaksParam()}: the default of the original \code{xcms} code. +Signal is integrated from the m/z and retention time range as defined in +the \code{\link[=featureDefinitions]{featureDefinitions()}} data frame, i.e. from the \code{"rtmin"}, \code{"rtmax"}, +\code{"mzmin"} and \code{"mzmax"}. See details below for more information and +settings for this method. +\item \code{param = ChromPeakAreaParam()}: the area from which the signal for a +feature is integrated is defined based on the feature's chromatographic +peak areas. The m/z range is by default defined as the the lower quartile +of chromatographic peaks' \code{"mzmin"} value to the upper quartile of the +chromatographic peaks' \code{"mzmax"} values. The retention time range for the +area is defined analogously. Alternatively, by setting \code{mzmin = median}, +\code{mzmax = median}, \code{rtmin = median} and \code{rtmax = median} in +\code{ChromPeakAreaParam}, the median \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and +\code{"rtmax"} values from all detected chromatographic peaks of a feature +would be used instead. +In contrast to the \code{FillChromPeaksParam} approach this method uses the +actual identified chromatographic peaks of a feature to define the area +from which the signal should be integrated. +} } \details{ After correspondence (i.e. grouping of chromatographic peaks across samples) there will always be features (peak groups) that do not include peaks from every sample. The \code{fillChromPeaks} method defines intensity values for such features in the missing samples by integrating -the signal in the mz-rt region of the feature. The mz-rt area is defined -by the median mz and rt start and end points of the other detected -chromatographic peaks for a given feature. Various parameters allow to -increase this area, either by a constant value (\code{fixedMz} and -\code{fixedRt}) or by a feature-relative amount (\code{expandMz} and -\code{expandRt}). +the signal in the mz-rt region of the feature. Two different approaches +to define this region are available: with \code{ChromPeakAreaParam} the region +is defined based on the detected \strong{chromatographic peaks} of a feature, +while with \code{FillChromPeaksParam} the region is defined based on the m/z and +retention times of the \strong{feature} (which represent the m/z and retentention +times of the apex position of the associated chromatographic peaks). For the +latter approach various parameters are available to increase the area from +which signal is to be integrated, either by a constant value (\code{fixedMz} and +\code{fixedRt}) or by a feature-relative amount (\code{expandMz} and \code{expandRt}). Adjusted retention times will be used if available. Based on the peak finding algorithm that was used to identify the -(chromatographic) peaks different internal functions are employed to +(chromatographic) peaks, different internal functions are used to guarantee that the integrated peak signal matches as much as possible the peak signal integration used during the peak detection. For peaks -identified with the \code{\link{matchedFilter}} method, signal +identified with the \code{\link[=matchedFilter]{matchedFilter()}} method, signal integration is performed on the \emph{profile matrix} generated with the same settings used also during peak finding (using the same \code{bin} size for example). For direct injection data and peaks -identified with the \code{\link{MSW}} algorithm signal is integrated +identified with the \code{MSW} algorithm signal is integrated only along the mz dimension. For all other methods the complete (raw) -signal within the area defined by \code{"mzmin"}, \code{"mzmax"}, -\code{"rtmin"} and \code{"rtmax"} is used. +signal within the area is used. } \section{Slots}{ \describe{ -\item{\code{.__classVersion__,expandMz,expandRt,ppm,fixedMz,fixedRt}}{See corresponding parameter above. \code{.__classVersion__} stores the version of the class.} +\item{\code{expandMz,expandRt,ppm,fixedMz,fixedRt}}{See corresponding parameter +above.} + +\item{\code{rtmin,rtmax,mzmin,mzmax}}{See corresponding parameter above.} }} \note{ @@ -158,29 +224,18 @@ The reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and which the signal was integrated. Note that no peak is filled in if no signal was present in a file/sample in the respective mz-rt area. These samples will still show a \code{NA} -in the matrix returned by the \code{\link{featureValues}} method. This -is in contrast to the \code{\link{fillPeaks.chrom}} method that returned -an \code{"into"} and \code{"maxo"} of \code{0} for such peak areas. -Growing the mz-rt area using the \code{expandMz} and \code{expandRt} -might help to reduce the number of missing peak signals after filling. +in the matrix returned by the \code{\link[=featureValues]{featureValues()}} method. } \examples{ -## Perform the peak detection using centWave on some of the files from the -## faahKO package. Files are read using the readMSData from the MSnbase -## package -library(faahKO) -library(xcms) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) -raw_data <- readMSData(fls[1:2], mode = "onDisk") - -## Create a CentWaveParam object. Note that the noise is set to 10000 to -## speed up the execution of the example - in a real use case the default -## value should be used, or it should be set to a reasonable value. -cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 40) +## Load a test data set with identified chromatographic peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +res <- faahko_sub -res <- findChromPeaks(raw_data, param = cwp) +## Disable parallel processing for this example +register(SerialParam()) ## Perform the correspondence. We assign all samples to the same group. res <- groupChromPeaks(res, @@ -189,43 +244,35 @@ res <- groupChromPeaks(res, ## For how many features do we lack an integrated peak signal? sum(is.na(featureValues(res))) -## Filling missing peak data using default settings. -res <- fillChromPeaks(res) +## Filling missing peak data using the peak area from identified +## chromatographic peaks. +res <- fillChromPeaks(res, param = ChromPeakAreaParam()) -## Get the peaks that have been filled in: -fp <- chromPeaks(res)[chromPeaks(res)[, "is_filled"] == 1, ] -head(fp) - -## Did we get a signal for all missing peaks? +## How many missing values do we have after peak filling? sum(is.na(featureValues(res))) -## No. +## Get the peaks that have been filled in: +fp <- chromPeaks(res)[chromPeakData(res)$is_filled, ] +head(fp) ## Get the process history step along with the parameters used to perform -## The peak filling: +## The peak filling: ph <- processHistory(res, type = "Missing peak filling")[[1]] ph ## The parameter class: ph@param -## Drop the filled in peaks: +## It is also possible to remove filled-in peaks: res <- dropFilledChromPeaks(res) -## Perform the peak filling with modified settings: allow expansion of the -## mz range by a specified ppm and expanding the mz range by mz width/2 -prm <- FillChromPeaksParam(ppm = 40, expandMz = 0.5) -res <- fillChromPeaks(res, param = prm) - -## Did we get a signal for all missing peaks? sum(is.na(featureValues(res))) - -## Still the same missing peaks. } \seealso{ -\code{\link{groupChromPeaks}} for methods to perform the - correspondence. - \code{\link{dropFilledChromPeaks}} for the method to remove filled in peaks. +\code{\link[=groupChromPeaks]{groupChromPeaks()}} for methods to perform the correspondence. + +\link{featureArea} for the function to define the m/z-retention time +region for each feature. } \author{ Johannes Rainer diff --git a/man/filter-MChromatograms.Rd b/man/filter-MChromatograms.Rd new file mode 100644 index 000000000..ca96dfa68 --- /dev/null +++ b/man/filter-MChromatograms.Rd @@ -0,0 +1,154 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-MChromatograms.R, +% R/methods-XChromatograms.R +\name{filterColumnsIntensityAbove,MChromatograms-method} +\alias{filterColumnsIntensityAbove,MChromatograms-method} +\alias{filterColumnsIntensityAbove} +\alias{filterColumnsKeepTop} +\alias{filterColumnsKeepTop,MChromatograms-method} +\alias{filterColumnsIntensityAbove,XChromatograms-method} +\alias{filterColumnsKeepTop,XChromatograms-method} +\title{Filtering sets of chromatographic data} +\usage{ +\S4method{filterColumnsIntensityAbove}{MChromatograms}( + object, + threshold = 0, + value = c("bpi", "tic"), + which = c("any", "all") +) + +\S4method{filterColumnsKeepTop}{MChromatograms}( + object, + n = 1L, + sortBy = c("bpi", "tic"), + aggregationFun = sum +) + +\S4method{filterColumnsIntensityAbove}{XChromatograms}( + object, + threshold = 0, + value = c("bpi", "tic", "maxo", "into"), + which = c("any", "all") +) + +\S4method{filterColumnsKeepTop}{XChromatograms}( + object, + n = 1L, + sortBy = c("bpi", "tic", "maxo", "into"), + aggregationFun = sum +) +} +\arguments{ +\item{object}{\code{\link[=MChromatograms]{MChromatograms()}} or \code{\link[=XChromatograms]{XChromatograms()}} object.} + +\item{threshold}{for \code{filterColumnsIntensityAbove}: \code{numeric(1)} with the +threshold value to compare against.} + +\item{value}{\code{character(1)} defining which value should be used in the +comparison or sorting. Can be \code{value = "bpi"} (default) to use the +maximum intensity per chromatogram or \code{value = "tic"} to use the sum +of intensities per chromatogram. For \code{\link[=XChromatograms]{XChromatograms()}} objects also +\code{value = "maxo"} and \code{value = "into"} is supported to use the maximum +intensity or the integrated area of identified chromatographic peaks +in each chromatogram.} + +\item{which}{for \code{filterColumnsIntensityAbove}: \code{character(1)} defining +whether \strong{any} (\code{which = "any"}, default) or \strong{all} (\code{which = "all"}) +chromatograms in a column have to fulfill the criteria for the column +to be kept.} + +\item{n}{for \code{filterColumnsKeepTop}: \code{integer(1)} specifying the number of +columns that should be returned. \code{n} will be rounded to the closest +(larger) integer value.} + +\item{sortBy}{for \code{filterColumnsKeepTop}: the value by which columns should +be ordered to determine the top n columns. Can be either \code{sortBy = "bpi"} +(the default), in which case the maximum intensity of each column's +chromatograms is used, or \code{sortBy = "tic"} to use the total intensity +sum of all chromatograms. For \code{\link[=XChromatograms]{XChromatograms()}} objects also +\code{value = "maxo"} and \code{value = "into"} is supported to use the maximum +intensity or the integrated area of identified chromatographic peaks +in each chromatogram.} + +\item{aggregationFun}{for \code{filterColumnsKeepTop}: function to be used to +aggregate (combine) the values from all chromatograms in each column. +Defaults to \code{aggregationFun = sum} in which case the sum of the values +is used to rank the columns. Alternatively the \code{mean}, \code{median} or +similar function can be used.} +} +\value{ +a filtered \code{MChromatograms} (or \code{XChromatograms}) object with the +same number of rows (EICs) but eventually a lower number of columns +(samples). +} +\description{ +These functions allow to filter (subset) \code{\link[=MChromatograms]{MChromatograms()}} or +\code{\link[=XChromatograms]{XChromatograms()}} objects, i.e. sets of chromatographic data, without +changing the data (intensity and retention times) within the individual +chromatograms (\code{\link[=Chromatogram]{Chromatogram()}} objects). +\itemize{ +\item \code{filterColumnsIntensityAbove}: subsets a \code{MChromatograms} objects keeping +only columns (samples) for which \code{value} is larger than the provided +\code{threshold} in \code{which} rows (i.e. if \code{which = "any"} a +column is kept if \strong{any} of the chromatograms in that column have a +\code{value} larger than \code{threshold} or with \code{which = "all"} \strong{all} +chromatograms in that column fulfill this criteria). Parameter \code{value} +allows to define on which value the comparison should be performed, with +\code{value = "bpi"} the maximum intensity of each chromatogram is compared to +\code{threshold}, with \verb{value = "tic" the total sum of intensities of each chromatogram is compared to }threshold\verb{. For }XChromatograms\verb{object,}value = "maxo"\code{and}value = "into"\verb{are supported which compares the largest intensity of all identified chromatographic peaks in the chromatogram with}threshold`, or the integrated peak area, respectively. +\item \code{filterColumnsKeepTop}: subsets a \code{MChromatograms} object keeping the top +\code{n} columns sorted by the value specified with \code{sortBy}. In detail, for +each column the value defined by \code{sortBy} is extracted from each +chromatogram and aggregated using the \code{aggregationFun}. Thus, by default, +for each chromatogram the maximum intensity is determined +(\code{sortBy = "bpi"}) and these values are summed up for chromatograms in the +same column (\code{aggregationFun = sum}). The columns are then sorted by these +values and the top \code{n} columns are retained in the returned +\code{MChromatograms}. Similar to the \code{filterColumnsIntensityAbove} function, +this function allows to use for \code{XChromatograms} objects to sort the +columns by column \code{sortBy = "maxo"} or \code{sortBy = "into"} of the +\code{chromPeaks} matrix. +} +} +\examples{ + +chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) +chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) +chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + +chrs <- MChromatograms(list(chr1, chr2, chr1, chr3, chr2, chr3), + ncol = 3, byrow = FALSE) +chrs + +#### filterColumnsIntensityAbove +## +## Keep all columns with for which the maximum intensity of any of its +## chromatograms is larger 90 +filterColumnsIntensityAbove(chrs, threshold = 90) + +## Require that ALL chromatograms in a column have a value larger 90 +filterColumnsIntensityAbove(chrs, threshold = 90, which = "all") + +## If none of the columns fulfills the criteria no columns are returned +filterColumnsIntensityAbove(chrs, threshold = 900) + +## Filtering XChromatograms allow in addition to filter on the columns +## "maxo" or "into" of the identified chromatographic peaks within each +## chromatogram. + +#### filterColumnsKeepTop +## +## Keep the 2 columns with the highest sum of maximal intensities in their +## chromatograms +filterColumnsKeepTop(chrs, n = 1) + +## Keep the 50 percent of columns with the highest total sum of signal. Note +## that n will be rounded to the next larger integer value +filterColumnsKeepTop(chrs, n = 0.5 * ncol(chrs), sortBy = "tic") +} +\author{ +Johannes Rainer +} diff --git a/man/findChromPeaks-Chromatogram-CentWaveParam.Rd b/man/findChromPeaks-Chromatogram-CentWaveParam.Rd index 68f892623..664e44fb4 100644 --- a/man/findChromPeaks-Chromatogram-CentWaveParam.Rd +++ b/man/findChromPeaks-Chromatogram-CentWaveParam.Rd @@ -1,60 +1,92 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/methods-Chromatogram.R -\docType{methods} +% Please edit documentation in R/methods-Chromatogram.R, +% R/methods-MChromatograms.R \name{findChromPeaks,Chromatogram,CentWaveParam-method} \alias{findChromPeaks,Chromatogram,CentWaveParam-method} +\alias{findChromPeaks,MChromatograms,CentWaveParam-method} +\alias{findChromPeaks-Chromatogram-CentWaveParam} +\alias{findChromPeaks,MChromatograms,MatchedFilterParam-method} \title{centWave-based peak detection in purely chromatographic data} \usage{ \S4method{findChromPeaks}{Chromatogram,CentWaveParam}(object, param, ...) + +\S4method{findChromPeaks}{MChromatograms,CentWaveParam}(object, param, BPPARAM = bpparam(), ...) + +\S4method{findChromPeaks}{MChromatograms,MatchedFilterParam}(object, param, BPPARAM = BPPARAM, ...) } \arguments{ -\item{object}{a \link{Chromatogram} or \link{Chromatograms} object.} +\item{object}{a \link{Chromatogram} or \link{MChromatograms} object.} \item{param}{a \link{CentWaveParam} object specifying the settings for the peak detection. See \code{\link[=peaksWithCentWave]{peaksWithCentWave()}} for the description of arguments used for peak detection.} \item{...}{currently ignored.} + +\item{BPPARAM}{a parameter class specifying if and how parallel processing +should be performed (only for \code{XChromatograms} objects). It defaults to +\code{bpparam()}. See \code{\link[=bpparam]{bpparam()}} for more information.} } \value{ -If called on a \code{Chromatogram} object, the method returns a \code{matrix} with -the identified peaks. See \code{\link[=peaksWithCentWave]{peaksWithCentWave()}} for details on the matrix -content. +If called on a \code{Chromatogram} object, the method returns an \link{XChromatogram} +object with the identified peaks. See \code{\link[=peaksWithCentWave]{peaksWithCentWave()}} for details on +the peak matrix content. } \description{ -\code{findChromPeaks} on a \link{Chromatogram} or \link{Chromatograms} object with a +\code{findChromPeaks} on a \link{Chromatogram} or \link{MChromatograms} object with a \link{CentWaveParam} parameter object performs centWave-based peak detection on purely chromatographic data. See \link{centWave} for details on the method and \link{CentWaveParam} for details on the parameter class. Note that not all settings from the \code{CentWaveParam} will be used. See \code{\link[=peaksWithCentWave]{peaksWithCentWave()}} for the arguments used for peak detection on purely chromatographic data. + +After chromatographic peak detection, identified peaks can also be \emph{refined} +with the \code{\link[=refineChromPeaks]{refineChromPeaks()}} method, which can help to reduce peak +detection artifacts. } \examples{ -od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), - mode = "onDisk") +## Loading a test data set with identified chromatographic peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +faahko_sub <- filterRt(faahko_sub, c(2500, 3700)) + +## +od <- as(filterFile(faahko_sub, 1L), "OnDiskMSnExp") ## Extract chromatographic data for a small m/z range chr <- chromatogram(od, mz = c(272.1, 272.3))[1, 1] ## Identify peaks with default settings -pks <- findChromPeaks(chr, CentWaveParam()) -pks - -## Plot the identified peaks -plot(chr, type = "h") -rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], - ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#ff000020") - -## Modify the settings -cwp <- CentWaveParam(snthresh = 5, peakwidth = c(10, 60)) -pks <- findChromPeaks(chr, cwp) -pks - -plot(chr, type = "h") -rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], - ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#00ff0020") +xchr <- findChromPeaks(chr, CentWaveParam()) +xchr + +## Plot data and identified peaks. +plot(xchr) + +## Perform peak detection on an MChromatograms object +od3 <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), + system.file("cdf/KO/ko16.CDF", package = "faahKO"), + system.file("cdf/KO/ko18.CDF", package = "faahKO")), + mode = "onDisk") + +## Disable parallel processing for this example +register(SerialParam()) + +## Extract chromatograms for a m/z - retention time slice +chrs <- chromatogram(od3, mz = 344, rt = c(2500, 3500)) + +## Perform peak detection using CentWave +xchrs <- findChromPeaks(chrs, param = CentWaveParam()) +xchrs + +## Extract the identified chromatographic peaks +chromPeaks(xchrs) + +## plot the result +plot(xchrs) } \seealso{ \code{\link[=peaksWithCentWave]{peaksWithCentWave()}} for the downstream function and \link{centWave} diff --git a/man/findChromPeaks-Chromatogram-MatchedFilter.Rd b/man/findChromPeaks-Chromatogram-MatchedFilter.Rd index a2dbb2464..0b1151856 100644 --- a/man/findChromPeaks-Chromatogram-MatchedFilter.Rd +++ b/man/findChromPeaks-Chromatogram-MatchedFilter.Rd @@ -1,15 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-Chromatogram.R -\docType{methods} \name{findChromPeaks,Chromatogram,MatchedFilterParam-method} \alias{findChromPeaks,Chromatogram,MatchedFilterParam-method} \title{matchedFilter-based peak detection in purely chromatographic data} \usage{ -\S4method{findChromPeaks}{Chromatogram,MatchedFilterParam}(object, param, - ...) +\S4method{findChromPeaks}{Chromatogram,MatchedFilterParam}(object, param, ...) } \arguments{ -\item{object}{a \link{Chromatogram} or \link{Chromatograms} object.} +\item{object}{a \link{Chromatogram} or \link{MChromatograms} object.} \item{param}{a \link{MatchedFilterParam} object specifying the settings for the peak detection. See \code{\link[=peaksWithMatchedFilter]{peaksWithMatchedFilter()}} for the description of @@ -23,7 +21,7 @@ the identified peaks. See \code{\link[=peaksWithMatchedFilter]{peaksWithMatchedF matrix content. } \description{ -\code{findChromPeaks} on a \link{Chromatogram} or \link{Chromatograms} object with a +\code{findChromPeaks} on a \link{Chromatogram} or \link{MChromatograms} object with a \link{MatchedFilterParam} parameter object performs matchedFilter-based peak detection on purely chromatographic data. See \link{matchedFilter} for details on the method and \link{MatchedFilterParam} for details on the parameter class. @@ -33,29 +31,23 @@ on purely chromatographic data. } \examples{ -od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), - mode = "onDisk") +## Loading a test data set with identified chromatographic peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +faahko_sub <- filterRt(faahko_sub, c(2500, 3700)) + +## +od <- as(filterFile(faahko_sub, 1L), "OnDiskMSnExp") ## Extract chromatographic data for a small m/z range chr <- chromatogram(od, mz = c(272.1, 272.3))[1, 1] ## Identify peaks with default settings -pks <- findChromPeaks(chr, MatchedFilterParam()) -pks +xchr <- findChromPeaks(chr, MatchedFilterParam()) ## Plot the identified peaks -plot(chr, type = "h") -rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], - ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#ff000020") - -## Modify the settings -mfp <- MatchedFilterParam(fwhm = 60) -pks <- findChromPeaks(chr, mfp) -pks - -plot(chr, type = "h") -rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], - ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], col = "#00ff0020") +plot(xchr) } \seealso{ \code{\link[=peaksWithMatchedFilter]{peaksWithMatchedFilter()}} for the downstream function and diff --git a/man/findChromPeaks-centWave.Rd b/man/findChromPeaks-centWave.Rd index e2602ab6c..90f6e3055 100644 --- a/man/findChromPeaks-centWave.Rd +++ b/man/findChromPeaks-centWave.Rd @@ -8,7 +8,6 @@ \alias{CentWaveParam-class} \alias{CentWaveParam} \alias{findChromPeaks,OnDiskMSnExp,CentWaveParam-method} -\alias{show,CentWaveParam-method} \alias{ppm,CentWaveParam-method} \alias{ppm} \alias{ppm<-,CentWaveParam-method} @@ -62,16 +61,31 @@ \alias{roiScales<-} \title{Chromatographic peak detection using the centWave method} \usage{ -CentWaveParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, - prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, - mzdiff = -0.001, fitgauss = FALSE, noise = 0, - verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = numeric()) - -\S4method{findChromPeaks}{OnDiskMSnExp,CentWaveParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) - -\S4method{show}{CentWaveParam}(object) +CentWaveParam( + ppm = 25, + peakwidth = c(20, 50), + snthresh = 10, + prefilter = c(3, 100), + mzCenterFun = "wMean", + integrate = 1L, + mzdiff = -0.001, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + roiList = list(), + firstBaselineCheck = TRUE, + roiScales = numeric(), + extendLengthMSW = FALSE +) + +\S4method{findChromPeaks}{OnDiskMSnExp,CentWaveParam}( + object, + param, + BPPARAM = bpparam(), + return.type = "XCMSnExp", + msLevel = 1L, + ... +) \S4method{ppm}{CentWaveParam}(object) @@ -184,12 +198,21 @@ be represented by a \code{list} of elements or a single row \code{data.frame}.} \item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous -data within regions of interest is checked to be above the first baseline.} +data within regions of interest is checked to be above the first baseline. +In detail, a first rough estimate of the noise is calculated and peak +detection is performed only in regions in which multiple sequential +signals are higher than this first estimated baseline/noise level.} \item{roiScales}{Optional numeric vector with length equal to \code{roiList} defining the scale for each region of interest in \code{roiList} that should be used for the centWave-wavelets.} +\item{extendLengthMSW}{Option to force centWave to use all scales when +running centWave rather than truncating with the EIC length. Uses the "open" +method to extend the EIC to a integer base-2 length prior to being passed to +\code{convolve} rather than the default "reflect" method. See +https://github.com/sneumann/xcms/issues/445 for more information.} + \item{object}{For \code{findChromPeaks}: an \code{\link{OnDiskMSnExp}} object containing the MS- and all other experiment-relevant data. @@ -212,6 +235,8 @@ return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \item{msLevel}{\code{integer(1)} defining the MS level on which the peak detection should be performed. Defaults to \code{msLevel = 1}.} +\item{...}{ignored.} + \item{value}{The value for the slot.} \item{f}{For \code{integrate}: a \code{CentWaveParam} object.} @@ -309,8 +334,7 @@ Parallel processing (one process per sample) is supported and can \section{Slots}{ \describe{ -\item{\code{.__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed +\item{\code{ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,extendLengthMSW}}{See corresponding parameter above. Slots values should exclusively be accessed \emph{via} the corresponding getter and setter methods listed above.} }} @@ -318,17 +342,16 @@ the version from the class. Slots values should exclusively be accessed These methods and classes are part of the updated and modernized \code{xcms} user interface which will eventually replace the \code{\link{findPeaks}} methods. It supports peak detection on - \code{\link{MSnExp}} and \code{\link{OnDiskMSnExp}} - objects (both defined in the \code{MSnbase} package). All of the settings - to the centWave algorithm can be passed with a \code{CentWaveParam} - object. + \code{\link{OnDiskMSnExp}} objects (defined in the \code{MSnbase} + package). All of the settings to the centWave algorithm can be passed + with a \code{CentWaveParam} object. } \examples{ ## Create a CentWaveParam object. Note that the noise is set to 10000 to ## speed up the execution of the example - in a real use case the default ## value should be used, or it should be set to a reasonable value. -cwp <- CentWaveParam(ppm = 20, noise = 10000) +cwp <- CentWaveParam(ppm = 20, noise = 10000, prefilter = c(3, 10000)) ## Change snthresh parameter snthresh(cwp) <- 25 cwp @@ -340,7 +363,7 @@ library(faahKO) library(xcms) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) -raw_data <- readMSData(fls[1:2], mode = "onDisk") +raw_data <- readMSData(fls[1], mode = "onDisk") ## Perform the peak detection using the settings defined above. res <- findChromPeaks(raw_data, param = cwp) @@ -361,11 +384,12 @@ detection in purely chromatographic data. \code{\link{XCMSnExp}} for the object containing the results of the peak detection. -Other peak detection methods: \code{\link{chromatographic-peak-detection}}, - \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, - \code{\link{findChromPeaks-massifquant}}, - \code{\link{findChromPeaks-matchedFilter}}, - \code{\link{findPeaks-MSW}} +Other peak detection methods: +\code{\link{chromatographic-peak-detection}}, +\code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, +\code{\link{findChromPeaks-massifquant}}, +\code{\link{findChromPeaks-matchedFilter}}, +\code{\link{findPeaks-MSW}} } \author{ Ralf Tautenhahn, Johannes Rainer diff --git a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd index abb1560ce..fd2382dbf 100644 --- a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd +++ b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd @@ -8,7 +8,6 @@ \alias{CentWavePredIsoParam-class} \alias{CentWavePredIsoParam} \alias{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam-method} -\alias{show,CentWavePredIsoParam-method} \alias{snthreshIsoROIs,CentWavePredIsoParam-method} \alias{snthreshIsoROIs} \alias{snthreshIsoROIs<-,CentWavePredIsoParam-method} @@ -30,18 +29,35 @@ \alias{polarity<-} \title{Two-step centWave peak detection considering also isotopes} \usage{ -CentWavePredIsoParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, - prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, - mzdiff = -0.001, fitgauss = FALSE, noise = 0, - verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = numeric(), - snthreshIsoROIs = 6.25, maxCharge = 3, maxIso = 5, - mzIntervalExtension = TRUE, polarity = "unknown") - -\S4method{findChromPeaks}{OnDiskMSnExp,CentWavePredIsoParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) - -\S4method{show}{CentWavePredIsoParam}(object) +CentWavePredIsoParam( + ppm = 25, + peakwidth = c(20, 50), + snthresh = 10, + prefilter = c(3, 100), + mzCenterFun = "wMean", + integrate = 1L, + mzdiff = -0.001, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + roiList = list(), + firstBaselineCheck = TRUE, + roiScales = numeric(), + snthreshIsoROIs = 6.25, + maxCharge = 3, + maxIso = 5, + mzIntervalExtension = TRUE, + polarity = "unknown" +) + +\S4method{findChromPeaks}{OnDiskMSnExp,CentWavePredIsoParam}( + object, + param, + BPPARAM = bpparam(), + return.type = "XCMSnExp", + msLevel = 1L, + ... +) \S4method{snthreshIsoROIs}{CentWavePredIsoParam}(object) @@ -122,7 +138,10 @@ be represented by a \code{list} of elements or a single row \code{data.frame}.} \item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous -data within regions of interest is checked to be above the first baseline.} +data within regions of interest is checked to be above the first baseline. +In detail, a first rough estimate of the noise is calculated and peak +detection is performed only in regions in which multiple sequential +signals are higher than this first estimated baseline/noise level.} \item{roiScales}{Optional numeric vector with length equal to \code{roiList} defining the scale for each region of interest in \code{roiList} that @@ -168,6 +187,8 @@ return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \item{msLevel}{\code{integer(1)} defining the MS level on which the peak detection should be performed. Defaults to \code{msLevel = 1}.} +\item{...}{ignored.} + \item{value}{The value for the slot.} } \value{ @@ -234,17 +255,15 @@ Parallel processing (one process per sample) is supported and can \section{Slots}{ \describe{ -\item{\code{.__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity}}{See corresponding parameter above.} }} \note{ These methods and classes are part of the updated and modernized \code{xcms} user interface which will eventually replace the \code{\link{findPeaks}} methods. It supports chromatographic peak - detection on \code{\link{MSnExp}} and - \code{\link{OnDiskMSnExp}} objects (both defined in the + detection on + \code{\link{OnDiskMSnExp}} objects (defined in the \code{MSnbase} package). All of the settings to the algorithm can be passed with a \code{CentWavePredIsoParam} object. } @@ -266,11 +285,12 @@ The \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}} core \code{\link{XCMSnExp}} for the object containing the results of the peak detection. -Other peak detection methods: \code{\link{chromatographic-peak-detection}}, - \code{\link{findChromPeaks-centWave}}, - \code{\link{findChromPeaks-massifquant}}, - \code{\link{findChromPeaks-matchedFilter}}, - \code{\link{findPeaks-MSW}} +Other peak detection methods: +\code{\link{chromatographic-peak-detection}}, +\code{\link{findChromPeaks-centWave}}, +\code{\link{findChromPeaks-massifquant}}, +\code{\link{findChromPeaks-matchedFilter}}, +\code{\link{findPeaks-MSW}} } \author{ Hendrik Treutler, Johannes Rainer diff --git a/man/findChromPeaks-massifquant.Rd b/man/findChromPeaks-massifquant.Rd index 70dd3bd09..08e635d07 100644 --- a/man/findChromPeaks-massifquant.Rd +++ b/man/findChromPeaks-massifquant.Rd @@ -8,7 +8,6 @@ \alias{MassifquantParam-class} \alias{MassifquantParam} \alias{findChromPeaks,OnDiskMSnExp,MassifquantParam-method} -\alias{show,MassifquantParam-method} \alias{ppm,MassifquantParam-method} \alias{ppm<-,MassifquantParam-method} \alias{peakwidth,MassifquantParam-method} @@ -51,17 +50,32 @@ \alias{withWave<-} \title{Chromatographic peak detection using the massifquant method} \usage{ -MassifquantParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, - prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, - mzdiff = -0.001, fitgauss = FALSE, noise = 0, - verboseColumns = FALSE, criticalValue = 1.125, - consecMissedLimit = 2, unions = 1, checkBack = 0, - withWave = FALSE) - -\S4method{findChromPeaks}{OnDiskMSnExp,MassifquantParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) - -\S4method{show}{MassifquantParam}(object) +MassifquantParam( + ppm = 25, + peakwidth = c(20, 50), + snthresh = 10, + prefilter = c(3, 100), + mzCenterFun = "wMean", + integrate = 1L, + mzdiff = -0.001, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + criticalValue = 1.125, + consecMissedLimit = 2, + unions = 1, + checkBack = 0, + withWave = FALSE +) + +\S4method{findChromPeaks}{OnDiskMSnExp,MassifquantParam}( + object, + param, + BPPARAM = bpparam(), + return.type = "XCMSnExp", + msLevel = 1L, + ... +) \S4method{ppm}{MassifquantParam}(object) @@ -237,6 +251,8 @@ return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \item{msLevel}{\code{integer(1)} defining the MS level on which the peak detection should be performed. Defaults to \code{msLevel = 1}.} +\item{...}{ignored.} + \item{value}{The value for the slot.} \item{f}{For \code{integrate}: a \code{MassifquantParam} object.} @@ -347,17 +363,17 @@ Parallel processing (one process per sample) is supported and can \section{Slots}{ \describe{ -\item{\code{.__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave}}{See corresponding parameter above. Slots values should +exclusively be accessed \emph{via} the corresponding getter and setter +methods listed above.} }} \note{ These methods and classes are part of the updated and modernized \code{xcms} user interface which will eventually replace the \code{\link{findPeaks}} methods. It supports chromatographic peak - detection on \code{\link{MSnExp}} and - \code{\link{OnDiskMSnExp}} objects (both defined in the + detection on + \code{\link{OnDiskMSnExp}} objects (defined in the \code{MSnbase} package). All of the settings to the massifquant and centWave algorithm can be passed with a \code{MassifquantParam} object. } @@ -365,8 +381,9 @@ These methods and classes are part of the updated and modernized ## Create a MassifquantParam object. mqp <- MassifquantParam() -## Change snthresh parameter +## Change snthresh prefilter parameters snthresh(mqp) <- 30 +prefilter(mqp) <- c(6, 10000) mqp ## Perform the peak detection using massifquant on the files from the @@ -376,7 +393,7 @@ library(faahKO) library(MSnbase) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) -raw_data <- readMSData(fls[1:2], mode = "onDisk") +raw_data <- readMSData(fls[1], mode = "onDisk") ## Perform the peak detection using the settings defined above. res <- findChromPeaks(raw_data, param = mqp) head(chromPeaks(res)) @@ -393,11 +410,12 @@ The \code{\link{do_findChromPeaks_massifquant}} core API function \code{\link{XCMSnExp}} for the object containing the results of the peak detection. -Other peak detection methods: \code{\link{chromatographic-peak-detection}}, - \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, - \code{\link{findChromPeaks-centWave}}, - \code{\link{findChromPeaks-matchedFilter}}, - \code{\link{findPeaks-MSW}} +Other peak detection methods: +\code{\link{chromatographic-peak-detection}}, +\code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, +\code{\link{findChromPeaks-centWave}}, +\code{\link{findChromPeaks-matchedFilter}}, +\code{\link{findPeaks-MSW}} } \author{ Christopher Conley, Johannes Rainer diff --git a/man/findChromPeaks-matchedFilter.Rd b/man/findChromPeaks-matchedFilter.Rd index d0a76846e..be85e19c3 100644 --- a/man/findChromPeaks-matchedFilter.Rd +++ b/man/findChromPeaks-matchedFilter.Rd @@ -8,7 +8,6 @@ \alias{MatchedFilterParam-class} \alias{MatchedFilterParam} \alias{findChromPeaks,OnDiskMSnExp,MatchedFilterParam-method} -\alias{show,MatchedFilterParam-method} \alias{binSize,MatchedFilterParam-method} \alias{binSize} \alias{binSize<-,MatchedFilterParam-method} @@ -49,15 +48,28 @@ \alias{index<-} \title{Peak detection in the chromatographic time domain} \usage{ -MatchedFilterParam(binSize = 0.1, impute = "none", - baseValue = numeric(), distance = numeric(), fwhm = 30, - sigma = fwhm/2.3548, max = 5, snthresh = 10, steps = 2, - mzdiff = 0.8 - binSize * steps, index = FALSE) - -\S4method{findChromPeaks}{OnDiskMSnExp,MatchedFilterParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) - -\S4method{show}{MatchedFilterParam}(object) +MatchedFilterParam( + binSize = 0.1, + impute = "none", + baseValue = numeric(), + distance = numeric(), + fwhm = 30, + sigma = fwhm/2.3548, + max = 5, + snthresh = 10, + steps = 2, + mzdiff = 0.8 - binSize * steps, + index = FALSE +) + +\S4method{findChromPeaks}{OnDiskMSnExp,MatchedFilterParam}( + object, + param, + BPPARAM = bpparam(), + return.type = "XCMSnExp", + msLevel = 1L, + ... +) \S4method{binSize}{MatchedFilterParam}(object) @@ -167,6 +179,8 @@ return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \item{msLevel}{\code{integer(1)} defining the MS level on which the peak detection should be performed. Defaults to \code{msLevel = 1}.} +\item{...}{ignored.} + \item{value}{The value for the slot.} \item{x}{For \code{max}: a \code{MatchedFilterParam} object.} @@ -261,17 +275,17 @@ Parallel processing (one process per sample) is supported and can \section{Slots}{ \describe{ -\item{\code{.__classVersion__,binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index}}{See corresponding parameter above. Slots values should exclusively +be accessed \emph{via} the corresponding getter and setter methods listed +above.} }} \note{ These methods and classes are part of the updated and modernized \code{xcms} user interface which will eventually replace the \code{\link{findPeaks}} methods. It supports chromatographic peak - detection on \code{\link{MSnExp}} and - \code{\link{OnDiskMSnExp}} objects (both defined in the + detection on + \code{\link{OnDiskMSnExp}} objects (defined in the \code{MSnbase} package). All of the settings to the matchedFilter algorithm can be passed with a \code{MatchedFilterParam} object. } @@ -291,11 +305,10 @@ library(faahKO) library(MSnbase) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) -raw_data <- readMSData(fls[1:2], mode = "onDisk") +raw_data <- readMSData(fls[1], mode = "onDisk") ## Perform the chromatographic peak detection using the settings defined ## above. Note that we are also disabling parallel processing in this ## example by registering a "SerialParam" -register(SerialParam()) res <- findChromPeaks(raw_data, param = mfp) head(chromPeaks(res)) } @@ -315,11 +328,12 @@ peak detection in purely chromatographic data. \code{\link{XCMSnExp}} for the object containing the results of the chromatographic peak detection. -Other peak detection methods: \code{\link{chromatographic-peak-detection}}, - \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, - \code{\link{findChromPeaks-centWave}}, - \code{\link{findChromPeaks-massifquant}}, - \code{\link{findPeaks-MSW}} +Other peak detection methods: +\code{\link{chromatographic-peak-detection}}, +\code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, +\code{\link{findChromPeaks-centWave}}, +\code{\link{findChromPeaks-massifquant}}, +\code{\link{findPeaks-MSW}} } \author{ Colin A Smith, Johannes Rainer diff --git a/man/findChromPeaksIsolationWindow.Rd b/man/findChromPeaksIsolationWindow.Rd new file mode 100644 index 000000000..9158783a3 --- /dev/null +++ b/man/findChromPeaksIsolationWindow.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{findChromPeaksIsolationWindow} +\alias{findChromPeaksIsolationWindow} +\title{Data independent acquisition (DIA): peak detection in isolation windows} +\usage{ +findChromPeaksIsolationWindow( + object, + param, + msLevel = 2L, + isolationWindow = isolationWindowTargetMz(object), + ... +) +} +\arguments{ +\item{object}{\code{OnDiskMSnExp} or \code{XCMSnExp} object with the DIA data.} + +\item{param}{Peak detection parameter object, such as a +\linkS4class{CentWaveParam} object defining and configuring the chromographic +peak detection algorithm. +See also \code{\link[=findChromPeaks]{findChromPeaks()}} for more details.} + +\item{msLevel}{\code{integer(1)} specifying the MS level in which the peak +detection should be performed. By default \code{msLevel = 2L}.} + +\item{isolationWindow}{\code{factor} or similar defining the isolation windows in +which the peak detection should be performed with length equal to the +number of spectra in \code{object}.} + +\item{...}{currently not used.} +} +\value{ +An \code{XCMSnExp} object with the chromatographic peaks identified in spectra of +each isolation window from each file added to the \code{chromPeaks} matrix. +Isolation window definition for each identified peak are stored as additional +columns in \code{\link[=chromPeakData]{chromPeakData()}}. +} +\description{ +The \code{findChromPeaksIsolationWindow} function allows to perform a +chromatographic peak detection in MS level > 1 spectra of certain isolation +windows (e.g. SWATH pockets). The function performs a peak detection, +separately for all spectra belonging to the same isolation window and adds +them to the \code{\link[=chromPeaks]{chromPeaks()}} matrix of the result object, information about +the isolation window they were detected in is added to \code{\link[=chromPeakData]{chromPeakData()}}. +Note that peak detection with this method does not remove previously +identified chromatographic peaks (e.g. on MS1 level using the +\code{\link[=findChromPeaks]{findChromPeaks()}} function but adds newly identified peaks to the existing +\code{\link[=chromPeaks]{chromPeaks()}} matrix. + +Isolation windows can be defined with the \code{isolationWindow} parameter, that +by default uses the definition of \code{\link[=isolationWindowTargetMz]{isolationWindowTargetMz()}}, i.e. +chromatographic peak detection is performed for all spectra with the same +isolation window target m/z (seprarately for each file). The parameter +\code{param} allows to define and configure the peak detection algorithm (see +\code{\link[=findChromPeaks]{findChromPeaks()}} for more information). +} +\seealso{ +\code{\link[=reconstructChromPeakSpectra]{reconstructChromPeakSpectra()}} for the function to reconstruct +MS2 spectra for each MS1 chromatographic peak. +} +\author{ +Johannes Rainer, Michael Witting +} diff --git a/man/findMZ.Rd b/man/findMZ.Rd index feb325485..9c4182f9a 100644 --- a/man/findMZ.Rd +++ b/man/findMZ.Rd @@ -39,15 +39,16 @@ findMZ(object, find, ppmE=25, print=TRUE) \code{\link{findneutral}}, } \examples{ - \dontrun{ - library(msdata) - mzdatapath <- system.file("iontrap", package = "msdata") - mzdatafiles<-list.files(mzdatapath, pattern = "extracted.mzData", recursive = TRUE, full.names = TRUE) - xs <- xcmsSet(mzdatafiles, method = "MS1") - ##takes only one file from the file set - xfrag <- xcmsFragments(xs) - found<-findMZ(xfrag, 657.3433, 50) - } +\dontrun{ + library(msdata) + mzMLpath <- system.file("iontrap", package = "msdata") + mzMLfiles<-list.files(mzMLpath, pattern = "extracted.mzML", + recursive = TRUE, full.names = TRUE) + xs <- xcmsSet(mzMLfiles, method = "MS1") + ##takes only one file from the file set + xfrag <- xcmsFragments(xs) + found<-findMZ(xfrag, 657.3433, 50) +} } \author{H. Paul Benton, \email{hpaul.beonton08@imperial.ac.uk}} diff --git a/man/findPeaks-MSW.Rd b/man/findPeaks-MSW.Rd index 964ec70b7..a928c0bfd 100644 --- a/man/findPeaks-MSW.Rd +++ b/man/findPeaks-MSW.Rd @@ -8,7 +8,6 @@ \alias{MSWParam-class} \alias{MSWParam} \alias{findChromPeaks,OnDiskMSnExp,MSWParam-method} -\alias{show,MSWParam-method} \alias{snthresh,MSWParam-method} \alias{snthresh<-,MSWParam-method} \alias{verboseColumns,MSWParam-method} @@ -51,15 +50,28 @@ \alias{addParams<-} \title{Single-spectrum non-chromatography MS data peak detection} \usage{ -MSWParam(snthresh = 3, verboseColumns = FALSE, scales = c(1, seq(2, - 30, 2), seq(32, 64, 4)), nearbyPeak = TRUE, peakScaleRange = 5, - ampTh = 0.01, minNoiseLevel = ampTh/snthresh, ridgeLength = 24, - peakThr = NULL, tuneIn = FALSE, ...) - -\S4method{findChromPeaks}{OnDiskMSnExp,MSWParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) - -\S4method{show}{MSWParam}(object) +MSWParam( + snthresh = 3, + verboseColumns = FALSE, + scales = c(1, seq(2, 30, 2), seq(32, 64, 4)), + nearbyPeak = TRUE, + peakScaleRange = 5, + ampTh = 0.01, + minNoiseLevel = ampTh/snthresh, + ridgeLength = 24, + peakThr = NULL, + tuneIn = FALSE, + ... +) + +\S4method{findChromPeaks}{OnDiskMSnExp,MSWParam}( + object, + param, + BPPARAM = bpparam(), + return.type = "XCMSnExp", + msLevel = 1L, + ... +) \S4method{snthresh}{MSWParam}(object) @@ -131,15 +143,15 @@ of the peak in 2-D CWT coefficient matrix.} \item{peakThr}{numeric(1) with the minimum absolute intensity (above baseline) of peaks to be picked. If provided, the smoothing -function \code{\link{sav.gol}} function (in the \code{MassSpecWavelet}) -package is called to estimate the local intensity.} +Savitzky-Golay filter is used (in the \code{MassSpecWavelet}) +package to estimate the local intensity.} \item{tuneIn}{logical(1) whther to tune in the parameter estimation of the detected peaks.} \item{...}{Additional parameters to be passed to the -\code{\link{identifyMajorPeaks}} and -\code{\link{sav.gol}} functions from the +\code{\link{peakDetectionCWT}} and +\code{\link{identifyMajorPeaks}} functions from the \code{MassSpecWavelet} package.} \item{object}{For \code{findChromPeaks}: an @@ -229,7 +241,7 @@ The \code{findChromPeaks,OnDiskMSnExp,MSWParam} \code{addParams} slot of the object. This slot stores optional additional parameters to be passed to the \code{\link{identifyMajorPeaks}} and - \code{\link{sav.gol}} functions from the + \code{\link{peakDetectionCWT}} functions from the \code{MassSpecWavelet} package. } \details{ @@ -248,17 +260,15 @@ Parallel processing (one process per sample) is supported and can \section{Slots}{ \describe{ -\item{\code{.__classVersion__,snthresh,verboseColumns,scales,nearbyPeak,peakScaleRange,ampTh,minNoiseLevel,ridgeLength,peakThr,tuneIn,addParams}}{See corresponding parameter above. \code{.__classVersion__} stores the version from the class. Slots values -should exclusively be accessed \emph{via} the corresponding getter and -setter methods listed above.} +\item{\code{snthresh,verboseColumns,scales,nearbyPeak,peakScaleRange,ampTh,minNoiseLevel,ridgeLength,peakThr,tuneIn,addParams}}{See corresponding parameter above.} }} \note{ These methods and classes are part of the updated and modernized \code{xcms} user interface which will eventually replace the \code{\link{findPeaks}} methods. It supports peak detection on - \code{\link{MSnExp}} and \code{\link{OnDiskMSnExp}} - objects (both defined in the \code{MSnbase} package). All of the settings + \code{\link{OnDiskMSnExp}} + objects (defined in the \code{MSnbase} package). All of the settings to the algorithm can be passed with a \code{MSWParam} object. } \examples{ @@ -271,9 +281,9 @@ mp ## Loading a small subset of direct injection, single spectrum files library(msdata) -fticrf <- list.files(system.file("fticr", package = "msdata"), +fticrf <- list.files(system.file("fticr-mzML", package = "msdata"), recursive = TRUE, full.names = TRUE) -fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk") +fticr <- readMSData(fticrf[1], msLevel. = 1, mode = "onDisk") ## Perform the MSW peak detection on these: p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, @@ -289,11 +299,12 @@ The \code{\link{do_findPeaks_MSW}} core API function \code{\link{XCMSnExp}} for the object containing the results of the peak detection. -Other peak detection methods: \code{\link{chromatographic-peak-detection}}, - \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, - \code{\link{findChromPeaks-centWave}}, - \code{\link{findChromPeaks-massifquant}}, - \code{\link{findChromPeaks-matchedFilter}} +Other peak detection methods: +\code{\link{chromatographic-peak-detection}}, +\code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, +\code{\link{findChromPeaks-centWave}}, +\code{\link{findChromPeaks-massifquant}}, +\code{\link{findChromPeaks-matchedFilter}} } \author{ Joachim Kutzera, Steffen Neumann, Johannes Rainer diff --git a/man/findPeaks.MSW-xcmsRaw-method.Rd b/man/findPeaks.MSW-xcmsRaw-method.Rd index b074b3b11..bc1dae55a 100644 --- a/man/findPeaks.MSW-xcmsRaw-method.Rd +++ b/man/findPeaks.MSW-xcmsRaw-method.Rd @@ -1,13 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-xcmsRaw.R -\docType{methods} \name{findPeaks.MSW,xcmsRaw-method} \alias{findPeaks.MSW,xcmsRaw-method} \alias{findPeaks.MSW} \title{Peak detection for single-spectrum non-chromatography MS data} \usage{ -\S4method{findPeaks.MSW}{xcmsRaw}(object, snthresh = 3, - verbose.columns = FALSE, ...) +\S4method{findPeaks.MSW}{xcmsRaw}(object, snthresh = 3, verbose.columns = FALSE, ...) } \arguments{ \item{object}{The \code{\linkS4class{xcmsRaw}} object on which peak @@ -19,8 +17,8 @@ detection should be performed.} should be returned.} \item{...}{Additional parameters to be passed to the -\code{\link{identifyMajorPeaks}} and -\code{\link{sav.gol}} functions from the +\code{\link{peakDetectionCWT}} and +\code{\link{identifyMajorPeaks}} functions from the \code{MassSpecWavelet} package.} } \value{ diff --git a/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd b/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd index 1038580d3..d3febf1f8 100644 --- a/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd +++ b/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd @@ -10,7 +10,7 @@ \describe{ \item{object = "xcmsRaw"}{ \code{ - findPeaks.centWave(object, ppm=25, peakwidth=c(20,50), + findPeaks.centWave(object, ppm=25, peakwidth=c(20,50), prefilter=c(3,100), mzCenterFun="wMean", integrate=1, mzdiff=-0.001, fitgauss=FALSE, scanrange= numeric(), noise=0, sleep=0, verbose.columns=FALSE, xcmsPeaks, snthresh=6.25, maxcharge=3, maxiso=5, mzIntervalExtension=TRUE) } @@ -92,9 +92,10 @@ \code{sd} the standard deviation of local chromatographic noise. } \item{egauss}{RMSE of Gaussian fit} - \item{}{ - if \code{verbose.columns} is \code{TRUE} additionally : - } + + + if \code{verbose.columns} is \code{TRUE} additionally : + \item{mu}{Gaussian parameter mu} \item{sigma}{Gaussian parameter sigma} \item{h}{Gaussian parameter h} @@ -111,7 +112,7 @@ Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly sensitive feature detection for high resolution LC/MS" BMC Bioinformatics 2008, 9:504\\ - Hendrik Treutler and Steffen Neumann. + Hendrik Treutler and Steffen Neumann. "Prediction, detection, and validation of isotope clusters in mass spectrometry data" Submitted to Metabolites 2016, Special Issue "Bioinformatics and Data Analysis" } diff --git a/man/findPeaks.centWave-methods.Rd b/man/findPeaks.centWave-methods.Rd index 95148d274..a0c5bd2cd 100644 --- a/man/findPeaks.centWave-methods.Rd +++ b/man/findPeaks.centWave-methods.Rd @@ -104,9 +104,10 @@ \code{sd} the standard deviation of local chromatographic noise. } \item{egauss}{RMSE of Gaussian fit} - \item{}{ - if \code{verbose.columns} is \code{TRUE} additionally : - } + + + if \code{verbose.columns} is \code{TRUE} additionally : + \item{mu}{Gaussian parameter mu} \item{sigma}{Gaussian parameter sigma} \item{h}{Gaussian parameter h} diff --git a/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd b/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd index 9c921205d..ed8cd0cf0 100644 --- a/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd +++ b/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd @@ -109,9 +109,9 @@ \code{sd} the standard deviation of local chromatographic noise. } \item{egauss}{RMSE of Gaussian fit} - \item{}{ - if \code{verbose.columns} is \code{TRUE} additionally : - } + + if \code{verbose.columns} is \code{TRUE} additionally : + \item{mu}{Gaussian parameter mu} \item{sigma}{Gaussian parameter sigma} \item{h}{Gaussian parameter h} diff --git a/man/findPeaks.matchedFilter-xcmsRaw-method.Rd b/man/findPeaks.matchedFilter-xcmsRaw-method.Rd index 4d5ce0e3a..cccda048b 100644 --- a/man/findPeaks.matchedFilter-xcmsRaw-method.Rd +++ b/man/findPeaks.matchedFilter-xcmsRaw-method.Rd @@ -1,15 +1,23 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-xcmsRaw.R -\docType{methods} \name{findPeaks.matchedFilter,xcmsRaw-method} \alias{findPeaks.matchedFilter,xcmsRaw-method} \alias{findPeaks.matchedFilter} \title{Peak detection in the chromatographic time domain} \usage{ -\S4method{findPeaks.matchedFilter}{xcmsRaw}(object, fwhm = 30, - sigma = fwhm/2.3548, max = 5, snthresh = 10, step = 0.1, - steps = 2, mzdiff = 0.8 - step * steps, index = FALSE, sleep = 0, - scanrange = numeric()) +\S4method{findPeaks.matchedFilter}{xcmsRaw}( + object, + fwhm = 30, + sigma = fwhm/2.3548, + max = 5, + snthresh = 10, + step = 0.1, + steps = 2, + mzdiff = 0.8 - step * steps, + index = FALSE, + sleep = 0, + scanrange = numeric() +) } \arguments{ \item{object}{The \code{\linkS4class{xcmsRaw}} object on which peak detection diff --git a/man/findneutral.Rd b/man/findneutral.Rd index 1e1b76e7d..74f5bbb6f 100644 --- a/man/findneutral.Rd +++ b/man/findneutral.Rd @@ -39,15 +39,16 @@ findneutral(object, find, ppmE=25, print=TRUE) \code{\link{findMZ}}, } \examples{ - \dontrun{ - library(msdata) - mzdatapath <- system.file("iontrap", package = "msdata") - mzdatafiles<-list.files(mzdatapath, pattern = "extracted.mzData", recursive = TRUE, full.names = TRUE) - xs <- xcmsSet(mzdatafiles, method = "MS1") - ##takes only one file from the file set - xfrag <- xcmsFragments(xs) - found<-findneutral(xfrag, 58.1455, 50) - } +\dontrun{ + library(msdata) + mzMLpath <- system.file("iontrap", package = "msdata") + mzMLfiles<-list.files(mzMLpath, pattern = "extracted.mzML", + recursive = TRUE, full.names = TRUE) + xs <- xcmsSet(mzMLfiles, method = "MS1") + ##takes only one file from the file set + xfrag <- xcmsFragments(xs) + found<-findneutral(xfrag, 58.1455, 50) +} } \author{H. Paul Benton, \email{hpbenton@scripps.edu}} diff --git a/man/group.mzClust.Rd b/man/group.mzClust.Rd index 94e1c1014..07d771491 100644 --- a/man/group.mzClust.Rd +++ b/man/group.mzClust.Rd @@ -33,11 +33,12 @@ \examples{ \dontrun{ library(msdata) -mzdatapath <- system.file("fticr", package = "msdata") -mzdatafiles <- list.files(mzdatapath, recursive = TRUE, full.names = TRUE) +mzMLpath <- system.file("fticr-mzML", package = "msdata") +mzMLfiles <- list.files(mzMLpath, recursive = TRUE, full.names = TRUE) -xs <- xcmsSet(method="MSW", files=mzdatafiles, scales=c(1,7), SNR.method='data.mean' , winSize.noise=500, - peakThr=80000, amp.Th=0.005) +xs <- xcmsSet(method="MSW", files=mzMLfiles, scales=c(1,7), + SNR.method='data.mean' , winSize.noise=500, + peakThr=80000, amp.Th=0.005) xsg <- group(xs, method="mzClust") } diff --git a/man/group.nearest.Rd b/man/group.nearest.Rd index f40d87c50..b08675f45 100644 --- a/man/group.nearest.Rd +++ b/man/group.nearest.Rd @@ -41,40 +41,44 @@ } \examples{ - \dontrun{library(xcms) - library(faahKO) ## These files do not have this problem to correct for but just for an example - cdfpath <- system.file("cdf", package = "faahKO") - cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) +\dontrun{library(xcms) + library(faahKO) + ## These files do not have this problem to correct for + ## but just for an example + cdfpath <- system.file("cdf", package = "faahKO") + cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) - xset<-xcmsSet(cdffiles) + xset<-xcmsSet(cdffiles) - gxset<-group(xset, method="nearest") - ## this is the same as - # gxset<-group.nearest(xset) - nrow(gxset@groups) == 1096 ## the number of features before minFrac + gxset<-group(xset, method="nearest") + nrow(gxset@groups) == 1096 ## the number of features before minFrac - post.minFrac<-function(object, minFrac=0.5){ - ix.minFrac<-sapply(1:length(unique(sampclass(object))), function(x, object, mf){ - meta<-groups(object) - minFrac.idx<-numeric(length=nrow(meta)) - idx<-which(meta[,levels(sampclass(object))[x]] >= mf*length(which(levels(sampclass(object))[x] == sampclass(object)) )) - minFrac.idx[idx]<-1 - return(minFrac.idx) - }, object, minFrac) - ix.minFrac<-as.logical(apply(ix.minFrac, 1, sum)) - ix<-which(ix.minFrac == TRUE) - return(ix) - } + post.minFrac<-function(object, minFrac=0.5){ + ix.minFrac<-sapply(1:length(unique(sampclass(object))), + function(x, object, mf){ + meta<-groups(object) + minFrac.idx<-numeric(length=nrow(meta)) + idx<-which( + meta[,levels(sampclass(object))[x]] >= + mf*length(which(levels(sampclass(object))[x] + == sampclass(object)) )) + minFrac.idx[idx]<-1 + return(minFrac.idx) + }, object, minFrac) + ix.minFrac<-as.logical(apply(ix.minFrac, 1, sum)) + ix<-which(ix.minFrac == TRUE) + return(ix) + } - ## using the above function we can get a post processing minFrac - idx<-post.minFrac(gxset) + ## using the above function we can get a post processing minFrac + idx<-post.minFrac(gxset) - gxset.post<-gxset ## copy the xcmsSet object - gxset.post@groupidx<-gxset@groupidx[idx] - gxset.post@groups<-gxset@groups[idx,] + gxset.post<-gxset ## copy the xcmsSet object + gxset.post@groupidx<-gxset@groupidx[idx] + gxset.post@groups<-gxset@groups[idx,] - nrow(gxset.post@groups) == 465 ## this is the number of features after minFrac - } + nrow(gxset.post@groups) == 465 ## number of features after minFrac +} } diff --git a/man/groupChromPeaks-density.Rd b/man/groupChromPeaks-density.Rd index cdc2c58e6..fd16eb8a4 100644 --- a/man/groupChromPeaks-density.Rd +++ b/man/groupChromPeaks-density.Rd @@ -6,7 +6,6 @@ \alias{groupChromPeaks-density} \alias{PeakDensityParam-class} \alias{PeakDensityParam} -\alias{show,PeakDensityParam-method} \alias{sampleGroups,PeakDensityParam-method} \alias{sampleGroups} \alias{sampleGroups<-,PeakDensityParam-method} @@ -32,11 +31,14 @@ \alias{groupChromPeaks,XCMSnExp,PeakDensityParam-method} \title{Peak grouping based on time dimension peak densities} \usage{ -PeakDensityParam(sampleGroups = numeric(), bw = 30, - minFraction = 0.5, minSamples = 1, binSize = 0.25, - maxFeatures = 50) - -\S4method{show}{PeakDensityParam}(object) +PeakDensityParam( + sampleGroups = numeric(), + bw = 30, + minFraction = 0.5, + minSamples = 1, + binSize = 0.25, + maxFeatures = 50 +) \S4method{sampleGroups}{PeakDensityParam}(object) @@ -62,7 +64,7 @@ PeakDensityParam(sampleGroups = numeric(), bw = 30, \S4method{maxFeatures}{PeakDensityParam}(object) <- value -\S4method{groupChromPeaks}{XCMSnExp,PeakDensityParam}(object, param) +\S4method{groupChromPeaks}{XCMSnExp,PeakDensityParam}(object, param, msLevel = 1L, add = FALSE) } \arguments{ \item{sampleGroups}{A vector of the same length than samples defining the @@ -74,7 +76,7 @@ same group).} \item{bw}{\code{numeric(1)} defining the bandwidth (standard deviation ot the smoothing kernel) to be used. This argument is passed to the -\code{\link{density}} method.} +[density() method.} \item{minFraction}{\code{numeric(1)} defining the minimum fraction of samples in at least one sample group in which the peaks have to be present to be @@ -90,84 +92,94 @@ in mz dimension.} \item{maxFeatures}{\code{numeric(1)} with the maximum number of peak groups to be identified in a single mz slice.} -\item{object}{For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object - containing the results from a previous peak detection analysis (see - \code{\link{findChromPeaks}}). +\item{object}{For \code{groupChromPeaks}: an \link{XCMSnExp} object +containing the results from a previous peak detection analysis (see +\code{\link[=findChromPeaks]{findChromPeaks()}}). - For all other methods: a \code{PeakDensityParam} object.} +\if{html}{\out{
}}\preformatted{For all other methods: a `PeakDensityParam` object. +}\if{html}{\out{
}}} \item{value}{The value for the slot.} \item{param}{A \code{PeakDensityParam} object containing all settings for the peak grouping algorithm.} + +\item{msLevel}{\code{integer(1)} (default \code{msLevel = 1L}) defining the MS level +on which the correspondence should be performed. It is required that +chromatographic peaks of the respective MS level are present.} + +\item{add}{\code{logical(1)} (default \code{add = FALSE}) allowing to perform an +additional round of correspondence (e.g. on a different MS level) and +add features to the already present feature definitions.} } \value{ The \code{PeakDensityParam} function returns a - \code{PeakDensityParam} class instance with all of the settings - specified for chromatographic peak alignment based on peak densities. - Note that argument `sampleGroups` is mandatory and should represent - either the sample grouping in the experiment. It's length has to match - the number of sample in the experiments. +\code{PeakDensityParam} class instance with all of the settings +specified for chromatographic peak alignment based on peak densities. +Note that argument \code{sampleGroups} is mandatory and should represent +either the sample grouping in the experiment. It's length has to match +the number of sample in the experiments. -For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the +For \code{groupChromPeaks}: a \link{XCMSnExp} object with the results of the correspondence analysis. The definition of the resulting -mz-rt features can be accessed with the \code{\link{featureDefinitions}} -method. +mz-rt features can be accessed with the \code{\link[=featureDefinitions]{featureDefinitions()}} method } \description{ This method performs performs correspondence (chromatographic - peak grouping) based on the density (distribution) of identified peaks - along the retention time axis within slices of overlapping mz ranges. - All peaks (from the same or from different samples) being close on the - retention time axis are grouped into a feature (\emph{peak group}). +peak grouping) based on the density (distribution) of identified peaks +along the retention time axis within slices of overlapping mz ranges. +All peaks (from the same or from different samples) being close on the +retention time axis are grouped into a feature (\emph{peak group}). -The \code{PeakDensityParam} class allows to specify all - settings for the peak grouping based on peak densities along the time - dimension. Instances should be created with the \code{PeakDensityParam} - constructor. +The \code{PeakDensityParam} class allows to specify all settings for the peak +grouping based on peak densities along the time dimension. Instances should +be created with the \code{\link[=PeakDensityParam]{PeakDensityParam()}} constructor. -\code{sampleGroups},\code{sampleGroups<-}: getter and setter - for the \code{sampleGroups} slot of the object. Its length should match - the number of samples in the experiment and it should not contain - \code{NA}s. +\code{sampleGroups},\verb{sampleGroups<-}: getter and setter +for the \code{sampleGroups} slot of the object. Its length should match +the number of samples in the experiment and it should not contain +\code{NA}s. -\code{bw},\code{bw<-}: getter and setter for the \code{bw} slot - of the object. +\code{bw},\verb{bw<-}: getter and setter for the \code{bw} slot +of the object. -\code{minFraction},\code{minFraction<-}: getter and setter for - the \code{minFraction} slot of the object. +\code{minFraction},\verb{minFraction<-}: getter and setter for +the \code{minFraction} slot of the object. -\code{minSamples},\code{minSamples<-}: getter and setter for the - \code{minSamples} slot of the object. +\code{minSamples},\verb{minSamples<-}: getter and setter for the +\code{minSamples} slot of the object. -\code{binSize},\code{binSize<-}: getter and setter for the - \code{binSize} slot of the object. +\code{binSize},\verb{binSize<-}: getter and setter for the +\code{binSize} slot of the object. -\code{maxFeatures},\code{maxFeatures<-}: getter and setter for - the \code{maxFeatures} slot of the object. +\code{maxFeatures},\verb{maxFeatures<-}: getter and setter for +the \code{maxFeatures} slot of the object. -\code{groupChromPeaks,XCMSnExp,PeakDensityParam}: +\verb{groupChromPeaks,XCMSnExp,PeakDensityParam}: performs correspondence (peak grouping within and across samples) within in mz dimension overlapping slices of MS data based on the density distribution of the identified chromatographic peaks in the slice along the time axis. + +The correspondence analysis can be performed on chromatographic peaks of +any MS level (if present and if chromatographic peak detection has been +performed for that MS level) defining features combining these peaks. The +MS level can be selected with the parameter \code{msLevel}. By default, calling +\code{groupChromPeaks} will remove any previous correspondence results. This can +be disabled with \code{add = TRUE}, which will add newly defined features to +already present feature definitions. } \section{Slots}{ \describe{ -\item{\code{.__classVersion__,sampleGroups,bw,minFraction,minSamples,binSize,maxFeatures}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{sampleGroups,bw,minFraction,minSamples,binSize,maxFeatures}}{See +corresponding parameter above.} }} \note{ These methods and classes are part of the updated and modernized - \code{xcms} user interface which will eventually replace the - \code{\link{group}} methods. All of the settings to the algorithm - can be passed with a \code{PeakDensityParam} object. - -Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause -all eventually present previous correspondence results to be dropped. +\code{xcms} user interface. All of the settings to the algorithm +can be passed with a \code{PeakDensityParam} object. } \examples{ @@ -180,20 +192,15 @@ p ############################## ## Chromatographic peak detection and grouping. ## -## Below we perform first a peak detection (using the matchedFilter -## method) on some of the test files from the faahKO package followed by -## a peak grouping using the density method. -library(faahKO) -library(MSnbase) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") -## Reading 2 of the KO samples -raw_data <- readMSData(fls[1:2], mode = "onDisk") +## Disable parallel processing for this example +register(SerialParam()) -## Perform the chromatographic peak detection using the matchedFilter method. -mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -res <- findChromPeaks(raw_data, param = mfp) +res <- faahko_sub head(chromPeaks(res)) ## The number of peaks identified per sample: @@ -218,24 +225,27 @@ processHistory(res) Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -\emph{Anal. Chem.} 2006, 78:779-787. +Anal. Chem. 2006, 78:779-787. } \seealso{ -The \code{\link{do_groupChromPeaks_density}} core - API function and \code{\link{group.density}} for the old user interface. +The \code{\link[=do_groupChromPeaks_density]{do_groupChromPeaks_density()}} core API function and \code{\link[=group.density]{group.density()}} +for the old user interface. + +\code{\link[=plotChromPeakDensity]{plotChromPeakDensity()}} to plot peak densities and evaluate different +algorithm settings. + +\code{\link[=featureDefinitions]{featureDefinitions()}} and \code{\link[=featureValues]{featureValues()}} for methods to access the +features (i.e. the peak grouping results). -\code{\link{plotChromPeakDensity}} to plot peak densities and - evaluate different algorithm settings. - \code{\link{featureDefinitions}} and - \code{\link{featureValues,XCMSnExp-method}} for methods to access the - features (i.e. the peak grouping results). +\link{XCMSnExp} for the object containing the results of the correspondence. -\code{\link{XCMSnExp}} for the object containing the results of - the correspondence. +\code{\link[=plotChromPeakDensity]{plotChromPeakDensity()}} for plotting chromatographic peak density with the +possibility to test different parameter settings. -Other peak grouping methods: \code{\link{groupChromPeaks-mzClust}}, - \code{\link{groupChromPeaks-nearest}}, - \code{\link{groupChromPeaks}} +Other peak grouping methods: +\code{\link{groupChromPeaks-mzClust}}, +\code{\link{groupChromPeaks-nearest}}, +\code{\link{groupChromPeaks}()} } \author{ Colin Smith, Johannes Rainer diff --git a/man/groupChromPeaks-mzClust.Rd b/man/groupChromPeaks-mzClust.Rd index 7e5096cdf..68e3d921e 100644 --- a/man/groupChromPeaks-mzClust.Rd +++ b/man/groupChromPeaks-mzClust.Rd @@ -6,7 +6,6 @@ \alias{groupChromPeaks-mzClust} \alias{MzClustParam-class} \alias{MzClustParam} -\alias{show,MzClustParam-method} \alias{sampleGroups,MzClustParam-method} \alias{sampleGroups<-,MzClustParam-method} \alias{ppm,MzClustParam-method} @@ -22,10 +21,13 @@ \alias{groupChromPeaks,XCMSnExp,MzClustParam-method} \title{High resolution peak grouping for single spectra samples} \usage{ -MzClustParam(sampleGroups = numeric(), ppm = 20, absMz = 0, - minFraction = 0.5, minSamples = 1) - -\S4method{show}{MzClustParam}(object) +MzClustParam( + sampleGroups = numeric(), + ppm = 20, + absMz = 0, + minFraction = 0.5, + minSamples = 1 +) \S4method{sampleGroups}{MzClustParam}(object) @@ -47,7 +49,7 @@ MzClustParam(sampleGroups = numeric(), ppm = 20, absMz = 0, \S4method{minSamples}{MzClustParam}(object) <- value -\S4method{groupChromPeaks}{XCMSnExp,MzClustParam}(object, param) +\S4method{groupChromPeaks}{XCMSnExp,MzClustParam}(object, param, msLevel = 1L) } \arguments{ \item{sampleGroups}{A vector of the same length than samples defining the @@ -71,78 +73,83 @@ considered as a peak group (feature).} least one sample group in which the peaks have to be detected to be considered a peak group (feature).} -\item{object}{For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object - containing the results from a previous chromatographic peak detection - analysis (see \code{\link{findChromPeaks}}). +\item{object}{For \code{groupChromPeaks}: an \link{XCMSnExp} object containing the +results from a previous chromatographic peak detection analysis (see +\code{\link[=findChromPeaks]{findChromPeaks()}}). - For all other methods: a \code{MzClustParam} object.} +\if{html}{\out{
}}\preformatted{For all other methods: a `MzClustParam` object. +}\if{html}{\out{
}}} \item{value}{The value for the slot.} \item{param}{A \code{MzClustParam} object containing all settings for the peak grouping algorithm.} + +\item{msLevel}{\code{integer(1)} defining the MS level. Currently only MS level +1 is supported.} } \value{ -The \code{MzClustParam} function returns a - \code{MzClustParam} class instance with all of the settings - specified for high resolution single spectra peak alignment. +The \code{MzClustParam} function returns a \code{MzClustParam} class instance with +all of the settings specified for high resolution single spectra peak +alignment. -For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the -results of the peak grouping step (i.e. the features). These can be -accessed with the \code{\link{featureDefinitions}} method. +For \code{groupChromPeaks}: a \link{XCMSnExp} object with the results of the peak +grouping step (i.e. the features). These can be accessed with the +\code{\link[=featureDefinitions]{featureDefinitions()}} method. } \description{ -This method performs high resolution correspondence for single - spectra samples. +This method performs high resolution correspondence for single spectra +samples. -The \code{MzClustParam} class allows to specify all - settings for the peak grouping based on the \emph{mzClust} algorithm. - Instances should be created with the \code{MzClustParam} constructor. +The \code{MzClustParam} class allows to specify all settings for the peak +grouping based on the \emph{mzClust} algorithm. +Instances should be created with the \code{MzClustParam} constructor. -\code{sampleGroups},\code{sampleGroups<-}: getter and setter - for the \code{sampleGroups} slot of the object. +\code{sampleGroups},\verb{sampleGroups<-}: getter and setter +for the \code{sampleGroups} slot of the object. -\code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} - slot of the object. +\code{ppm},\verb{ppm<-}: getter and setter for the \code{ppm} +slot of the object. -\code{absMz},\code{absMz<-}: getter and setter for the - \code{absMz} slot of the object. +\code{absMz},\verb{absMz<-}: getter and setter for the +\code{absMz} slot of the object. -\code{minFraction},\code{minFraction<-}: getter and setter for - the \code{minFraction} slot of the object. +\code{minFraction},\verb{minFraction<-}: getter and setter for +the \code{minFraction} slot of the object. -\code{minSamples},\code{minSamples<-}: getter and setter for the - \code{minSamples} slot of the object. +\code{minSamples},\verb{minSamples<-}: getter and setter for the +\code{minSamples} slot of the object. -\code{groupChromPeaks,XCMSnExp,MzClustParam}: -performs high resolution peak grouping for single spectrum -metabolomics data. +\verb{groupChromPeaks,XCMSnExp,MzClustParam}: performs high resolution peak +grouping for single spectrum metabolomics data. } \section{Slots}{ \describe{ -\item{\code{.__classVersion__,sampleGroups,ppm,absMz,minFraction,minSamples}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{sampleGroups,ppm,absMz,minFraction,minSamples}}{See corresponding +parameter above.} }} \note{ These methods and classes are part of the updated and modernized - \code{xcms} user interface which will eventually replace the - \code{\link{group}} methods. All of the settings to the algorithm - can be passed with a \code{MzClustParam} object. +\code{xcms} user interface which will eventually replace the +\code{\link[=group]{group()}} methods. All of the settings to the algorithm +can be passed with a \link{MzClustParam} object. Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause - all eventually present previous correspondence results to be dropped. +all eventually present previous correspondence results to be dropped. } \examples{ ## Loading a small subset of direct injection, single spectrum files library(msdata) -fticrf <- list.files(system.file("fticr", package = "msdata"), +fticrf <- list.files(system.file("fticr-mzML", package = "msdata"), recursive = TRUE, full.names = TRUE) fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk") +## Disable parallel processing for this example +register(SerialParam()) + ## Perform the MSW peak detection on these: p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, SNR.method = "data.mean", winSize.noise = 500) @@ -161,22 +168,23 @@ featureDefinitions(fticr) } \references{ Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill -and David F. Grant\cr \emph{Alignment of high resolution mass spectra: -development of a heuristic approach for metabolomics}.\cr Metabolomics, +and David F. Grant\cr Alignment of high resolution mass spectra: +development of a heuristic approach for metabolomics.\cr Metabolomics, Vol. 2, No. 2, 75-83 (2006) } \seealso{ -The \code{\link{do_groupPeaks_mzClust}} core API function and - \code{\link{group.mzClust}} for the old user interface. - \code{\link{featureDefinitions}} and - \code{\link{featureValues,XCMSnExp-method}} for methods to access peak - grouping results (i.e. the features). - -\code{\link{XCMSnExp}} for the object containing the results of - the peak grouping. - -Other peak grouping methods: \code{\link{groupChromPeaks-density}}, - \code{\link{groupChromPeaks-nearest}}, - \code{\link{groupChromPeaks}} +The \code{\link[=do_groupPeaks_mzClust]{do_groupPeaks_mzClust()}} core API function and \code{\link[=group.mzClust]{group.mzClust()}} for +the old user interface. + +\code{\link[=featureDefinitions]{featureDefinitions()}} and \code{\link[=featureValues]{featureValues()}} for methods to access peak +grouping results (i.e. the features). + +\link{XCMSnExp} for the object containing the results of +the peak grouping. + +Other peak grouping methods: +\code{\link{groupChromPeaks-density}}, +\code{\link{groupChromPeaks-nearest}}, +\code{\link{groupChromPeaks}()} } \concept{peak grouping methods} diff --git a/man/groupChromPeaks-nearest.Rd b/man/groupChromPeaks-nearest.Rd index 276b0ccdb..18263c1a3 100644 --- a/man/groupChromPeaks-nearest.Rd +++ b/man/groupChromPeaks-nearest.Rd @@ -6,7 +6,6 @@ \alias{groupChromPeaks-nearest} \alias{NearestPeaksParam-class} \alias{NearestPeaksParam} -\alias{show,NearestPeaksParam-method} \alias{sampleGroups,NearestPeaksParam-method} \alias{sampleGroups<-,NearestPeaksParam-method} \alias{mzVsRtBalance,NearestPeaksParam-method} @@ -26,10 +25,13 @@ \alias{groupChromPeaks,XCMSnExp,NearestPeaksParam-method} \title{Peak grouping based on proximity in the mz-rt space} \usage{ -NearestPeaksParam(sampleGroups = numeric(), mzVsRtBalance = 10, - absMz = 0.2, absRt = 15, kNN = 10) - -\S4method{show}{NearestPeaksParam}(object) +NearestPeaksParam( + sampleGroups = numeric(), + mzVsRtBalance = 10, + absMz = 0.2, + absRt = 15, + kNN = 10 +) \S4method{sampleGroups}{NearestPeaksParam}(object) @@ -51,7 +53,7 @@ NearestPeaksParam(sampleGroups = numeric(), mzVsRtBalance = 10, \S4method{kNN}{NearestPeaksParam}(object) <- value -\S4method{groupChromPeaks}{XCMSnExp,NearestPeaksParam}(object, param) +\S4method{groupChromPeaks}{XCMSnExp,NearestPeaksParam}(object, param, msLevel = 1L, add = FALSE) } \arguments{ \item{sampleGroups}{A vector of the same length than samples defining the @@ -72,76 +74,86 @@ two peaks.} \item{kNN}{\code{numeric(1)} representing the number of nearest neighbors to check.} -\item{object}{For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object - containing the results from a previous chromatographic peak detection - analysis (see \code{\link{findChromPeaks}}). +\item{object}{For \code{groupChromPeaks}: an \link{XCMSnExp} object containing the +results from a previous chromatographic peak detection +analysis (see \code{\link[=findChromPeaks]{findChromPeaks()}}). - For all other methods: a \code{NearestPeaksParam} object.} +\if{html}{\out{
}}\preformatted{For all other methods: a `NearestPeaksParam` object. +}\if{html}{\out{
}}} \item{value}{The value for the slot.} -\item{param}{A \code{NearestPeaksParam} object containing all settings for +\item{param}{A \code{PeakDensityParam} object containing all settings for the peak grouping algorithm.} + +\item{msLevel}{\code{integer(1)} defining the MS level. Currently only MS level +1 is supported.} + +\item{add}{\code{logical(1)} (default \code{add = FALSE}) allowing to perform an +additional round of correspondence (e.g. on a different MS level) and +add features to the already present feature definitions.} } \value{ The \code{NearestPeaksParam} function returns a \code{NearestPeaksParam} class instance with all of the settings specified for peak alignment based on peak proximity. -For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the -results of the peak grouping/correspondence step (i.e. the mz-rt -features). These can be accessed with the -\code{\link{featureDefinitions}} method. +For \code{groupChromPeaks}: a \link{XCMSnExp} object with the results of the peak +grouping/correspondence step (i.e. the mz-rt features). These can be +accessed with the \code{\link[=featureDefinitions]{featureDefinitions()}} method. } \description{ This method is inspired by the grouping algorithm of mzMine - [Katajamaa 2006] and performs correspondence based on proximity of peaks - in the space spanned by retention time and mz values. - The method creates first a \emph{master peak list} consisting of all - chromatographic peaks from the sample in which most peaks were - identified, and starting from that, calculates distances to peaks from - the sample with the next most number of peaks. If peaks are closer than - the defined threshold they are grouped together. +(Katajamaa 2006) and performs correspondence based on proximity of peaks +in the space spanned by retention time and mz values. +The method creates first a \emph{master peak list} consisting of all +chromatographic peaks from the sample in which most peaks were +identified, and starting from that, calculates distances to peaks from +the sample with the next most number of peaks. If peaks are closer than +the defined threshold they are grouped together. The \code{NearestPeaksParam} class allows to specify all - settings for the peak grouping based on the \emph{nearest} algorithm. - Instances should be created with the \code{NearestPeaksParam} constructor. +settings for the peak grouping based on the \emph{nearest} algorithm. +Instances should be created with the \code{NearestPeaksParam} constructor. -\code{sampleGroups},\code{sampleGroups<-}: getter and setter - for the \code{sampleGroups} slot of the object. +\code{sampleGroups},\verb{sampleGroups<-}: getter and setter +for the \code{sampleGroups} slot of the object. -\code{mzVsRtBalance},\code{mzVsRtBalance<-}: getter and setter - for the \code{mzVsRtBalance} slot of the object. +\code{mzVsRtBalance},\verb{mzVsRtBalance<-}: getter and setter +for the \code{mzVsRtBalance} slot of the object. -\code{absMz},\code{absMz<-}: getter and setter for the - \code{absMz} slot of the object. +\code{absMz},\verb{absMz<-}: getter and setter for the +\code{absMz} slot of the object. -\code{absRt},\code{absRt<-}: getter and setter for the - \code{absRt} slot of the object. +\code{absRt},\verb{absRt<-}: getter and setter for the +\code{absRt} slot of the object. -\code{kNN},\code{kNN<-}: getter and setter for the - \code{kNN} slot of the object. +\code{kNN},\verb{kNN<-}: getter and setter for the +\code{kNN} slot of the object. -\code{groupChromPeaks,XCMSnExp,NearestPeaksParam}: +\verb{groupChromPeaks,XCMSnExp,NearestPeaksParam}: performs peak grouping based on the proximity between chromatographic peaks from different samples in the mz-rt range. + +The correspondence analysis can be performed on chromatographic peaks of +any MS level (if present and if chromatographic peak detection has been +performed for that MS level) defining features combining these peaks. The +MS level can be selected with the parameter \code{msLevel}. By default, calling +\code{groupChromPeaks} will remove any previous correspondence results. This can +be disabled with \code{add = TRUE}, which will add newly defined features to +already present feature definitions. } \section{Slots}{ \describe{ -\item{\code{.__classVersion__,sampleGroups,mzVsRtBalance,absMz,absRt,kNN}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} +\item{\code{sampleGroups,mzVsRtBalance,absMz,absRt,kNN}}{See corresponding parameter +above.} }} \note{ These methods and classes are part of the updated and modernized - \code{xcms} user interface which will eventually replace the - \code{\link{group}} methods. All of the settings to the algorithm - can be passed with a \code{NearestPeaksParam} object. - -Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause -all eventually present previous alignment results to be dropped. +\code{xcms} user interface. All of the settings to the algorithm +can be passed with a \code{NearestPeaksParam} object. } \examples{ @@ -149,23 +161,14 @@ all eventually present previous alignment results to be dropped. p <- NearestPeaksParam(kNN = 3) p -############################## -## Chromatographic peak detection and grouping. -## -## Below we perform first a chromatographic peak detection (using the -## matchedFilter method) on some of the test files from the faahKO package -## followed by a peaks grouping using the "nearest" method. -library(faahKO) -library(MSnbase) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") +res <- faahko_sub -## Reading 2 of the KO samples -raw_data <- readMSData(fls[1:2], mode = "onDisk") - -## Perform the peak detection using the matchedFilter method. -mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -res <- findChromPeaks(raw_data, param = mfp) +## Disable parallel processing for this example +register(SerialParam()) head(chromPeaks(res)) ## The number of peaks identified per sample: @@ -188,20 +191,20 @@ processHistory(res) \references{ Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for processing and visualization of mass spectrometry based molecular profile -data. \emph{Bioinformatics} 2006, 22:634-636. +data. Bioinformatics 2006, 22:634-636. } \seealso{ -The \code{\link{do_groupChromPeaks_nearest}} core - API function and \code{\link{group.nearest}} for the old user interface. - \code{\link{featureDefinitions}} and - \code{\link{featureValues,XCMSnExp-method}} for methods to access - peak grouping results (i.e. the features). - -\code{\link{XCMSnExp}} for the object containing the results of - the peak grouping. - -Other peak grouping methods: \code{\link{groupChromPeaks-density}}, - \code{\link{groupChromPeaks-mzClust}}, - \code{\link{groupChromPeaks}} +The \code{\link[=do_groupChromPeaks_nearest]{do_groupChromPeaks_nearest()}} core API function. + +\code{\link[=featureDefinitions]{featureDefinitions()}} and \code{\link[=featureValues]{featureValues()}} for methods to access +peak grouping results (i.e. the features). + +\link{XCMSnExp} for the object containing the results of +the peak grouping. + +Other peak grouping methods: +\code{\link{groupChromPeaks-density}}, +\code{\link{groupChromPeaks-mzClust}}, +\code{\link{groupChromPeaks}()} } \concept{peak grouping methods} diff --git a/man/groupChromPeaks.Rd b/man/groupChromPeaks.Rd index 47eb81b29..c90a7c07c 100644 --- a/man/groupChromPeaks.Rd +++ b/man/groupChromPeaks.Rd @@ -28,14 +28,19 @@ The \code{groupChromPeaks} method(s) perform the correspondence, } } \seealso{ +\code{\link{featureDefinitions}} and +\code{\link{featureValues,XCMSnExp-method}} for methods to access peak +grouping results. + +\code{\link{featureChromatograms}} to extract ion chromatograms for each +feature. + \code{\link{group}} for the \emph{old} peak grouping methods. - \code{\link{featureDefinitions}} and - \code{\link{featureValues,XCMSnExp-method}} for methods to access peak - grouping results. -Other peak grouping methods: \code{\link{groupChromPeaks-density}}, - \code{\link{groupChromPeaks-mzClust}}, - \code{\link{groupChromPeaks-nearest}} +Other peak grouping methods: +\code{\link{groupChromPeaks-density}}, +\code{\link{groupChromPeaks-mzClust}}, +\code{\link{groupChromPeaks-nearest}} } \author{ Johannes Rainer diff --git a/man/groupFeatures-abundance-correlation.Rd b/man/groupFeatures-abundance-correlation.Rd new file mode 100644 index 000000000..8c58c76ad --- /dev/null +++ b/man/groupFeatures-abundance-correlation.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-group-features.R +\name{groupFeatures-abundance-correlation} +\alias{groupFeatures-abundance-correlation} +\alias{groupFeatures,XCMSnExp,AbundanceSimilarityParam-method} +\title{Compounding/feature grouping based on similarity of abundances across samples} +\usage{ +\S4method{groupFeatures}{XCMSnExp,AbundanceSimilarityParam}( + object, + param, + msLevel = 1L, + method = c("medret", "maxint", "sum"), + value = "into", + intensity = "into", + filled = TRUE, + ... +) +} +\arguments{ +\item{object}{\code{\link[=XCMSnExp]{XCMSnExp()}} object containing also correspondence results.} + +\item{param}{\code{AbudanceSimilarityParam} object with the settings for the +method. See \code{\link[=AbundanceSimilarityParam]{AbundanceSimilarityParam()}} for details on the grouping +method and its parameters.} + +\item{msLevel}{\code{integer(1)} defining the MS level on which the features +should be grouped.} + +\item{method}{\code{character(1)} passed to the \code{featureValues} call. See +\code{\link[=featureValues]{featureValues()}} for details. Defaults to \code{method = "medret"}.} + +\item{value}{\code{character(1)} passed to the \code{featureValues} call. See +\code{\link[=featureValues]{featureValues()}} for details. Defaults to \code{value = "into"}.} + +\item{intensity}{\code{character(1)} passed to the \code{featureValues} call. See +\code{\link[=featureValues]{featureValues()}} for details. Defaults to \code{intensity = "into"}.} + +\item{filled}{\code{logical(1)} whether filled-in values should be included in +the correlation analysis. Defaults to \code{filled = TRUE}.} + +\item{...}{additional parameters passed to the \code{groupFeatures} method for +\code{matrix}.} +} +\value{ +input \code{XCMSnExp} with feature group definitions added to a column +\code{"feature_group"} in its \code{featureDefinitions} data frame. +} +\description{ +Features from the same originating compound are expected to have similar +intensities across samples. This method this groups features based on +similarity of abundances (i.e. \emph{feature values}) across samples. +See also \code{\link[=AbundanceSimilarityParam]{AbundanceSimilarityParam()}} for additional information and details. + +This help page lists parameters specific for \code{xcms} result objects (i.e. the +\code{\link[=XCMSnExp]{XCMSnExp()}} object). Documentation of the parameters for the similarity +calculation is available in the \code{\link[=AbundanceSimilarityParam]{AbundanceSimilarityParam()}} help page in +the \code{MsFeatures} package. +} +\examples{ + +library(MsFeatures) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) + +## Group chromatographic peaks across samples +xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3))) + +## Group features based on correlation of feature values (integrated +## peak area) across samples. Note that there are many missing values +## in the feature value which influence grouping of features in the present +## data set. +xodg_grp <- groupFeatures(xodg, + param = AbundanceSimilarityParam(threshold = 0.8)) +table(featureDefinitions(xodg_grp)$feature_group) + +## Group based on the maximal peak intensity per feature +xodg_grp <- groupFeatures(xodg, + param = AbundanceSimilarityParam(threshold = 0.8, value = "maxo")) +table(featureDefinitions(xodg_grp)$feature_group) +} +\seealso{ +feature-grouping for a general overview. + +Other feature grouping methods: +\code{\link{groupFeatures-eic-similarity}}, +\code{\link{groupFeatures-similar-rtime}} +} +\author{ +Johannes Rainer +} +\concept{feature grouping methods} diff --git a/man/groupFeatures-eic-similarity.Rd b/man/groupFeatures-eic-similarity.Rd new file mode 100644 index 000000000..bb55348c3 --- /dev/null +++ b/man/groupFeatures-eic-similarity.Rd @@ -0,0 +1,174 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-group-features.R +\name{groupFeatures-eic-similarity} +\alias{groupFeatures-eic-similarity} +\alias{EicSimilarityParam-class} +\alias{EicSimilarityParam} +\alias{groupFeatures,XCMSnExp,EicSimilarityParam-method} +\title{Compounding/feature grouping based on similarity of extracted ion chromatograms} +\usage{ +EicSimilarityParam( + threshold = 0.9, + n = 1, + onlyPeak = TRUE, + value = c("maxo", "into"), + groupFun = groupSimilarityMatrix, + ALIGNFUN = alignRt, + ALIGNFUNARGS = list(tolerance = 0, method = "closest"), + FUN = cor, + FUNARGS = list(use = "pairwise.complete.obs"), + ... +) + +\S4method{groupFeatures}{XCMSnExp,EicSimilarityParam}(object, param, msLevel = 1L) +} +\arguments{ +\item{threshold}{\code{numeric(1)} with the minimal required similarity score to +group featues. This is passed to the \code{groupFun} function.} + +\item{n}{\code{numeric(1)} defining the total number of samples per feature group +on which this similarity calculation should be performed. This value is +rounded up to the next larger integer value.} + +\item{onlyPeak}{\code{logical(1)} whether the correlation should be performed only +on the signals within the identified chromatographic peaks +(\code{onlyPeak = TRUE}, default) or all the signal from the extracted ion +chromatogram.} + +\item{value}{\code{character(1)} defining whether samples should be grouped based +on the sum of the maximal peak intensity (\code{value = "maxo"}, the default) +or the integrated peak area (\code{value = "into"}) for a feature.} + +\item{groupFun}{\code{function} defining the function to be used to group rows +based on a pairwise similarity matrix. Defaults to +\code{\link[=groupSimilarityMatrix]{groupSimilarityMatrix()}}.} + +\item{ALIGNFUN}{\code{function} defining the function to be used to \emph{align} +chromatograms prior similarity calculation. Defaults to +\code{ALIGNFUN = alignRt}. See \code{\link[=alignRt]{alignRt()}} and \code{\link[=compareChromatograms]{compareChromatograms()}} for +more information.} + +\item{ALIGNFUNARGS}{\strong{named} \code{list} with arguments for \code{ALIGNFUN}. +Defaults to \code{ALIGNFUNARGS = list(tolerance = 0, method = "closest")}.} + +\item{FUN}{\code{function} defining the function to be used to calculate a +similarity between (aligned) chromatograms. Defaults to \code{FUN = cor}. +See \code{\link[=cor]{cor()}} and \code{\link[=compareChromatograms]{compareChromatograms()}} for more information.} + +\item{FUNARGS}{\strong{named} \code{list} with arguments for \code{FUN}. Defaults to +\code{FUN = list(use = "pairwise.complete.obs")}.} + +\item{...}{for \code{EicSimilarityParam}: additional arguments to be passed to +\code{groupFun} and \code{featureChromatograms} (such as \code{expandRt} to expand the +retention time range of each feature).} + +\item{object}{\code{\link[=XCMSnExp]{XCMSnExp()}} object containing also correspondence results.} + +\item{param}{\code{EicSimilarityParam} object with the settings for the method.} + +\item{msLevel}{\code{integer(1)} defining the MS level on which the features +should be grouped.} +} +\value{ +input \code{XCMSnExp} with feature groups added (i.e. in column +\code{"feature_group"} of its \code{featureDefinitions} data frame. +} +\description{ +Features from the same originating compound are expected to share their +elution pattern (i.e. chromatographic peak shape) with it. +Thus, this methods allows to group features based on similarity of their +extracted ion chromatograms (EICs). The similarity calculation is performed +separately for each sample with the similarity score being aggregated across +samples for the final generation of the similarity matrix on which the +grouping (considering parameter \code{threshold}) will be performed. + +The \code{\link[=compareChromatograms]{compareChromatograms()}} function is used for similarity calculation +which by default calculates the Pearson's correlation coefficient. The +settings for \code{compareChromatograms} can be specified with parameters +\code{ALIGNFUN}, \code{ALIGNFUNARGS}, \code{FUN} and \code{FUNARGS}. \code{ALIGNFUN} defaults to +\code{\link[=alignRt]{alignRt()}} and is the function used to \emph{align} the chromatograms before +comparison. \code{ALIGNFUNARGS} allows to specify additional arguments for the +\code{ALIGNFUN} function. It defaults to +\code{ALIGNFUNARGS = list(tolerance = 0, method = "closest")} which ensures that +data points from the same spectrum (scan, i.e. with the same retention time) +are compared between the EICs from the same sample. Parameter \code{FUN} defines +the function to calculate the similarity score and defaults to \code{FUN = cor} +and \code{FUNARGS} allows to pass additional arguments to this function (defaults +to \code{FUNARGS = list(use = "pairwise.complete.obs")}. See also +\code{\link[=compareChromatograms]{compareChromatograms()}} for more information. + +The grouping of features based on the EIC similarity matrix is performed +with the function specified with parameter \code{groupFun} which defaults to +\code{groupFun = groupSimilarityMatrix} which groups all rows (features) in the +similarity matrix with a similarity score larger than \code{threshold} into the +same cluster. This creates clusters of features in which \strong{all} features +have a similarity score \verb{>= threshold} with \strong{any} other feature in that +cluster. See \code{\link[=groupSimilarityMatrix]{groupSimilarityMatrix()}} for details. Additional parameters to +that function can be passed with the \code{...} argument. + +This feature grouping should be called \strong{after} an initial feature +grouping by retention time (see \code{\link[=SimilarRtimeParam]{SimilarRtimeParam()}}). The feature groups +defined in columns \code{"feature_group"} of \code{featureDefinitions(object)} (for +features matching \code{msLevel}) will be used and refined by this method. +Features with a value of \code{NA} in \code{featureDefinitions(object)$feature_group} +will be skipped/not considered for feature grouping. +} +\note{ +While being possible to be performed on the full data set without prior +feature grouping, this is not suggested for the following reasons: I) the +selection of the top \code{n} samples with the highest signal for the +\emph{feature group} will be biased by very abundant compounds as this is +performed on the full data set (i.e. the samples with the highest overall +intensities are used for correlation of all features) and II) it is +computationally much more expensive because a pairwise correlation between +all features has to be performed. + +It is also suggested to perform the correlation on a subset of samples +per feature with the highest intensities of the peaks (for that feature) +although it would also be possible to run the correlation on all samples by +setting \code{n} equal to the total number of samples in the data set. EIC +correlation should however be performed ideally on samples in which the +original compound is highly abundant to avoid correlation of missing values +or noisy peak shapes as much as possible. + +By default also the signal which is outside identified chromatographic peaks +is excluded from the correlation. +} +\examples{ + +library(MsFeatures) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) + +## Group chromatographic peaks across samples +xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3))) + +## Performing a feature grouping based on EIC similarities on a single +## sample +xodg_grp <- groupFeatures(xodg, param = EicSimilarityParam(n = 1)) + +table(featureDefinitions(xodg_grp)$feature_group) + +## Usually it is better to perform this correlation on pre-grouped features +## e.g. based on similar retention time. +xodg_grp <- groupFeatures(xodg, param = SimilarRtimeParam(diffRt = 4)) +xodg_grp <- groupFeatures(xodg_grp, param = EicSimilarityParam(n = 1)) + +table(featureDefinitions(xodg_grp)$feature_group) +} +\seealso{ +feature-grouping for a general overview. + +Other feature grouping methods: +\code{\link{groupFeatures-abundance-correlation}}, +\code{\link{groupFeatures-similar-rtime}} +} +\author{ +Johannes Rainer +} +\concept{feature grouping methods} diff --git a/man/groupFeatures-similar-rtime.Rd b/man/groupFeatures-similar-rtime.Rd new file mode 100644 index 000000000..1578c5404 --- /dev/null +++ b/man/groupFeatures-similar-rtime.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-group-features.R +\name{groupFeatures-similar-rtime} +\alias{groupFeatures-similar-rtime} +\alias{groupFeatures,XCMSnExp,SimilarRtimeParam-method} +\title{Compounding/feature grouping based on similar retention times} +\usage{ +\S4method{groupFeatures}{XCMSnExp,SimilarRtimeParam}(object, param, msLevel = 1L, ...) +} +\arguments{ +\item{object}{\code{\link[=XCMSnExp]{XCMSnExp()}} object containing also correspondence results.} + +\item{param}{\code{SimilarRtimeParam} object with the settings for the method. See +\code{\link[MsFeatures:groupFeatures-similar-rtime]{MsFeatures::SimilarRtimeParam()}} for details and options.} + +\item{msLevel}{\code{integer(1)} defining the MS level on which the features +should be grouped.} + +\item{...}{passed to the \code{groupFeatures} function on numeric values.} +} +\value{ +input \code{XCMSnExp} with feature groups added (i.e. in column +\code{"feature_group"} of its \code{featureDefinitions} data frame. +} +\description{ +Group features based on similar retention time. This method is supposed to be +used as an initial \emph{crude} grouping of features based on the median retention +time of all their chromatographic peaks. All features with a difference in +their retention time which is \code{<=} parameter \code{diffRt} of the parameter object +are grouped together. If a column \code{"feature_group"} is found in +\code{\link[=featureDefinitions]{featureDefinitions()}} this is further sub-grouped by this method. + +See \code{\link[MsFeatures:groupFeatures-similar-rtime]{MsFeatures::SimilarRtimeParam()}} in \code{MsFeatures} for more details. +} +\examples{ + +library(MsFeatures) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) + +## Group chromatographic peaks across samples +xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3))) + +## Group features based on similar retention time (i.e. difference <= 2 seconds) +xodg_grp <- groupFeatures(xodg, param = SimilarRtimeParam(diffRt = 2)) + +## Feature grouping get added to the featureDefinitions in column "feature_group" +head(featureDefinitions(xodg_grp)$feature_group) + +table(featureDefinitions(xodg_grp)$feature_group) +length(unique(featureDefinitions(xodg_grp)$feature_group)) + +## Using an alternative groupiing method that creates larger groups +xodg_grp <- groupFeatures(xodg, + param = SimilarRtimeParam(diffRt = 2, groupFun = MsCoreUtils::group)) + +length(unique(featureDefinitions(xodg_grp)$feature_group)) +} +\seealso{ +Other feature grouping methods: +\code{\link{groupFeatures-abundance-correlation}}, +\code{\link{groupFeatures-eic-similarity}} +} +\author{ +Johannes Rainer +} +\concept{feature grouping methods} diff --git a/man/groupOverlaps.Rd b/man/groupOverlaps.Rd new file mode 100644 index 000000000..24b256b20 --- /dev/null +++ b/man/groupOverlaps.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-utils.R +\name{groupOverlaps} +\alias{groupOverlaps} +\title{Group overlapping ranges} +\usage{ +groupOverlaps(xmin, xmax) +} +\arguments{ +\item{xmin}{\code{numeric} (same length than \code{xmax}) with the lower boundary of +the range.} + +\item{xmax}{\code{numeric} (same length than \code{xmin}) with the upper boundary of +the range.} +} +\value{ +\code{list} with the indices of grouped elements. +} +\description{ +\code{groupOverlaps} identifies overlapping ranges in the input data and groups +them by returning their indices in \code{xmin} \code{xmax}. +} +\examples{ + +x <- c(2, 12, 34.2, 12.4) +y <- c(3, 16, 35, 36) + +groupOverlaps(x, y) +} +\author{ +Johannes Rainer +} diff --git a/man/groupnames-XCMSnExp.Rd b/man/groupnames-XCMSnExp.Rd index d6ce09aa4..44aeccb3b 100644 --- a/man/groupnames-XCMSnExp.Rd +++ b/man/groupnames-XCMSnExp.Rd @@ -1,12 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-XCMSnExp.R -\docType{methods} \name{groupnames,XCMSnExp-method} \alias{groupnames,XCMSnExp-method} \title{Generate unique group (feature) names based on mass and retention time} \usage{ -\S4method{groupnames}{XCMSnExp}(object, mzdec = 0, rtdec = 0, - template = NULL) +\S4method{groupnames}{XCMSnExp}(object, mzdec = 0, rtdec = 0, template = NULL) } \arguments{ \item{object}{\code{XCMSnExp} object containing correspondence results.} @@ -22,7 +20,7 @@ be emulated.} } \value{ \code{character} with unique names for each feature in \code{object}. The -format is \code{M(m/z)T(time in seconds)}. +format is \verb{M(m/z)T(time in seconds)}. } \description{ \code{groupnames} generates names for the identified features from the diff --git a/man/highlightChromPeaks.Rd b/man/highlightChromPeaks.Rd index 6df94f75a..14f83995e 100644 --- a/man/highlightChromPeaks.Rd +++ b/man/highlightChromPeaks.Rd @@ -5,9 +5,18 @@ \title{Add definition of chromatographic peaks to an extracted chromatogram plot} \usage{ -highlightChromPeaks(x, rt, mz, border = rep("00000040", - length(fileNames(x))), lwd = 1, col = NA, type = c("rect", - "point"), whichPeaks = c("any", "within", "apex_within"), ...) +highlightChromPeaks( + x, + rt, + mz, + peakIds = character(), + border = rep("00000040", length(fileNames(x))), + lwd = 1, + col = NA, + type = c("rect", "point", "polygon"), + whichPeaks = c("any", "within", "apex_within"), + ... +) } \arguments{ \item{x}{For \code{highlightChromPeaks}: \code{XCMSnExp} object with the @@ -19,13 +28,18 @@ retention time range from which peaks should be extracted and plotted.} \item{mz}{\code{numeric(2)} with the mz range from which the peaks should be extracted and plotted.} -\item{border}{colors to be used to color the border of the rectangles. Has to -be equal to the number of samples in \code{x}.} +\item{peakIds}{\code{character} defining the IDs (i.e. rownames of the peak +in the \code{chromPeaks} table) of the chromatographic peaks to be +highlighted in a plot.} + +\item{border}{colors to be used to color the border of the rectangles/peaks. +Has to be equal to the number of samples in \code{x}.} \item{lwd}{\code{numeric(1)} defining the width of the line/border.} \item{col}{For \code{highlightChromPeaks}: color to be used to fill the -rectangle.} +rectangle (if \code{type = "rect"}) or the peak +(for \code{type = "polygon"}).} \item{type}{the plotting type. See \code{\link{plot}} in base grapics for more details. @@ -33,7 +47,10 @@ For \code{highlightChromPeaks}: \code{character(1)} defining how the peak should be highlighted: \code{type = "rect"} draws a rectangle representing the peak definition, \code{type = "point"} indicates a chromatographic peak with a single point at the position of the peak's -\code{"rt"} and \code{"maxo"}.} +\code{"rt"} and \code{"maxo"} and \code{type = "polygon"} will highlight +the peak shape. For \code{type = "polygon"} the color of the border and +area can be defined with parameters \code{"border"} and \code{"col"}, +respectively.} \item{whichPeaks}{\code{character(1)} specifying how peaks are called to be located within the region defined by \code{mz} and \code{rt}. Can be @@ -51,32 +68,33 @@ function.} The \code{highlightChromPeaks} function adds chromatographic peak definitions to an existing plot, such as one created by the \code{plot} method on a \code{\link{Chromatogram}} or -\code{\link{Chromatograms}} object. +\code{\link{MChromatograms}} object. } \examples{ -## Read some files from the faahKO package. -library(xcms) -library(faahKO) -faahko_3_files <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO")) - -od <- readMSData(faahko_3_files, mode = "onDisk") +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") -## Peak detection using the 'matchedFilter' method. Note that we are using a -## larger binSize to reduce the runtime of the example. -xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) +## Disable parallel processing for this example +register(SerialParam()) ## Extract the ion chromatogram for one chromatographic peak in the data. -chrs <- chromatogram(xod, rt = c(2700, 2900), mz = 335) +chrs <- chromatogram(faahko_sub, rt = c(2700, 2900), mz = 335) plot(chrs) ## Extract chromatographic peaks for the mz/rt range (if any). -chromPeaks(xod, rt = c(2700, 2900), mz = 335) +chromPeaks(faahko_sub, rt = c(2700, 2900), mz = 335) ## Highlight the chromatographic peaks in the area -highlightChromPeaks(xod, rt = c(2700, 2900), mz = 335) +## Show the peak definition with a rectangle +highlightChromPeaks(faahko_sub, rt = c(2700, 2900), mz = 335) + +## Color the actual peak +highlightChromPeaks(faahko_sub, rt = c(2700, 2900), mz = 335, + col = c("#ff000020", "#00ff0020"), type = "polygon") } \author{ Johannes Rainer diff --git a/man/imputeLinInterpol.Rd b/man/imputeLinInterpol.Rd index e720ade7e..3a348693c 100644 --- a/man/imputeLinInterpol.Rd +++ b/man/imputeLinInterpol.Rd @@ -4,8 +4,13 @@ \alias{imputeLinInterpol} \title{Impute values for empty elements in a vector using linear interpolation} \usage{ -imputeLinInterpol(x, baseValue, method = "lin", distance = 1L, - noInterpolAtEnds = FALSE) +imputeLinInterpol( + x, + baseValue, + method = "lin", + distance = 1L, + noInterpolAtEnds = FALSE +) } \arguments{ \item{x}{A numeric vector with eventual missing (\code{NA}) values.} diff --git a/man/imputeRowMin.Rd b/man/imputeRowMin.Rd index 484efb871..ec9effea9 100644 --- a/man/imputeRowMin.Rd +++ b/man/imputeRowMin.Rd @@ -4,7 +4,7 @@ \alias{imputeRowMin} \title{Replace missing values with a proportion of the row minimum} \usage{ -imputeRowMin(x, min_fraction = 1) +imputeRowMin(x, min_fraction = 1/2) } \arguments{ \item{x}{\code{matrix} with abundances, rows being features/metabolites and @@ -37,7 +37,8 @@ head(imputeRowMin(mat, min_fraction = 1/8)) \seealso{ \code{imputeLCMD} package for more left censored imputation functions. -Other imputation functions: \code{\link{imputeRowMinRand}} +Other imputation functions: +\code{\link{imputeRowMinRand}()} } \author{ Johannes Rainer diff --git a/man/imputeRowMinRand.Rd b/man/imputeRowMinRand.Rd index c1cf79e7d..f2d7368b5 100644 --- a/man/imputeRowMinRand.Rd +++ b/man/imputeRowMinRand.Rd @@ -4,29 +4,52 @@ \alias{imputeRowMinRand} \title{Impute missing values with random numbers based on the row minimum} \usage{ -imputeRowMinRand(x, min_fraction = 1/8, sd_fraction = 1, abs = TRUE) +imputeRowMinRand( + x, + method = c("mean_sd", "from_to"), + min_fraction = 1/2, + min_fraction_from = 1/1000, + sd_fraction = 1, + abs = TRUE +) } \arguments{ \item{x}{\code{matrix} with abundances, rows being features/metabolites and columns samples.} +\item{method}{method \code{character(1)} defining the imputation method. +See description for details. Defaults to \code{method = "mean_sd"}.} + \item{min_fraction}{\code{numeric(1)} with the fraction of the row minimum that -should be used to replace \code{NA} values in that row.} +should be used to replace \code{NA} values in that row in case that \code{mean_sd} +method is specified. When using \code{from_to} method, this value will be the +one used to calculate the maximum value for replace \code{NA} values in that row.} + +\item{min_fraction_from}{\code{numeric(1)} with the fraction of the row minimum +that should be used to calculate the minimum value for replace \code{NA} values +in that row. This parameter is used only in case that \code{from_to} method is +specified.} \item{sd_fraction}{\code{numeric(1)} factor to reduce the estimated standard -deviation.} +deviation. This parameter is used only in case that \code{mean_sd} method is +specified.} \item{abs}{\code{logical(1)} to force imputed values to be strictly positive.} } \description{ -Replace missing values with random numbers from a normal distribution based +Replace missing values with random numbers. +When using the \code{method = "mean_sd"}, random numbers will be generated +from a normal distribution based on (a fraction of) the row min and a standard deviation estimated from the linear relationship between row standard deviation and mean of the full data set. Parameter \code{sd_fraction} allows to further reduce the estimated standard deviation. +When using the method \code{method = "from_to"}, random numbers between 2 specific values +will be generated. } \details{ -Imputed values are taken from a normal distribution with mean being a +For method \strong{mean_sd}, imputed +values are taken from a normal distribution with mean being a user defined fraction of the row minimum and the standard deviation estimated for that mean based on the linear relationship between row standard deviations and row means in the full matrix \code{x}. @@ -36,6 +59,9 @@ values, the standard deviation for the random number generation is estimated ignoring the intercept of the linear model estimating the relationship between standard deviation and mean. If \code{abs = TRUE} \code{NA} values are replaced with the absolute value of the random values. + +For method \strong{from_to}, imputed values are taken between 2 user defined +fractions of the row minimum. } \examples{ @@ -52,17 +78,20 @@ sds <- apply(mat, MARGIN = 1, sd, na.rm = TRUE) plot(mns, sds) abline(lm(sds ~ mns)) -mat_imp <- imputeRowMinRand(mat) +mat_imp_meansd <- imputeRowMinRand(mat, method = "mean_sd") +mat_imp_fromto <- imputeRowMinRand(mat, method = "from_to") head(mat) -head(mat_imp) +head(mat_imp_meansd) +head(mat_imp_fromto) } \seealso{ \code{imputeLCMD} package for more left censored imputation functions. -Other imputation functions: \code{\link{imputeRowMin}} +Other imputation functions: +\code{\link{imputeRowMin}()} } \author{ -Johannes Rainer +Johannes Rainer, Mar Garcia-Aloy } \concept{imputation functions} diff --git a/man/isolationWindowTargetMz-OnDiskMSnExp-method.Rd b/man/isolationWindowTargetMz-OnDiskMSnExp-method.Rd new file mode 100644 index 000000000..81a6d4c57 --- /dev/null +++ b/man/isolationWindowTargetMz-OnDiskMSnExp-method.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-OnDiskMSnExp.R +\name{isolationWindowTargetMz,OnDiskMSnExp-method} +\alias{isolationWindowTargetMz,OnDiskMSnExp-method} +\alias{isolationWindowTargetMz} +\title{Extract isolation window target m/z definition} +\usage{ +\S4method{isolationWindowTargetMz}{OnDiskMSnExp}(object) +} +\arguments{ +\item{object}{\linkS4class{OnDiskMSnExp} object.} +} +\value{ +a \code{numeric} of length equal to the number of spectra in \code{object} with +the isolation window target m/z or \code{NA} if not specified/available. +} +\description{ +\code{isolationWindowTargetMz} extracts the isolation window target m/z definition +for each spectrum in \code{object}. +} +\author{ +Johannes Rainer +} diff --git a/man/manualChromPeaks.Rd b/man/manualChromPeaks.Rd new file mode 100644 index 000000000..eae37c6b3 --- /dev/null +++ b/man/manualChromPeaks.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{manualChromPeaks} +\alias{manualChromPeaks} +\alias{manualFeatures} +\title{Manual peak integration and feature definition} +\usage{ +manualChromPeaks( + object, + chromPeaks = matrix(), + samples = seq_along(fileNames(object)), + BPPARAM = bpparam(), + msLevel = 1L +) + +manualFeatures(object, peakIdx = list(), msLevel = 1L) +} +\arguments{ +\item{object}{\code{XCMSnExp} or \code{OnDiskMSnExp} object.} + +\item{chromPeaks}{\code{matrix} defining the boundaries of the chromatographic +peaks, one row per chromatographic peak, columns \code{"mzmin"}, \code{"mzmax"}, +\code{"rtmin"} and \code{"rtmax"} defining the m/z and retention time region of +each peak.} + +\item{samples}{optional \code{integer} to select samples in which the peak +integration should be performed. By default performed in all samples.} + +\item{BPPARAM}{parallel processing settings (see \code{\link[=bpparam]{bpparam()}} for details).} + +\item{msLevel}{\code{integer(1)} defining the MS level in which peak integration +should be performed.} + +\item{peakIdx}{for \code{nabbyakFeatyres}: \code{list} of \code{integer} vectors with the +indices of chromatographic peaks in the object's \code{chromPeaks} matrix +that should be grouped into features.} +} +\value{ +\code{XCMSnExp} with the manually added chromatographic peaks or features. +} +\description{ +The \code{manualChromPeaks} function allows to manually define chromatographic +peaks which are added to the object's \code{chromPeaks} matrix. In contrast to +\code{\link[=findChromPeaks]{findChromPeaks()}}, no \emph{peak detection} is performed (e.g. using an +algorithm such as \emph{centWave}) but the peak is added as defined by the user. +Note that a peak will not be added if no signal (intensity) was found in a +sample within the provided boundaries. + +Because chromatographic peaks are added to eventually previously identified +peaks, it is suggested to run \code{\link[=refineChromPeaks]{refineChromPeaks()}} with the +\code{\link[=MergeNeighboringPeaksParam]{MergeNeighboringPeaksParam()}} approach to merge potentially overlapping +peaks. + +The \code{manualFeatures} function allows to manually group identified +chromatographic peaks into features by providing their index in the +object's \code{chromPeaks} matrix. +} +\author{ +Johannes Rainer +} diff --git a/man/overlappingFeatures.Rd b/man/overlappingFeatures.Rd index c16b87f44..efa9e6ad4 100644 --- a/man/overlappingFeatures.Rd +++ b/man/overlappingFeatures.Rd @@ -33,23 +33,24 @@ are overlapping. the m/z - rt space. } \examples{ -## Load 2 test files. -data <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), - system.file("cdf/KO/ko16.CDF", package = "faahKO")), - mode = "onDisk") -## Perform peak detection; parameters set to reduce processing speed -data <- findChromPeaks(data, CentWaveParam(noise = 10000, snthresh = 40)) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) ## Correspondence analysis -data <- groupChromPeaks(data, param = PeakDensityParam(sampleGroups = c(1, 1))) +xdata <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = c(1, 1, 1))) ## Identify overlapping features -overlappingFeatures(data) +overlappingFeatures(xdata) ## Identify features that are separated on retention time by less than ## 2 minutes -overlappingFeatures(data, expandRt = 60) +overlappingFeatures(xdata, expandRt = 60) } \author{ Johannes Rainer diff --git a/man/peaksWithCentWave.Rd b/man/peaksWithCentWave.Rd index f28a2e4a7..fc78b8dba 100644 --- a/man/peaksWithCentWave.Rd +++ b/man/peaksWithCentWave.Rd @@ -4,9 +4,20 @@ \alias{peaksWithCentWave} \title{Identify peaks in chromatographic data using centWave} \usage{ -peaksWithCentWave(int, rt, peakwidth = c(20, 50), snthresh = 10, - prefilter = c(3, 100), integrate = 1, fitgauss = FALSE, - noise = 0, verboseColumns = FALSE, firstBaselineCheck = TRUE, ...) +peaksWithCentWave( + int, + rt, + peakwidth = c(20, 50), + snthresh = 10, + prefilter = c(3, 100), + integrate = 1, + fitgauss = FALSE, + noise = 0, + verboseColumns = FALSE, + firstBaselineCheck = TRUE, + extendLengthMSW = FALSE, + ... +) } \arguments{ \item{int}{\code{numeric} with intensity values.} @@ -21,7 +32,7 @@ expected peak width.} Peaks with a signal to noise ratio < \code{snthresh} are omitted.} \item{prefilter}{\code{numeric(2)} (\code{c(k, I)}): only regions of interest with at -least \code{k} centroids with signal \code{>= I} are returned in the first +least \code{k} centroids with signal \verb{>= I} are returned in the first step.} \item{integrate}{\code{numeric(1)}, integration method. For \code{integrate = 1} peak @@ -41,7 +52,13 @@ the \emph{regions of interest}).} columns should be returned.} \item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous data within -regions of interest is checked to be above the first baseline.} +regions of interest is checked to be above the first baseline. In detail, +a first \emph{rough} estimate of the noise is calculated and peak detection +is performed only in regions in which multiple sequential signals are +higher than this first estimated baseline/noise level.} + +\item{extendLengthMSW}{\code{logical(1)}. If \code{TRUE} the "open" method of EIC +extension is used, rather than the default "reflect" method.} \item{...}{currently ignored.} } @@ -56,7 +73,7 @@ columns: \item \code{"intb"}: per-peak baseline corrected integrated peak intensity. \item \code{"maxo"}: maximum (original) intensity of the peak. \item \code{"sn"}: signal to noise ratio of the peak defined as -\code{(maxo - baseline)/sd} with \code{sd} being the standard defiatio of the local +\code{(maxo - baseline)/sd} with \code{sd} being the standard deviation of the local chromatographic noise. } @@ -96,7 +113,7 @@ od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), ## Extract chromatographic data for a small m/z range mzr <- c(272.1, 272.2) -chr <- chromatogram(od, mz = mzr)[1, 1] +chr <- chromatogram(od, mz = mzr, rt = c(3000, 3300))[1, 1] int <- intensity(chr) rt <- rtime(chr) @@ -117,7 +134,8 @@ rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], \link{centWave} for a detailed description of the peak detection method. -Other peak detection functions for chromatographic data: \code{\link{peaksWithMatchedFilter}} +Other peak detection functions for chromatographic data: +\code{\link{peaksWithMatchedFilter}()} } \author{ Johannes Rainer diff --git a/man/peaksWithMatchedFilter.Rd b/man/peaksWithMatchedFilter.Rd index d33937d77..9004842cf 100644 --- a/man/peaksWithMatchedFilter.Rd +++ b/man/peaksWithMatchedFilter.Rd @@ -4,8 +4,15 @@ \alias{peaksWithMatchedFilter} \title{Identify peaks in chromatographic data using matchedFilter} \usage{ -peaksWithMatchedFilter(int, rt, fwhm = 30, sigma = fwhm/2.3548, - max = 20, snthresh = 10, ...) +peaksWithMatchedFilter( + int, + rt, + fwhm = 30, + sigma = fwhm/2.3548, + max = 20, + snthresh = 10, + ... +) } \arguments{ \item{int}{\code{numeric} with intensity values.} @@ -48,12 +55,16 @@ on chromatographic data (i.e. with only intensities and retention time). } \examples{ -## Read one file from the faahKO package -od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), - mode = "onDisk") +## Load the test file +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Subset to one file and drop identified chromatographic peaks +data <- dropChromPeaks(filterFile(faahko_sub, 1)) ## Extract chromatographic data for a small m/z range -chr <- chromatogram(od, mz = c(272.1, 272.3))[1, 1] +chr <- chromatogram(data, mz = c(272.1, 272.3), rt = c(3000, 3200))[1, 1] pks <- peaksWithMatchedFilter(intensity(chr), rtime(chr)) pks @@ -67,7 +78,8 @@ rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], ybottom = c(0, 0), \link{matchedFilter} for a detailed description of the peak detection method. -Other peak detection functions for chromatographic data: \code{\link{peaksWithCentWave}} +Other peak detection functions for chromatographic data: +\code{\link{peaksWithCentWave}()} } \author{ Johannes Rainer diff --git a/man/plotAdjustedRtime.Rd b/man/plotAdjustedRtime.Rd index d2fd96e06..9e1b43b35 100644 --- a/man/plotAdjustedRtime.Rd +++ b/man/plotAdjustedRtime.Rd @@ -4,11 +4,21 @@ \alias{plotAdjustedRtime} \title{Visualization of alignment results} \usage{ -plotAdjustedRtime(object, col = "#00000080", lty = 1, type = "l", - adjustedRtime = TRUE, xlab = ifelse(adjustedRtime, yes = - expression(rt[adj]), no = expression(rt[raw])), - ylab = expression(rt[adj] - rt[raw]), peakGroupsCol = "#00000060", - peakGroupsPch = 16, peakGroupsLty = 3, ylim, ...) +plotAdjustedRtime( + object, + col = "#00000080", + lty = 1, + lwd = 1, + type = "l", + adjustedRtime = TRUE, + xlab = ifelse(adjustedRtime, yes = expression(rt[adj]), no = expression(rt[raw])), + ylab = expression(rt[adj] - rt[raw]), + peakGroupsCol = "#00000060", + peakGroupsPch = 16, + peakGroupsLty = 3, + ylim, + ... +) } \arguments{ \item{object}{A \code{\link{XCMSnExp}} object with the alignment results.} @@ -18,6 +28,8 @@ samples.} \item{lty}{line type to be used for the lines of the individual samples.} +\item{lwd}{line width to be used for the lines of the individual samples.} + \item{type}{plot type to be used. See help on the \code{par} function for supported values.} @@ -54,39 +66,27 @@ time (y-axis) for each file along the (adjusted or raw) retention time groups) used for the alignment are shown. } \examples{ -## Below we perform first a peak detection (using the matchedFilter -## method) on some of the test files from the faahKO package followed by -## a peak grouping and retention time adjustment using the "peak groups" -## method -library(faahKO) -library(xcms) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) - -## Reading 2 of the KO samples -raw_data <- readMSData(fls[1:2], mode = "onDisk") - -## Perform the peak detection using the matchedFilter method. -mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) -res <- findChromPeaks(raw_data, param = mfp) + +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) ## Performing the peak grouping using the "peak density" method. -p <- PeakDensityParam(sampleGroups = c(1, 1)) -res <- groupChromPeaks(res, param = p) +p <- PeakDensityParam(sampleGroups = c(1, 1, 1)) +res <- groupChromPeaks(faahko_sub, param = p) ## Perform the retention time adjustment using peak groups found in both ## files. fgp <- PeakGroupsParam(minFraction = 1) res <- adjustRtime(res, param = fgp) -## Visualize the impact of the alignment. We show both versions of the plot, -## with the raw retention times on the x-axis (top) and with the adjusted -## retention times (bottom). -par(mfrow = c(2, 1)) +## Visualize the impact of the alignment. plotAdjustedRtime(res, adjusted = FALSE) grid() -plotAdjustedRtime(res) -grid() } \seealso{ \code{\link{adjustRtime}} for all retention time correction/ diff --git a/man/plotChromPeakDensity.Rd b/man/plotChromPeakDensity.Rd index 267dfbf5f..76653f57d 100644 --- a/man/plotChromPeakDensity.Rd +++ b/man/plotChromPeakDensity.Rd @@ -1,13 +1,24 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/functions-XCMSnExp.R -\name{plotChromPeakDensity} +% Please edit documentation in R/methods-XCMSnExp.R +\name{plotChromPeakDensity,XCMSnExp-method} +\alias{plotChromPeakDensity,XCMSnExp-method} \alias{plotChromPeakDensity} \title{Plot chromatographic peak density along the retention time axis} \usage{ -plotChromPeakDensity(object, mz, rt, param, simulate = TRUE, - col = "#00000080", xlab = "retention time", ylab = "sample", - xlim = range(rt), main = NULL, type = c("any", "within", - "apex_within"), ...) +\S4method{plotChromPeakDensity}{XCMSnExp}( + object, + mz, + rt, + param, + simulate = TRUE, + col = "#00000080", + xlab = "retention time", + ylab = "sample", + xlim = range(rt), + main = NULL, + type = c("any", "within", "apex_within"), + ... +) } \arguments{ \item{object}{A \link{XCMSnExp} object with identified @@ -79,46 +90,25 @@ specified \code{mz} slice at their retention time (x-axis) and sample in which they were detected (y-axis). The density function is plotted as a black line. Parameters for the \code{density} function are taken from the \code{param} object. Grey rectangles indicate which chromatographic peaks -would be grouped into a feature by the \code{peak density} correspondence +would be grouped into a feature by the \verb{peak density} correspondence method. Parameters for the algorithm are also taken from \code{param}. See \code{\link[=groupChromPeaks-density]{groupChromPeaks-density()}} for more information about the algorithm and its supported settings. } \examples{ -## Below we perform first a peak detection (using the centWave -## method) on some of the test files from the faahKO package. -library(faahKO) -library(xcms) -fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, - full.names = TRUE) - -## Reading 2 of the KO samples -raw_data <- readMSData(fls[1:2], mode = "onDisk") - -## Perform the peak detection using the centWave method (settings are tuned -## to speed up example execution) -res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 3000, snthresh = 40)) - -## Align the samples using obiwarp -res <- adjustRtime(res, param = ObiwarpParam()) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") ## Plot the chromatographic peak density for a specific mz range to evaluate ## different peak density correspondence settings. mzr <- c(305.05, 305.15) -plotChromPeakDensity(res, mz = mzr, pch = 16, - param = PeakDensityParam(sampleGroups = rep(1, length(fileNames(res))))) - -## Use a larger bandwidth -plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(bw = 60, - sampleGroups = rep(1, length(fileNames(res)))), pch = 16) -## Neighboring peaks are now fused into one. +plotChromPeakDensity(faahko_sub, mz = mzr, pch = 16, + param = PeakDensityParam(sampleGroups = rep(1, length(fileNames(faahko_sub))))) -## Require the chromatographic peak to be present in all samples of a group -plotChromPeakDensity(res, mz = mzr, pch = 16, - param = PeakDensityParam(minFraction = 1, - sampleGroups = rep(1, length(fileNames(res))))) } \seealso{ \code{\link[=groupChromPeaks-density]{groupChromPeaks-density()}} for details on the diff --git a/man/plotChromPeaks.Rd b/man/plotChromPeaks.Rd index c7c9346d6..3d82777a5 100644 --- a/man/plotChromPeaks.Rd +++ b/man/plotChromPeaks.Rd @@ -5,13 +5,30 @@ \alias{plotChromPeakImage} \title{General visualizations of peak detection results} \usage{ -plotChromPeaks(x, file = 1, xlim = NULL, ylim = NULL, add = FALSE, - border = "#00000060", col = NA, xlab = "retention time", - ylab = "mz", main = NULL, ...) - -plotChromPeakImage(x, binSize = 30, xlim = NULL, log = FALSE, - xlab = "retention time", yaxt = par("yaxt"), - main = "Chromatographic peak counts", ...) +plotChromPeaks( + x, + file = 1, + xlim = NULL, + ylim = NULL, + add = FALSE, + border = "#00000060", + col = NA, + xlab = "retention time", + ylab = "mz", + main = NULL, + ... +) + +plotChromPeakImage( + x, + binSize = 30, + xlim = NULL, + log = FALSE, + xlab = "retention time", + yaxt = par("yaxt"), + main = "Chromatographic peak counts", + ... +) } \arguments{ \item{x}{\code{\link{XCMSnExp}} object.} @@ -85,27 +102,20 @@ and \code{par(lty = 2)}, respectively. } \examples{ -## Perform peak detection on two files from the faahKO package. -library(xcms) -library(faahKO) -faahko_file <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO")) - -od <- readMSData(faahko_file, mode = "onDisk") - -## Peak detection using the 'matchedFilter' method. Note that we are using a -## larger binSize to reduce the runtime of the example. -xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") ## plotChromPeakImage: plot an image for the identified peaks per file -plotChromPeakImage(xod) +plotChromPeakImage(faahko_sub) ## Show all detected chromatographic peaks from the first file -plotChromPeaks(xod) +plotChromPeaks(faahko_sub) ## Plot all detected peaks from the second file and restrict the plot to a ## mz-rt slice -plotChromPeaks(xod, file = 2, xlim = c(3500, 3600), ylim = c(400, 600)) +plotChromPeaks(faahko_sub, file = 2, xlim = c(3500, 3600), ylim = c(400, 600)) } \seealso{ \code{\link{highlightChromPeaks}} for the function to highlight diff --git a/man/plotChromatogramsOverlay.Rd b/man/plotChromatogramsOverlay.Rd new file mode 100644 index 000000000..d079c1fb1 --- /dev/null +++ b/man/plotChromatogramsOverlay.Rd @@ -0,0 +1,175 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-MChromatograms.R +\name{plotChromatogramsOverlay} +\alias{plotChromatogramsOverlay} +\alias{plotChromatogramsOverlay,MChromatograms-method} +\alias{plotChromatogramsOverlay,XChromatograms-method} +\title{Plot multiple chromatograms into the same plot} +\usage{ +\S4method{plotChromatogramsOverlay}{MChromatograms}( + object, + col = "#00000060", + type = "l", + main = NULL, + xlab = "rtime", + ylab = "intensity", + xlim = numeric(), + ylim = numeric(), + stacked = 0, + transform = identity, + ... +) + +\S4method{plotChromatogramsOverlay}{XChromatograms}( + object, + col = "#00000060", + type = "l", + main = NULL, + xlab = "rtime", + ylab = "intensity", + xlim = numeric(), + ylim = numeric(), + peakType = c("polygon", "point", "rectangle", "none"), + peakBg = NULL, + peakCol = NULL, + peakPch = 1, + stacked = 0, + transform = identity, + ... +) +} +\arguments{ +\item{object}{\code{\link[=MChromatograms]{MChromatograms()}} or \code{\link[=XChromatograms]{XChromatograms()}} object.} + +\item{col}{definition of the color in which the chromatograms should be +drawn. Can be of length 1 or equal to \code{nrow(object)} to plot each +overlayed chromatogram in a different color.} + +\item{type}{\code{character(1)} defing the type of the plot. By default +(\code{type = "l"}) each chromatogram is drawn as a line.} + +\item{main}{optional title of the plot. If not defined, the range of m/z +values is used.} + +\item{xlab}{\code{character(1)} defining the x-axis label.} + +\item{ylab}{\code{character(1)} defining the y-axis label.} + +\item{xlim}{optional \code{numeric(2)} defining the x-axis limits.} + +\item{ylim}{optional \code{numeric(2)} defining the y-axis limits.} + +\item{stacked}{\code{numeric(1)} defining the part (proportion) of the y-axis to +use to \emph{stack} EICs depending on their m/z values. If \code{stacked = 0} (the +default) no stacking is performed. With \code{stacked = 1} half of the y-axis +is used for stacking and half for the intensity y-axis (i.e. the ratio +between stacking and intensity y-axis is 1:1). Note that if \code{stacking} +is different from 0 no y-axis and label are drawn.} + +\item{transform}{\code{function} to transform the intensity values before +plotting. Defaults to \code{transform = identity} which plots the data as it +is. With \code{transform = log10} intensity values would be log10 transformed +before plotting.} + +\item{...}{optional arguments to be passed to the plotting functions (see +help on the base R \code{plot} function.} + +\item{peakType}{if \code{object} is a \code{XChromatograms} object: how chromatographic +peaks should be drawn: \code{peakType = "polygon"} (the default): label the +full chromatographic peak area, \code{peakType = "rectangle"}: indicate the +chromatographic peak by a rectangle and \code{peakType = "point"}: label the +chromatographic peaks' apex position with a point.} + +\item{peakBg}{if \code{object} is a \code{XChromatograms} object: definition of +background color(s) for each chromatographic peak. Has to be either of +length 1 or equal to the number of peaks in \code{object}. If not specified, +the peak will be drawn in the color defined by \code{col}.} + +\item{peakCol}{if \code{object} is a \code{XChromatograms} object: definition of +color(s) for each chromatographic peak. Has to be either of length 1 or +equal to the number of peaks in \code{object}. If not specified, the peak will +be drawn in the color defined by \code{col}.} + +\item{peakPch}{if \code{object} is a \code{XChromatograms} object: \emph{point character} to +be used to label the apex position of the chromatographic peak if +\code{peakType = "point"}.} +} +\value{ +silently returns a \code{list} (length equal to \code{ncol(object)} of +\code{numeric} (length equal to \code{nrow(object)}) with the y position of +each EIC. +} +\description{ +\code{plotOverlay} draws chromatographic peak data from multiple (different) +extracted ion chromatograms (EICs) into the same plot. This allows to +directly compare the peak shape of these EICs in the same sample. In +contrast to the \code{plot} function for \code{\link[=MChromatograms]{MChromatograms()}} object, which draws +the data from the same EIC across multiple samples in the same plot, this +function draws the different EICs from the same sample into the same plot. + +If \code{plotChromatogramsOverlay} is called on a \code{XChromatograms} object any +present chromatographic peaks will also be highlighted/drawn depending on the +parameters \code{peakType}, \code{peakCol}, \code{peakBg} and \code{peakPch} (see also help on +the \code{plot} function for \code{XChromatogram()} object for details). +} +\examples{ + +## Load preprocessed data and extract EICs for some features. +library(xcms) +data(xdata) +## Update the path to the files for the local system +dirname(xdata) <- c(rep(system.file("cdf", "KO", package = "faahKO"), 4), + rep(system.file("cdf", "WT", package = "faahKO"), 4)) +## Subset to the first 3 files. +xdata <- filterFile(xdata, 1:3, keepFeatures = TRUE) + +## Define features for which to extract EICs +fts <- c("FT097", "FT163", "FT165") +chrs <- featureChromatograms(xdata, features = fts) + +plotChromatogramsOverlay(chrs) + +## plot the overlay of EICs in the first sample +plotChromatogramsOverlay(chrs[, 1]) + +## Define a different color for each feature (row in chrs). By default, also +## all chromatographic peaks of a feature is labeled in the same color. +plotChromatogramsOverlay(chrs[, 1], + col = c("#ff000040", "#00ff0040", "#0000ff40")) + +## Alternatively, we can define a color for each individual chromatographic +## peak and provide this with the `peakBg` and `peakCol` parameters. +chromPeaks(chrs[, 1]) + +## Use a color for each of the two identified peaks in that sample +plotChromatogramsOverlay(chrs[, 1], + col = c("#ff000040", "#00ff0040", "#0000ff40"), + peakBg = c("#ffff0020", "#00ffff20")) + +## Plotting the data in all samples. +plotChromatogramsOverlay(chrs, + col = c("#ff000040", "#00ff0040", "#0000ff40")) + +## Creating a "stacked" EIC plot: the EICs are placed along the y-axis +## relative to their m/z value. With `stacked = 1` the y-axis is split in +## half, the lower half being used for the stacking of the EICs, the upper +## half being used for the *original* intensity axis. +res <- plotChromatogramsOverlay(chrs[, 1], stacked = 1, + col = c("#ff000040", "#00ff0040", "#0000ff40")) +## add horizontal lines for the m/z values of each EIC +abline(h = res[[1]], col = "grey", lty = 2) + +## Note that this type of visualization is different than the conventional +## plot function for chromatographic data, which will draw the EICs for +## multiple samples into the same plot +plot(chrs) + +## Converting the object to a MChromatograms without detected peaks +chrs <- as(chrs, "MChromatograms") + +plotChromatogramsOverlay(chrs, + col = c("#ff000040", "#00ff0040", "#0000ff40")) +} +\author{ +Johannes Rainer +} diff --git a/man/plotFeatureGroups.Rd b/man/plotFeatureGroups.Rd new file mode 100644 index 000000000..bcad600c0 --- /dev/null +++ b/man/plotFeatureGroups.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-group-features.R +\name{plotFeatureGroups} +\alias{plotFeatureGroups} +\title{Plot feature groups in the m/z-retention time space} +\usage{ +plotFeatureGroups( + x, + xlim = numeric(), + ylim = numeric(), + xlab = "retention time", + ylab = "m/z", + pch = 4, + col = "#00000060", + type = "o", + main = "Feature groups", + featureGroups = character() +) +} +\arguments{ +\item{x}{\code{\link[=XCMSnExp]{XCMSnExp()}} object with grouped features (i.e. after calling +\code{\link[=groupFeatures]{groupFeatures()}}.} + +\item{xlim}{\code{numeric(2)} with the lower and upper limit for the x-axis.} + +\item{ylim}{\code{numeric(2)} with the lower and upper limit for the y-axis.} + +\item{xlab}{\code{character(1)} with the label for the x-axis.} + +\item{ylab}{\code{character(1)} with the label for the y-axis.} + +\item{pch}{the plotting character. Defaults to \code{pch = 4} i.e. plotting +features as crosses. See \code{\link[=par]{par()}} for more information.} + +\item{col}{color to be used to draw the features. At present only a single +color is supported.} + +\item{type}{plotting type (see \code{\link[=par]{par()}}). Defaults to \code{type = "o"} which +draws each feature as a point and connecting the features of the same +feature group with a line.} + +\item{main}{\code{character(1)} with the title of the plot.} + +\item{featureGroups}{optional \code{character} of feature group IDs to draw only +specified feature group(s). If not provided, all feature groups are +drawn.} +} +\description{ +\code{plotFeatureGroups} visualizes defined feature groups in the m/z by +retention time space. Features are indicated by points with features from +the same feature group being connected by a line. See \code{\link[=featureGroups]{featureGroups()}} +for details on and options for feature grouping. +} +\author{ +Johannes Rainer +} diff --git a/man/plotMsData.Rd b/man/plotMsData.Rd index f0d23147a..228487411 100644 --- a/man/plotMsData.Rd +++ b/man/plotMsData.Rd @@ -4,9 +4,14 @@ \alias{plotMsData} \title{DEPRECATED: Create a plot that combines a XIC and a mz/rt 2D plot for one sample} \usage{ -plotMsData(x, main = "", cex = 1, mfrow = c(2, 1), +plotMsData( + x, + main = "", + cex = 1, + mfrow = c(2, 1), grid.color = "lightgrey", - colramp = colorRampPalette(rev(brewer.pal(9, "YlGnBu")))) + colramp = colorRampPalette(rev(brewer.pal(9, "YlGnBu"))) +) } \arguments{ \item{x}{\code{data.frame} such as returned by the \code{\link[=extractMsData]{extractMsData()}} function. diff --git a/man/profGenerate.Rd b/man/profGenerate.Rd index b2d0211fd..8dab2c35e 100755 --- a/man/profGenerate.Rd +++ b/man/profGenerate.Rd @@ -16,15 +16,20 @@ } \usage{ profBin(x, y, num, xstart = min(x), xend = max(x), param = list()) -profBinM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, param = list()) +profBinM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, + param = list()) profBinLin(x, y, num, xstart = min(x), xend = max(x), param = list()) -profBinLinM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, param = list()) +profBinLinM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, + param = list()) profBinLinBase(x, y, num, xstart = min(x), xend = max(x), param = list()) -profBinLinBaseM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, param = list()) +profBinLinBaseM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, + param = list()) profIntLin(x, y, num, xstart = min(x), xend = max(x), param = list()) -profIntLinM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, param = list()) +profIntLinM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, + param = list()) profMaxIdx(x, y, num, xstart = min(x), xend = max(x), param = list()) -profMaxIdxM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, param = list()) +profMaxIdxM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, + param = list()) } \arguments{ \item{x}{numeric vector of value positions} @@ -84,17 +89,17 @@ profMaxIdxM(x, y, zidx, num, xstart = min(x), xend = max(x), NAOK = FALSE, param is double. } \examples{ - \dontrun{ - library(faahKO) - cdfpath <- system.file("cdf", package = "faahKO") - cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) - xraw <- xcmsRaw(cdffiles[1]) +\dontrun{ + library(faahKO) + cdfpath <- system.file("cdf", package = "faahKO") + cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) + xraw <- xcmsRaw(cdffiles[1]) - image(xraw) ## not how with intLin the intensity's blur - profMethod(xraw) <- "bin" - image(xraw) ## now with 'bin' there is no blurring good for centroid data - ##try binlinbase for profile data - } + image(xraw) ## not how with intLin the intensity's blur + profMethod(xraw) <- "bin" + image(xraw) ## now with 'bin' there is no blurring good for centroid data + ##try binlinbase for profile data +} } \author{Colin A. Smith, \email{csmith@scripps.edu}} \keyword{manip} diff --git a/man/profMat-xcmsSet.Rd b/man/profMat-xcmsSet.Rd index c15ca9b10..7f197e0ff 100644 --- a/man/profMat-xcmsSet.Rd +++ b/man/profMat-xcmsSet.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-xcmsRaw.R -\docType{methods} \name{profMat-xcmsSet} \alias{profMat-xcmsSet} \alias{profile-matrix} @@ -8,8 +7,7 @@ \alias{profMat,xcmsRaw-method} \title{The profile matrix} \usage{ -\S4method{profMat}{xcmsRaw}(object, method, step, baselevel, basespace, - mzrange.) +\S4method{profMat}{xcmsRaw}(object, method, step, baselevel, basespace, mzrange.) } \arguments{ \item{object}{The \code{\linkS4class{xcmsRaw}} object.} diff --git a/man/pval.Rd b/man/pval.Rd index 7ddf2cbe1..ffcaaf5bc 100755 --- a/man/pval.Rd +++ b/man/pval.Rd @@ -17,8 +17,5 @@ pval(X, classlabel, teststat) A numeric vector of p-values. } \author{Colin A. Smith, \email{csmith@scripps.edu}} -\seealso{ - \code{\link{mt.teststat}} -} \keyword{univar} \keyword{internal} diff --git a/man/reconstructChromPeakSpectra.Rd b/man/reconstructChromPeakSpectra.Rd new file mode 100644 index 000000000..2b476150f --- /dev/null +++ b/man/reconstructChromPeakSpectra.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{reconstructChromPeakSpectra} +\alias{reconstructChromPeakSpectra} +\title{Data independent acquisition (DIA): reconstruct MS2 spectra} +\usage{ +reconstructChromPeakSpectra( + object, + expandRt = 0, + diffRt = 2, + minCor = 0.8, + intensity = "maxo", + peakId = rownames(chromPeaks(object, msLevel = 1L)), + BPPARAM = bpparam(), + return.type = c("Spectra", "MSpectra") +) +} +\arguments{ +\item{object}{\code{XCMSnExp} with identified chromatographic peaks.} + +\item{expandRt}{\code{numeric(1)} allowing to expand the retention time range +for extracted ion chromatograms by a constant value (for the peak +shape correlation). Defaults to \code{expandRt = 0} hence correlates only +the signal included in the identified chromatographic peaks.} + +\item{diffRt}{\code{numeric(1)} defining the maximal allowed difference between +the retention time of the chromatographic peak (apex) and the retention +times of MS2 chromatographic peaks (apex) to consider them as +representing candidate fragments of the original ion.} + +\item{minCor}{\code{numeric(1)} defining the minimal required correlation +coefficient for MS2 chromatographic peaks to be considered for MS2 +spectrum reconstruction.} + +\item{intensity}{\code{character(1)} defining the column in the \code{chromPeaks} +matrix that should be used for the intensities of the reconstructed +spectra's peaks. The same value from the MS1 chromatographic peaks will +be used as \code{precursorIntensity} of the resulting spectra.} + +\item{peakId}{optional \code{character} vector with peak IDs (i.e. rownames of +\code{chromPeaks}) of MS1 peaks for which MS2 spectra should be reconstructed. +By default they are reconstructed for all MS1 chromatographic peaks.} + +\item{BPPARAM}{parallel processing setup. See \code{\link[=bpparam]{bpparam()}} for more +information.} + +\item{return.type}{\code{character(1)} defining the type of the returned object. +Only \code{return.type = "Spectra"} is supported, \code{return.type = "MSpectra"} +is deprecated.} +} +\value{ +\itemize{ +\item \code{Spectra} object (defined in the \code{Spectra} package) with the +reconstructed MS2 spectra for all MS1 peaks in \code{object}. Contains +empty spectra (i.e. without m/z and intensity values) for MS1 peaks for +which reconstruction was not possible (either no MS2 signal was recorded +or the correlation of the MS2 chromatographic peaks with the MS1 +chromatographic peak was below threshold \code{minCor}. Spectra variables +\code{"ms2_peak_id"} and \code{"ms2_peak_cor"} (of type \code{\link[=CharacterList]{CharacterList()}} +and \code{\link[=NumericList]{NumericList()}} with length equal to the number of peaks per +reconstructed MS2 spectrum) providing the IDs and the correlation of the +MS2 chromatographic peaks from which the MS2 spectrum was reconstructed. +As retention time the median retention times of all MS2 chromatographic +peaks used for the spectrum reconstruction is reported. The MS1 +chromatographic peak intensity is reported as the reconstructed +spectrum's \code{precursorIntensity} value (see parameter \code{intensity} above). +} +} +\description{ +\emph{Reconstructs} MS2 spectra for each MS1 chromatographic peak (if possible) +for data independent acquisition (DIA) data (such as SWATH). See the +\emph{LC-MS/MS analysis} vignette for more details and examples. +} +\details{ +In detail, the function performs for each MS1 chromatographic peak: +\itemize{ +\item Identify all MS2 chromatographic peaks from the isolation window +containing the m/z of the ion (i.e. the MS1 chromatographic peak) with +approximately the same retention time than the MS1 peak (accepted rt shift +can be specified with the \code{diffRt} parameter). +\item Correlate the peak shapes of the candidate MS2 chromatographic peaks with +the peak shape of the MS1 peak retaining only MS2 chromatographic peaks +for which the correlation is \verb{> minCor}. +\item Reconstruct the MS2 spectrum using the m/z of all above selected MS2 +chromatographic peaks and their intensity (either \code{"maxo"} or \code{"into"}). +Each MS2 chromatographic peak selected for an MS1 peak will thus represent +one \strong{mass peak} in the reconstructed spectrum. +} + +The resulting \code{Spectra} object provides also the peak IDs of the MS2 +chromatographic peaks for each spectrum as well as their correlation value +with spectra variables \emph{ms2_peak_id} and \emph{ms2_peak_cor}. +} +\seealso{ +\code{\link[=findChromPeaksIsolationWindow]{findChromPeaksIsolationWindow()}} for the function to perform MS2 +peak detection in DIA isolation windows and for examples. +} +\author{ +Johannes Rainer, Michael Witting +} diff --git a/man/refineChromPeaks-clean.Rd b/man/refineChromPeaks-clean.Rd new file mode 100644 index 000000000..555edb89f --- /dev/null +++ b/man/refineChromPeaks-clean.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-Params.R, R/methods-XCMSnExp.R +\name{CleanPeaksParam} +\alias{CleanPeaksParam} +\alias{refineChromPeaks,XCMSnExp,CleanPeaksParam-method} +\alias{refineChromPeaks} +\alias{CleanPeaksParam-class} +\alias{show,CleanPeaksParam-method} +\title{Remove chromatographic peaks with too large rt width} +\usage{ +CleanPeaksParam(maxPeakwidth = 10) + +\S4method{refineChromPeaks}{XCMSnExp,CleanPeaksParam}(object, param = CleanPeaksParam(), msLevel = 1L) +} +\arguments{ +\item{maxPeakwidth}{for \code{CleanPeaksParam}: \code{numeric(1)} defining the maximal +allowed peak width (in retention time).} + +\item{object}{\link{XCMSnExp} object with identified chromatographic peaks.} + +\item{param}{\code{CleanPeaksParam} object defining the settings for the method.} + +\item{msLevel}{\code{integer} defining for which MS level(s) the chromatographic +peaks should be cleaned.} +} +\value{ +\code{XCMSnExp} object with chromatographic peaks exceeding the specified +maximal retention time width being removed. +} +\description{ +Remove chromatographic peaks with a retention time range larger than the +provided maximal acceptable width (\code{maxPeakwidth}). +} +\note{ +\code{refineChromPeaks} methods will always remove feature definitions, because +a call to this method can change or remove identified chromatographic peaks, +which may be part of features. +} +\examples{ + +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) + +## Distribution of chromatographic peak widths +quantile(chromPeaks(faahko_sub)[, "rtmax"] - chromPeaks(faahko_sub)[, "rtmin"]) + +## Remove all chromatographic peaks with a width larger 60 seconds +data <- refineChromPeaks(faahko_sub, param = CleanPeaksParam(60)) + +quantile(chromPeaks(data)[, "rtmax"] - chromPeaks(data)[, "rtmin"]) +} +\seealso{ +Other chromatographic peak refinement methods: +\code{\link{FilterIntensityParam}}, +\code{\link{MergeNeighboringPeaksParam}} +} +\author{ +Johannes Rainer +} +\concept{chromatographic peak refinement methods} diff --git a/man/refineChromPeaks-filter-intensity.Rd b/man/refineChromPeaks-filter-intensity.Rd new file mode 100644 index 000000000..226b62d24 --- /dev/null +++ b/man/refineChromPeaks-filter-intensity.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-Params.R, R/methods-XCMSnExp.R +\name{FilterIntensityParam} +\alias{FilterIntensityParam} +\alias{refineChromPeaks,XCMSnExp,FilterIntensityParam-method} +\alias{FilterIntensityParam-class} +\alias{show,FilterIntensityParam-method} +\title{Remove chromatographic peaks based on intensity} +\usage{ +FilterIntensityParam(threshold = 0, nValues = 1L, value = "maxo") + +\S4method{refineChromPeaks}{XCMSnExp,FilterIntensityParam}( + object, + param = FilterIntensityParam(), + msLevel = 1L, + BPPARAM = bpparam() +) +} +\arguments{ +\item{threshold}{\code{numeric(1)} defining the minimal required intensity for +a peak to be retained. Defaults to \code{threshold = 0}.} + +\item{nValues}{\code{integer(1)} defining the number of data points (per +chromatographic peak) that have to be \verb{>= threshold}. Defaults to +\code{nValues = 1}.} + +\item{value}{\code{character(1)} specifying the column in \code{\link[=chromPeaks]{chromPeaks()}} that +should be used for the comparison. This is ignored for \code{nValues > 1}.} + +\item{object}{\link{XCMSnExp} object with identified chromatographic peaks.} + +\item{param}{\code{FilterIntensityParam} object defining the settings for +the method.} + +\item{msLevel}{\code{integer(1)} defining the MS level in which peaks should be +filtered.} + +\item{BPPARAM}{parameter object to set up parallel processing. Uses the +default parallel processing setup returned by \code{bpparam()}. See +\code{\link[=bpparam]{bpparam()}} for details and examples.} +} +\value{ +\code{XCMSnExp} object with filtererd chromatographic peaks. +} +\description{ +Remove chromatographic peaks with intensities below the specified threshold. +By default, with \code{nValues = 1}, all peaks with an intensity +\verb{>= threshold} are retained. Parameter \code{value} allows to specify the column of +the \code{\link[=chromPeaks]{chromPeaks()}} matrix that should be used for the filtering (defaults to +\code{value = "maxo"} and thus evaluating the maximal intensity for each peak). +With \code{nValues > 1} it is possible to keep only peaks that have \code{nValues} +intensities \verb{>= threshold}. Note that this requires data import from the +original MS files and run time of the call can thus be significantly larger. +Also, for \code{nValues > 1} parameter \code{value} is ignored. +} +\examples{ + +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) + +## Remove all peaks with a maximal intensity below 50000 +res <- refineChromPeaks(faahko_sub, param = FilterIntensityParam(threshold = 50000)) + +nrow(chromPeaks(faahko_sub)) +nrow(chromPeaks(res)) + +all(chromPeaks(res)[, "maxo"] > 50000) + +## Keep only chromatographic peaks that have 3 signals above 20000; we +## perform this on the data of a single file. +xdata <- filterFile(faahko_sub) + +res <- refineChromPeaks(xdata, FilterIntensityParam(threshold = 20000, nValues = 3)) +nrow(chromPeaks(xdata)) +nrow(chromPeaks(res)) +} +\seealso{ +Other chromatographic peak refinement methods: +\code{\link{CleanPeaksParam}}, +\code{\link{MergeNeighboringPeaksParam}} +} +\author{ +Johannes Rainer, Mar Garcia-Aloy +} +\concept{chromatographic peak refinement methods} diff --git a/man/refineChromPeaks-merge.Rd b/man/refineChromPeaks-merge.Rd new file mode 100644 index 000000000..fa5ed89f4 --- /dev/null +++ b/man/refineChromPeaks-merge.Rd @@ -0,0 +1,161 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-Params.R, R/methods-XCMSnExp.R +\name{MergeNeighboringPeaksParam} +\alias{MergeNeighboringPeaksParam} +\alias{refineChromPeaks,XCMSnExp,MergeNeighboringPeaksParam-method} +\alias{MergeNeighboringPeaksParam-class} +\alias{show,MergeNeighboringPeaksParam-method} +\title{Merge neighboring and overlapping chromatographic peaks} +\usage{ +MergeNeighboringPeaksParam( + expandRt = 2, + expandMz = 0, + ppm = 10, + minProp = 0.75 +) + +\S4method{refineChromPeaks}{XCMSnExp,MergeNeighboringPeaksParam}( + object, + param = MergeNeighboringPeaksParam(), + msLevel = 1L, + BPPARAM = bpparam() +) +} +\arguments{ +\item{expandRt}{\code{numeric(1)} defining by how many seconds the retention time +window is expanded on both sides to check for overlapping peaks.} + +\item{expandMz}{\code{numeric(1)} constant value by which the m/z range of each +chromatographic peak is expanded (on both sides!) to check for +overlapping peaks.} + +\item{ppm}{\code{numeric(1)} defining a m/z relative value (in parts per million) +by which the m/z range of each chromatographic peak is expanded +to check for overlapping peaks.} + +\item{minProp}{\code{numeric(1)} between \code{0} and \code{1} representing the proporion +of intensity to be required for peaks to be joined. See description for +more details. The default (\code{minProp = 0.75}) means that peaks are only +joined if the signal half way between then is larger 75\% of the smallest +of the two peak's \code{"maxo"} (maximal intensity at peak apex).} + +\item{object}{\link{XCMSnExp} object with identified chromatographic peaks.} + +\item{param}{\code{MergeNeighboringPeaksParam} object defining the settings for +the method.} + +\item{msLevel}{\code{integer} defining for which MS level(s) the chromatographic +peaks should be merged.} + +\item{BPPARAM}{parameter object to set up parallel processing. Uses the +default parallel processing setup returned by \code{bpparam()}. See +\code{\link[=bpparam]{bpparam()}} for details and examples.} +} +\value{ +\code{XCMSnExp} object with chromatographic peaks matching the defined +conditions being merged. +} +\description{ +Peak detection sometimes fails to identify a chromatographic peak correctly, +especially for broad peaks and if the peak shape is irregular (mostly for +HILIC data). In such cases several smaller peaks are reported. Also, peak +detection can result in partially or completely overlapping peaks. To reduce +such peak detection artifacts, this function merges chromatographic peaks +which are overlapping or close in rt and m/z dimension considering also the +measured signal intensities in the region between them. + +Chromatographic peaks are first expanded in m/z and retention time dimension +(based on parameters \code{expandMz}, \code{ppm} and \code{expandRt}) and subsequently +grouped into sets of merge candidates if they are (after expansion) +overlapping in both m/z and rt (within the same sample). +Candidate peaks are merged if the average intensity of the 3 data +points in the middle position between them (i.e. at half the distance between +\code{"rtmax"} of the first and \code{"rtmin"} of the second peak) is larger than a +certain proportion (\code{minProp}) of the smaller maximal intensity (\code{"maxo"}) +of both peaks. In cases in which this calculated mid point is \strong{not} +located between the apexes of the two peaks (e.g. if the peaks are largely +overlapping) the average signal intensity at half way between the apexes is +used instead. Candidate peaks are not joined if all 3 data points between +them have \code{NA} intensities. +The joined peaks get the \code{"mz"}, \code{"rt"}, \code{"sn"} and \code{"maxo"} values from +the peak with the largest signal (\code{"maxo"}) as well as its row in the +metadata data frame of the peak (\code{chromPeakData}). The \code{"rtmin"}, \code{"rtmax"} +of the merged peaks are updated and \code{"into"} is recalculated based on all +the signal between \code{"rtmin"} and \code{"rtmax"} of the new merged peak. See +details for information on the \code{"mzmin"} and \code{"mzmax"} values of the merged +peak. +} +\details{ +For each set of candidate peaks an ion chromatogram is +extracted using the range of retention times and m/z values of these peaks. +The m/z range for the extracted ion chromatogram is expanded by \code{expandMz} +and \code{ppm} (on both sides) to reduce the possibility of missing signal +intensities between candidate peaks (variance of measured m/z values for +lower intensities is larger than for higher intensities and thus data points +not being part of identified chromatographic peaks tend to have m/z values +outside of the m/z range of the candidate peaks - especially for ToF +instruments). This also ensures that all data points from the same ion are +considered for the peak integration of merged peaks. The smallest and largest +m/z value of all data points used in the peak integration of the merged peak +are used as the merged peak's m/z range (i.e. columns \code{"mzmin"} and \code{"mzmax"}). +} +\note{ +Note that \strong{each} peak gets expanded by \code{expandMz} and \code{expandRt}, thus +peaks differing by \code{2 * expandMz} (or \code{expandRt}) will be identified as +\emph{overlapping}. As an example: m/z max of one peak is 12.2, m/z min of +another one is 12.4, if \code{expandMz = 0.1} the m/z max of the first peak +will be 12.3 and the m/z min of the second one 12.3, thus both are +considered overlapping. + +\code{refineChromPeaks} methods will always remove feature definitions, because +a call to this method can change or remove identified chromatographic peaks, +which may be part of features. + +Merging of chromatographic peaks is performed along the retention time axis, +i.e. candidate peaks are first ordered by their \code{"rtmin"} value. The signals +at half way between the first and the second candidate peak are then compared +to the smallest \code{"maxo"} of both and the two peaks are then merged if the +average signal between the peaks is larger \code{minProp}. For merging any +additional peak in a candidate peak list the \code{"maxo"} of that peak and the +newly merged peak are considered. +} +\examples{ + +## Load a test data set with detected peaks +data(faahko_sub) +## Update the path to the files for the local system +dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") + +## Disable parallel processing for this example +register(SerialParam()) + +## Subset to a single file +xd <- filterFile(faahko_sub, file = 1) + +## Example of a split peak that will be merged +mzr <- 305.1 + c(-0.01, 0.01) +chr <- chromatogram(xd, mz = mzr, rt = c(2700, 3700)) +plot(chr) + +## Combine the peaks +res <- refineChromPeaks(xd, param = MergeNeighboringPeaksParam(expandRt = 4)) +chr_res <- chromatogram(res, mz = mzr, rt = c(2700, 3700)) +plot(chr_res) + +## Example of a peak that was not merged, because the signal between them +## is lower than the cut-off minProp +mzr <- 496.2 + c(-0.01, 0.01) +chr <- chromatogram(xd, mz = mzr, rt = c(3200, 3500)) +plot(chr) +chr_res <- chromatogram(res, mz = mzr, rt = c(3200, 3500)) +plot(chr_res) +} +\seealso{ +Other chromatographic peak refinement methods: +\code{\link{CleanPeaksParam}}, +\code{\link{FilterIntensityParam}} +} +\author{ +Johannes Rainer, Mar Garcia-Aloy +} +\concept{chromatographic peak refinement methods} diff --git a/man/removeIntensity-Chromatogram.Rd b/man/removeIntensity-Chromatogram.Rd new file mode 100644 index 000000000..18cc89a16 --- /dev/null +++ b/man/removeIntensity-Chromatogram.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-Chromatogram.R, +% R/methods-MChromatograms.R, R/methods-XChromatogram.R +\name{removeIntensity,Chromatogram-method} +\alias{removeIntensity,Chromatogram-method} +\alias{removeIntensity} +\alias{removeIntensity,MChromatograms-method} +\alias{removeIntensity,XChromatogram-method} +\title{Remove intensities from chromatographic data} +\usage{ +\S4method{removeIntensity}{Chromatogram}(object, which = "below_threshold", threshold = 0) + +\S4method{removeIntensity}{MChromatograms}(object, which = "below_threshold", threshold = 0) + +\S4method{removeIntensity}{XChromatogram}( + object, + which = c("below_threshold", "outside_chromPeak"), + threshold = 0 +) +} +\arguments{ +\item{object}{an object representing chromatographic data. Can be a +\code{\link[=Chromatogram]{Chromatogram()}}, \code{\link[=MChromatograms]{MChromatograms()}}, \code{\link[=XChromatogram]{XChromatogram()}} or +\code{\link[=XChromatograms]{XChromatograms()}} object.} + +\item{which}{\code{character(1)} defining the condition to remove intensities. +See description for details and options.} + +\item{threshold}{\code{numeric(1)} defining the threshold below which intensities +are removed (if \code{which = "below_threshold"}).} +} +\value{ +the input object with matching intensities being replaced by \code{NA}. +} +\description{ +\code{removeIntensities} allows to remove intensities from chromatographic data +matching certain conditions (depending on parameter \code{which}). The +intensities are actually not \emph{removed} but replaced with \code{NA_real_}. To +actually \strong{remove} the intensities (and the associated retention times) +use \code{\link[=clean]{clean()}} afterwards. + +Parameter \code{which} allows to specify which intensities should be replaced by +\code{NA_real_}. By default (\code{which = "below_threshod"} intensities below +\code{threshold} are removed. If \code{x} is a \code{XChromatogram} or \code{XChromatograms} +object (and hence provides also chromatographic peak definitions within the +object) \code{which = "outside_chromPeak"} can be selected which removes any +intensity which is outside the boundaries of identified chromatographic +peak(s) in the chromatographic data. + +Note that \code{\link[=filterIntensity]{filterIntensity()}} might be a better approach to subset/filter +chromatographic data. +} +\examples{ + +chr <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + +## Remove all intensities below 20 +res <- removeIntensity(chr, threshold = 20) +intensity(res) +} +\author{ +Johannes Rainer +} diff --git a/man/rla.Rd b/man/rla.Rd index 0f52ed4ac..cf5cabdd7 100644 --- a/man/rla.Rd +++ b/man/rla.Rd @@ -3,10 +3,7 @@ \name{rla} \alias{rla} \alias{rowRla} -\title{Calculate relative log abundances - -\code{rla} calculates the relative log abundances (RLA, see reference) on a -\code{numeric} vector.} +\title{Calculate relative log abundances} \usage{ rla(x, group, log.transform = TRUE) @@ -28,12 +25,8 @@ Set to \code{log.transform = FALSE} if \code{x} is already in log scale.} the same dimensions than \code{x} (for \code{rowRla}). } \description{ -Calculate relative log abundances - \code{rla} calculates the relative log abundances (RLA, see reference) on a \code{numeric} vector. - -\code{rowRla} calculates row-wise RLAs. } \details{ The RLA is defines as the (log) abundance of an analyte relative diff --git a/man/showError-xcmsSet-method.Rd b/man/showError-xcmsSet-method.Rd index d841f8df9..b0af4f6a3 100644 --- a/man/showError-xcmsSet-method.Rd +++ b/man/showError-xcmsSet-method.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-xcmsSet.R -\docType{methods} \name{showError,xcmsSet-method} \alias{showError,xcmsSet-method} \alias{showError} diff --git a/man/specDist.cosine-methods.Rd b/man/specDist.cosine-methods.Rd index 007416a9d..35c6e6bd8 100644 --- a/man/specDist.cosine-methods.Rd +++ b/man/specDist.cosine-methods.Rd @@ -3,16 +3,17 @@ \alias{specDist.cosine} \alias{specDist.cosine,matrix,matrix-method} \title{a Distance function based on matching peaks} -\description{This method calculates the distance of two sets of peaks using the cosine-distance.} +\description{This method calculates the distance of two sets of peaks + using the cosine-distance.} \section{Methods}{ \describe{ -\item{peakTable1 = "matrix", peakTable2 = "matrix"}{ - \code{ - specDist.cosine(peakTable1, peakTable2, mzabs = 0.001, mzppm = 10, - mzExp = 0.6, intExp = 3, nPdiff = 2, nPmin = 8, - symmetric = FALSE) + \item{peakTable1 = "matrix", peakTable2 = "matrix"}{ + \code{ + specDist.cosine(peakTable1, peakTable2, mzabs = 0.001, mzppm = 10, + mzExp = 0.6, intExp = 3, nPdiff = 2, nPmin = 8, + symmetric = FALSE) } -} + } }} \details{ The result is the cosine-distance of the product from weighted factors @@ -35,7 +36,8 @@ peaks were found) the return-value is NA. \item{nPmin}{the minimum absolute sum of peaks from both praktables} } \usage{ -specDist.cosine(peakTable1, peakTable2, mzabs=0.001, mzppm=10, mzExp=0.6, intExp=3, nPdiff=2, nPmin=8, symmetric=FALSE) +specDist.cosine(peakTable1, peakTable2, mzabs=0.001, mzppm=10, mzExp=0.6, + intExp=3, nPdiff=2, nPmin=8, symmetric=FALSE) } \author{Joachim Kutzera, \email{jkutzer@ipb-halle.de}} \keyword{methods} diff --git a/man/specDist.meanMZmatch-methods.Rd b/man/specDist.meanMZmatch-methods.Rd index f0eb8d531..5c85df7b7 100644 --- a/man/specDist.meanMZmatch-methods.Rd +++ b/man/specDist.meanMZmatch-methods.Rd @@ -32,7 +32,8 @@ were found) the return-value is NA. \item{matchrate}{the weight for value two} } \usage{ -specDist.meanMZmatch(peakTable1, peakTable2, matchdist=1, matchrate=1, mzabs=0.001, mzppm=10, symmetric=TRUE) +specDist.meanMZmatch(peakTable1, peakTable2, matchdist=1, matchrate=1, + mzabs=0.001, mzppm=10, symmetric=TRUE) } \author{Joachim Kutzera, \email{jkutzer@ipb-halle.de}} \keyword{methods} diff --git a/man/split.xcmsRaw.Rd b/man/split.xcmsRaw.Rd index 1b2fa8121..06b72d027 100755 --- a/man/split.xcmsRaw.Rd +++ b/man/split.xcmsRaw.Rd @@ -28,7 +28,7 @@ \value{ A list of \code{xcmsRaw} objects. } -\author{Steffen Neumann, \email{sneumann(at)ipb-halle.de}} +\author{Steffen Neumann, \email{sneumann@ipb-halle.de}} \seealso{ \code{\link{xcmsRaw-class}} } diff --git a/man/stitch-methods.Rd b/man/stitch-methods.Rd index d281d4958..26ca62634 100644 --- a/man/stitch-methods.Rd +++ b/man/stitch-methods.Rd @@ -50,27 +50,29 @@ } \author{Paul Benton, \email{hpaul.benton08@imperial.ac.uk}} \examples{ - \dontrun{library(xcms) - library(faahKO) ## These files do not have this problem to correct for but just for an example - cdfpath <- system.file("cdf", package = "faahKO") - cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) - xr<-xcmsRaw(cdffiles[1]) - xr - ##Lets assume that the lockmass starts at 1 and is every 100 scans - lockMass<-xcms:::makeacqNum(xr, freq=100, start=1) - ## these are equcal - lockmass<-AutoLockMass(xr) - ob<-stitch(xr, lockMass) - ob +\dontrun{library(xcms) + library(faahKO) + ## These files do not have this problem to correct for but just + ## for an example + cdfpath <- system.file("cdf", package = "faahKO") + cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) + xr<-xcmsRaw(cdffiles[1]) + xr + ##Lets assume that the lockmass starts at 1 and is every 100 scans + lockMass<-xcms:::makeacqNum(xr, freq=100, start=1) + ## these are equcal + lockmass<-AutoLockMass(xr) + ob<-stitch(xr, lockMass) + ob - #plot the old data before correction - foo<-rawEIC(xr, m=c(200,210), scan=c(80,140)) - plot(foo$scan, foo$intensity, type="h") + ## plot the old data before correction + foo<-rawEIC(xr, m=c(200,210), scan=c(80,140)) + plot(foo$scan, foo$intensity, type="h") - #plot the new corrected data to see what changed - foo<-rawEIC(ob, m=c(200,210), scan=c(80,140)) - plot(foo$scan, foo$intensity, type="h") - } + ## plot the new corrected data to see what changed + foo<-rawEIC(ob, m=c(200,210), scan=c(80,140)) + plot(foo$scan, foo$intensity, type="h") +} } \keyword{manip} \keyword{methods} diff --git a/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd b/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd index 702085deb..1bc020a97 100644 --- a/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd +++ b/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-xcmsRaw.R -\docType{methods} \name{[,xcmsRaw,logicalOrNumeric,missing,missing-method} \alias{[,xcmsRaw,logicalOrNumeric,missing,missing-method} \alias{subset-xcmsRaw} @@ -37,7 +36,7 @@ Only subsetting by scan index in increasing order or by a logical \examples{ ## Load a test file file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -xraw <- xcmsRaw(file) +xraw <- xcmsRaw(file, profstep = 0) ## The number of scans/spectra: length(xraw@scantime) diff --git a/man/updateObject-xcmsSet-method.Rd b/man/updateObject-xcmsSet-method.Rd index 726025943..08f418ca9 100644 --- a/man/updateObject-xcmsSet-method.Rd +++ b/man/updateObject-xcmsSet-method.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-xcmsSet.R -\docType{methods} \name{updateObject,xcmsSet-method} \alias{updateObject,xcmsSet-method} \title{Update an \code{\linkS4class{xcmsSet}} object} diff --git a/man/writeMSData-XCMSnExp-character-method.Rd b/man/writeMSData-XCMSnExp-character-method.Rd index 83e1bb873..29edc5005 100644 --- a/man/writeMSData-XCMSnExp-character-method.Rd +++ b/man/writeMSData-XCMSnExp-character-method.Rd @@ -1,13 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-XCMSnExp.R -\docType{methods} \name{writeMSData,XCMSnExp,character-method} \alias{writeMSData,XCMSnExp,character-method} \title{Export MS data to mzML/mzXML files} \usage{ -\S4method{writeMSData}{XCMSnExp,character}(object, file, - outformat = c("mzml", "mzxml"), copy = FALSE, - software_processing = NULL, ...) +\S4method{writeMSData}{XCMSnExp,character}( + object, + file, + outformat = c("mzml", "mzxml"), + copy = FALSE, + software_processing = NULL, + ... +) } \arguments{ \item{object}{\link{XCMSnExp} object with the mass spectrometry data.} diff --git a/man/xcms-deprecated.Rd b/man/xcms-deprecated.Rd index f67e1d7c9..e2d0539b7 100644 --- a/man/xcms-deprecated.Rd +++ b/man/xcms-deprecated.Rd @@ -1,9 +1,5 @@ \name{xcms-deprecated} \alias{xcms-deprecated} -\alias{extractChromatograms} -\alias{extractChromatograms,OnDiskMSnExp-method} -\alias{extractChromatograms,XCMSnExp-method} -\alias{plotChromatogram} \title{Deprecated functions in package \sQuote{xcms}} \description{ @@ -16,24 +12,13 @@ The following functions/methods are deprecated. \itemize{ - \item{\code{xcmsPapply}: this function is no longer available and the use - of \code{\link{bplapply}} is suggested.} - \item{\code{profBin}, \code{profBinM}, \code{profBinLin}, \code{profBinLinM}, \code{profBinLinBase}, \code{profBinLinBaseM} have been deprecated and \code{\link{binYonX}}} in combination with \code{\link{imputeLinInterpol}} should be used instead. - \item{\code{extractChromatograms}}: replaced by \code{\link{chromatogram}}. - - \item{\code{plotChromatogram}}: replaced by \code{plot} method for - \code{\link{Chromatogram}} or - \code{\link{Chromatograms}} objects. - \item{\code{extractMsData}}: replaced by \code{as(x, "data.frame")}. \item{\code{plotMsData}}: replaced by \code{plot(x, type = "XIC")}. } } - - diff --git a/man/xcmsFragments-class.Rd b/man/xcmsFragments-class.Rd index ee1f019ca..87e3f97a5 100755 --- a/man/xcmsFragments-class.Rd +++ b/man/xcmsFragments-class.Rd @@ -43,14 +43,7 @@ } } } -\references{ - A parallel effort in metabolite profiling data sharing: - \url{http://metlin.scripps.edu/} -} \author{S. Neumann, J. Kutzera} -\note{ - No notes yet. -} \seealso{ \code{\link{xcmsRaw}} } diff --git a/man/xcmsPapply.Rd b/man/xcmsPapply.Rd deleted file mode 100644 index b602e5bbc..000000000 --- a/man/xcmsPapply.Rd +++ /dev/null @@ -1,79 +0,0 @@ -\name{xcmsPapply} -\alias{xcmsPapply} -\title{\emph{Deprecated}: xcmsPapply } -\description{ - This function is deprecated, use \code{\link{bplapply}} - instead. - - An apply-like function which uses Rmpi to distribute - the processing evenly across a cluster. Will use a non-MPI - version if distributed processing is not available. -} -\usage{ - xcmsPapply(arg_sets, papply_action, papply_commondata = list(), - show_errors = TRUE, do_trace = FALSE, also_trace = c()) -} -\arguments{ - \item{arg_sets}{ a list, where each item will be given as an argument - to papply\_action } - \item{papply_action}{ A function which takes one argument. It will be - called on each element of arg\_sets } - \item{papply_commondata}{ A list containing the names and values of - variables to be accessible to the papply\_action. - 'attach' is used locally to import this list. } - \item{show_errors}{ If set to TRUE, overrides Rmpi's default, and - messages for errors which occur in R slaves are - produced. } - \item{do_trace}{ If set to TRUE, causes the papply\_action function to - be traced. i.e. Each statement is output before it is - executed by the slaves. } - \item{also_trace}{ If supplied an array of function names, as strings, - tracing will also occur for the specified functions. } - -} -\details{ - Similar to apply and lapply, applies a function to all items - of a list, and returns a list with the corresponding results. - - Uses Rmpi to implement a pull idiom in order to distribute - the processing evenly across a cluster. - If Rmpi is not available, or there are no slaves, - implements this as a non-parallel algorithm. - - \code{xcmsPapply} is a modified version of the papply function from package papply 0.2 (Duane Currie). - Parts of the slave function were wrapped in try() to make it failsafe and progress output was added. - - Make sure \code{Rmpi} was installed properly by executing the example below. - \code{Rmpi} was tested with - \itemize{ - \item OpenMPI : Unix, \url{http://www.open-mpi.org/}, don't forget to export \code{MPI_ROOT} before installing Rmpi e.g. \code{export MPI_ROOT=/usr/lib/openmpi} - \item DeinoMPI : Windows, \url{http://mpi.deino.net/}, also see \url{http://www.stats.uwo.ca/faculty/yu/Rmpi/} - } -} - -\value{ - A list of return values from papply\_action. Each value corresponds to - the element of arg\_sets used as a parameter to papply\_action -} -\references{ \url{http://ace.acadiau.ca/math/ACMMaC/software/papply/} } -\author{ Duane Currie , modified by Ralf Tautenhahn . } -\note{ Does not support distributing recursive calls in parallel. If - papply is used inside papply\_action, it will call a non-parallel - version } - -\examples{ -\dontrun{ -library(Rmpi) -library(xcms) - -number_lists <- list(1:10,4:40,2:27) - -mpi.spawn.Rslaves(nslaves=2) - -results <- xcmsPapply(number_lists,sum) -results - -mpi.close.Rslaves() - -} -} diff --git a/man/xcmsRaw-class.Rd b/man/xcmsRaw-class.Rd index f68cb9221..073bdbb32 100755 --- a/man/xcmsRaw-class.Rd +++ b/man/xcmsRaw-class.Rd @@ -12,6 +12,8 @@ \alias{sortMz,xcmsRaw-method} \alias{sortMz} \alias{levelplot} +\alias{findmzROI} +\alias{findmzROI,xcmsRaw-method} \title{Class xcmsRaw, a class for handling raw data} \description{ @@ -209,22 +211,20 @@ \item{sortMz}{ \code{signature(object = "xcmsRaw")}: sort the data points by increasing m/z for each scan - } - \item{stitch}{ - \code{signature(object = "xcmsRaw")}: Raw data correction for - lock mass calibration gaps. - } - } -} -\references{ - A parallel effort in metabolite profiling data sharing: - \url{http://metlin.scripps.edu/} + } + \item{stitch}{ + \code{signature(object = "xcmsRaw")}: Raw data correction for + lock mass calibration gaps. + } + \item{findmzROI}{ + \code{signature(object = "xcmsRaw")}: + internal function to identify regions of interest in the raw + data as part of the first step of centWave-based peak detection. + } + } } \author{Colin A. Smith, \email{csmith@scripps.edu}, Johannes Rainer \email{johannes.rainer@eurac.edu}} -\note{ - No notes yet. -} \seealso{ \code{\link{xcmsRaw}}, \code{\link{subset-xcmsRaw}} for subsetting by spectra. } diff --git a/man/xcmsRaw.Rd b/man/xcmsRaw.Rd index 3d8656a34..160adb836 100755 --- a/man/xcmsRaw.Rd +++ b/man/xcmsRaw.Rd @@ -45,7 +45,7 @@ deepCopy(object) } \references{ NetCDF file format: - \url{http://my.unidata.ucar.edu/content/software/netcdf/} + \url{https://www.unidata.ucar.edu/software/netcdf/} \url{http://www.astm.org/Standards/E2077.htm} \url{http://www.astm.org/Standards/E2078.htm} @@ -61,33 +61,34 @@ deepCopy(object) } \author{Colin A. Smith, \email{csmith@scripps.edu}} \examples{ - \dontrun{ - library(xcms) - library(faahKO) - cdfpath <- system.file("cdf", package = "faahKO") - cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) - xr<-xcmsRaw(cdffiles[1]) - xr - ##This gives some information about the file - names(attributes(xr)) - ## Lets have a look at the structure of the object +\dontrun{ + library(xcms) + library(faahKO) + cdfpath <- system.file("cdf", package = "faahKO") + cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) + xr<-xcmsRaw(cdffiles[1]) + xr + ##This gives some information about the file + names(attributes(xr)) + ## Lets have a look at the structure of the object - str(xr) - ##same but with a preview of each slot in the object - ##SO... lets have a look at how this works - head(xr@scanindex) - #[1] 0 429 860 1291 1718 2140 - xr@env$mz[425:430] - #[1] 596.3 597.0 597.3 598.1 599.3 200.1 - ##We can see that the 429 index is the last mz of scan 1 therefore... + str(xr) + ##same but with a preview of each slot in the object + ##SO... lets have a look at how this works + head(xr@scanindex) + ##[1] 0 429 860 1291 1718 2140 + xr@env$mz[425:430] + ##[1] 596.3 597.0 597.3 598.1 599.3 200.1 + ##We can see that the 429 index is the last mz of scan 1 therefore... - mz.scan1<-xr@env$mz[(1+xr@scanindex[1]):xr@scanindex[2]] - intensity.scan1<-xr@env$intensity[(1+xr@scanindex[1]):xr@scanindex[2]] - plot(mz.scan1, intensity.scan1, type="h", main=paste("Scan 1 of file", basename(cdffiles[1]), sep="")) - ##the easier way :p - scan1<-getScan(xr, 1) - head(scan1) - plotScan(xr, 1) + mz.scan1<-xr@env$mz[(1+xr@scanindex[1]):xr@scanindex[2]] + intensity.scan1<-xr@env$intensity[(1+xr@scanindex[1]):xr@scanindex[2]] + plot(mz.scan1, intensity.scan1, type="h", + main=paste("Scan 1 of file", basename(cdffiles[1]), sep="")) + ##the easier way :p + scan1<-getScan(xr, 1) + head(scan1) + plotScan(xr, 1) } } diff --git a/man/xcmsSet-class.Rd b/man/xcmsSet-class.Rd index 3e8f9348c..75be7acdb 100755 --- a/man/xcmsSet-class.Rd +++ b/man/xcmsSet-class.Rd @@ -286,14 +286,7 @@ slot (e.g. use \code{xset$name} on a \code{xcmsSet} object called \dQuote{xset} to extract the values from a column named \dQuote{name} in the \code{phenoData} slot). } -\references{ - A parallel effort in metabolite profiling data sharing: - \url{http://metlin.scripps.edu/} -} \author{Colin A. Smith, \email{csmith@scripps.edu}, Johannes Rainer \email{johannes.rainer@eurac.edu}} -\note{ - No notes yet. -} \seealso{ \code{\link{xcmsSet}} } diff --git a/man/xcmsSet.Rd b/man/xcmsSet.Rd index a3d6f9e3c..cd7b8acf4 100755 --- a/man/xcmsSet.Rd +++ b/man/xcmsSet.Rd @@ -101,7 +101,6 @@ xcmsSet(files = NULL, snames = NULL, sclass = NULL, phenoData = NULL, \code{\link{findPeaks}}, \code{\link{profStep}}, \code{\link{profMethod}}, - \code{\link{profBin}}, - \code{\link{xcmsPapply}} + \code{\link{profBin}} } \keyword{file} diff --git a/man/xdata.Rd b/man/xdata.Rd new file mode 100644 index 000000000..7fefdfd5a --- /dev/null +++ b/man/xdata.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xdata.R +\name{xdata} +\alias{xdata} +\title{LC-MS preprocessing result test data} +\description{ +The `xdata` variable represent the results from a `xcms`-based +pre-processing of an LC-MS untargeted metabolomics data set. The raw data +files are provided in the `faahKO` package. The pre-processing of this data +set is described in detail in the *xcms* vignette of the `xcms` package. +} diff --git a/readme.org b/readme.org deleted file mode 100644 index 823bc9ffd..000000000 --- a/readme.org +++ /dev/null @@ -1,39 +0,0 @@ -#+HTML:

- - -* The =xcms3= package - -The =xcms3= package is an updated and partially re-written version of the =xcms= -package. The version /3/ was selected to avoid confusions with the /xcms2/ -(http://pubs.acs.org/doi/abs/10.1021/ac800795f) software. While providing all -of the original software's functionality, =xcms3= aims at: - -1) Better integration into the Bioconductor framework: - - Make use and extend classes defined in the =MSnbase= package. - - Implement class versioning (Biobase's =Versioned= class). - - Use =BiocParallel= for parallel processing. -2) Implementation of validation methods for all classes to ensure data - integrity. -3) Easier and faster access to raw spectra data. -4) Cleanup of the source code: - - Remove obsolete and redundant functionality (=getEIC=, =rawEIC= etc). - - Unify interfaces, i.e. implement a layer of base functions accessing all - analysis methods (which are implemented in C, C++ or R). -5) Using a more consistent naming scheme of methods that follows established - naming conventions (e.g. =correspondence= instead of =grouping=). -6) Update, improve and extend the documentation. -7) Establishing a layer of base R-functions that interface all analysis - methods. These should take M/Z, retention time (or scan index) and intensity - values as input along with optional arguments for the downstream functions - (implemented in C, C++ or R). The input arguments should be basic R objects - (numeric vectors) thus enabling easy integration of analysis methods in other - R packages. -8) The user interface's analysis methods should take the (raw) data object and a - parameter class, that is used for dispatching to the corresponding analysis - algorithm. - -Discussions and suggestions are welcome: -https://github.com/sneumann/xcms/issues - -For more information see also the [[file:vignettes/new_functionality.Rmd]] file, for -usage and description the [[file:vignettes/xcms.Rmd]] vignette. diff --git a/src/massifquant/nmath.h b/src/massifquant/nmath.h index e1ee22397..114232786 100644 --- a/src/massifquant/nmath.h +++ b/src/massifquant/nmath.h @@ -74,7 +74,7 @@ void R_CheckUserInterrupt(void); #include #define _(String) gettext (String) #else -#define _(String) (String) +#define _(String) (char *)(String) #endif #else diff --git a/src/mzROI.c b/src/mzROI.c index f50cc138c..045bc0c94 100644 --- a/src/mzROI.c +++ b/src/mzROI.c @@ -188,7 +188,7 @@ struct mzROIStruct * insertpeak(const double fMass, const double fInten, double ddev = (pickOptions->dev * fMass); int lpos = lower_bound( fMass - ddev,mzval,0,mzLength->mzval); int hpos = upper_bound( fMass + ddev,mzval,lpos,mzLength->mzval - lpos); - + if (lpos > mzLength->mzval-1) lpos = mzLength->mzval -1; if (hpos > mzLength->mzval-1) @@ -217,10 +217,10 @@ struct mzROIStruct * insertpeak(const double fMass, const double fInten, mzval[i].kI++; } } // for - + // if not found if (wasfound == FALSE) { // no, create new ROI for mz - + lpos=-1;hpos=-1; int doInsert=FALSE; if ((scan < LastScan) && (scanbuf->nextScanLength > 0)) {// check next scan @@ -231,7 +231,7 @@ struct mzROIStruct * insertpeak(const double fMass, const double fInten, { ddev = (pickOptions->dev * scanbuf->nextScan[i]); double ddiff = fabs(fMass - scanbuf->nextScan[i]); - + if (ddiff <= ddev) { doInsert=TRUE; @@ -241,7 +241,7 @@ struct mzROIStruct * insertpeak(const double fMass, const double fInten, } } else doInsert=TRUE; - + if (doInsert == TRUE) { // get pos. for insert int i = lower_bound(fMass,mzval,0,mzLength->mzval); @@ -252,7 +252,7 @@ struct mzROIStruct * insertpeak(const double fMass, const double fInten, // insert element if (n>0) memmove(mzval + i +1, mzval + i, n*sizeof(struct mzROIStruct)); - + mzval[i].mz = fMass; mzval[i].mzmin = fMass; mzval[i].mzmax = fMass; @@ -264,11 +264,11 @@ struct mzROIStruct * insertpeak(const double fMass, const double fInten, mzval[i].kI = 1; else mzval[i].kI = 0; mzval[i].deleteMe = FALSE; - + mzLength->mzval++; } } - + return(mzval); } @@ -545,6 +545,58 @@ SEXP getMZ(SEXP mz, SEXP intensity, SEXP scanindex, SEXP mzrange, SEXP scanrange return(res); } +SEXP getWeightedMZ(SEXP mz, SEXP intensity, SEXP scanindex, SEXP mzrange, + SEXP scanrange, SEXP lastscan) { + double *pmz, *p_res, mzrangeFrom, mzrangeTo, *pintensity, numerator, + denominator; + int i,*pscanindex,scanrangeFrom, scanrangeTo,ilastScan,nmz,ctScan,buflength; + SEXP res; + pmz = REAL(mz); + pintensity = REAL(intensity); + nmz = GET_LENGTH(mz); + pscanindex = INTEGER(scanindex); + int firstScan = 1; // is always 1 + ilastScan = INTEGER(lastscan)[0]; + mzrangeFrom = REAL(mzrange)[0]; + mzrangeTo = REAL(mzrange)[1]; + scanrangeFrom = INTEGER(scanrange)[0]; + scanrangeTo = INTEGER(scanrange)[1]; + if ((scanrangeFrom < firstScan) || (scanrangeFrom > ilastScan) || + (scanrangeTo < firstScan) || (scanrangeTo > ilastScan)) + error("Error in scanrange \n"); + + buflength = scanrangeTo - scanrangeFrom +1; + PROTECT(res = NEW_NUMERIC(buflength)); + p_res = NUMERIC_POINTER(res); + + i=0; + for (ctScan = scanrangeFrom; ctScan <= scanrangeTo; ctScan++) { + int idx,idx1,idx2; + idx1 = pscanindex[ctScan -1] +1; + if (ctScan == ilastScan) idx2 = nmz-1; + else idx2 = pscanindex[ctScan]; + int idx1b = lowerBound(mzrangeFrom, pmz, idx1-1, idx2-idx1-1); + int idx2b = upperBound(mzrangeTo, pmz, idx1b, idx2-idx1b-1); + + numerator = 0.0; + denominator = 0.0; + p_res[i] = 0.0; + for (idx = idx1b; idx <= idx2b; idx++) { + double mzval = pmz[idx]; + if ((mzval <= mzrangeTo) && (mzval >= mzrangeFrom)) { + numerator = numerator + mzval * pintensity[idx]; + denominator = denominator + pintensity[idx]; + } + } + if (denominator > 0) { + p_res[i] = numerator / denominator; + } + i++; + } + UNPROTECT(1); + return(res); +} + SEXP findmzROI(SEXP mz, SEXP intensity, SEXP scanindex, SEXP mzrange, SEXP scanrange, SEXP lastscan, SEXP dev, SEXP minEntries, SEXP prefilter, SEXP noise) { diff --git a/tests/testthat.R b/tests/testthat.R index 9231d5023..45745e014 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -3,14 +3,11 @@ library(xcms) library(faahKO) library(msdata) -attr(faahko, "filepaths") <- sapply( - as.list(basename(attr(faahko, "filepaths"))), - function(x) system.file("cdf", if (length(grep("ko",x)) > 0) "KO" else "WT", - x, package = "faahKO")) if (.Platform$OS.type == "unix") { - prm <- MulticoreParam(2) + prm <- MulticoreParam(3) } else { - prm <- SnowParam(2) + # prm <- SnowParam(3) + prm <- SerialParam() } register(bpstart(prm)) @@ -19,17 +16,13 @@ faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), system.file('cdf/KO/ko16.CDF', package = "faahKO"), system.file('cdf/KO/ko18.CDF', package = "faahKO")) -## An xcmsRaw for the first file: -faahko_xr_1 <- xcmsRaw(system.file('cdf/KO/ko15.CDF', package = "faahKO"), - profstep = 0) faahko_od <- readMSData(faahko_3_files, mode = "onDisk") -faahko_xod <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 10000, - snthresh = 40)) -faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), - method = "centWave", noise = 10000, snthresh = 40) -faahko_xsg <- group(faahko_xs) -## Doing also the retention time correction etc +faahko_xod <- findChromPeaks( + faahko_od, param = CentWaveParam(noise = 10000, snthresh = 40, + prefilter = c(3, 10000))) od_x <- faahko_od +mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) +od_chrs <- chromatogram(od_x, mz = mzr) xod_x <- faahko_xod pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) xod_xg <- groupChromPeaks(xod_x, param = pdp) @@ -37,32 +30,40 @@ xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) xod_xgrg <- groupChromPeaks(xod_xgr, param = pdp) xod_r <- adjustRtime(as(od_x, "XCMSnExp"), param = ObiwarpParam()) -faahko_grouped_filled <- fillPeaks(group(faahko)) -faahko_grouped_retcor_filled <- - fillPeaks(group(retcor(group(updateObject(faahko))))) - +xod_chr <- findChromPeaks(filterMz(filterRt(od_x, rt = c(2500, 3500)), + mz = c(334.9, 344.1)), + param = CentWaveParam()) + microtofq_fs <- c(system.file("microtofq/MM14.mzML", package = "msdata"), system.file("microtofq/MM8.mzML", package = "msdata")) -microtofq_xr <- xcmsRaw(microtofq_fs[1], profstep = 0) microtofq_od <- readMSData(microtofq_fs, mode = "onDisk") ## Direct injection data: -fticrf <- list.files(system.file("fticr", package = "msdata"), +fticrf <- list.files(system.file("fticr-mzML", package = "msdata"), recursive = TRUE, full.names = TRUE) fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk") fticr_xod <- findChromPeaks(fticr, MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, SNR.method = "data.mean", winSize.noise = 500)) -fticr_xs <- xcmsSet(method="MSW", files=fticrf[1:2], scales=c(1,7), - SNR.method='data.mean' , winSize.noise=500, - peakThr=80000, amp.Th=0.005) +## Pesticide data +fl <- system.file("TripleTOF-SWATH", "PestMix1_SWATH.mzML", package = "msdata") +pest_swth <- readMSData(fl, mode = "onDisk") +cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, + peakwidth = c(3, 20), prefilter = c(3, 1000)) +pest_swth <- findChromPeaks(pest_swth, param = cwp) +pest_swth <- findChromPeaksIsolationWindow(pest_swth, param = cwp) + +fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", package = "msdata") +pest_dda <- readMSData(fl, mode = "onDisk") +pest_dda <- findChromPeaks(pest_dda, param = cwp) -fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), - system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO"), - system.file('cdf/KO/ko19.CDF', package = "faahKO")) -xs_1 <- xcmsSet(fs, profparam = list(step = 0), method = "centWave", - noise = 10000, snthresh = 50) +## Sciex test data. +## fl <- dir(system.file("sciex", package = "msdata"), full.names = TRUE) +## sciex_data <- readMSData(fl, mode = "onDisk") +## sciex_data <- pickPeaks(sciex_data) test_check("xcms") + +bpstop(prm) + diff --git a/tests/testthat/test_Chromatogram.R b/tests/testthat/test_Chromatogram.R index 43956c6cb..3f81a68fd 100644 --- a/tests/testthat/test_Chromatogram.R +++ b/tests/testthat/test_Chromatogram.R @@ -1,42 +1,49 @@ -test_that("extractChromatograms is deprecated", { - expect_warning(chrs <- extractChromatograms(filterFile(od_x, file = 2))) - expect_warning(plotChromatogram(chrs)) -}) +## test_that("extractChromatograms is deprecated", { + skip_on_os(os = "windows", arch = "i386") + +## expect_warning(chrs <- extractChromatograms( +## filterRt(filterFile(od_x, file = 2), c(2500, 2600)))) +## expect_warning(plotChromatogram(chrs)) +## }) test_that("chromatogram works", { + skip_on_os(os = "windows", arch = "i386") + ## OnDiskMSnExp ## TIC - chrs <- chromatogram(filterFile(od_x, file = 2)) + od_tmp <- filterFile(filterRt(od_x, c(2500, 3000)), file = 2) + chrs <- chromatogram(od_tmp) plot(chrs) - spctr <- spectra(filterFile(od_x, file = 2)) + spctr <- spectra(od_tmp) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) expect_equal(intensity(chrs[1, 1]), ints) expect_equal(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## BPC - chrs <- chromatogram(filterFile(od_x, file = 2), - aggregationFun = "max") + chrs <- chromatogram(od_tmp, aggregationFun = "max") ints <- unlist(lapply(spctr, function(z) return(max(intensity(z))))) expect_equal(intensity(chrs[1, 1]), ints) expect_equal(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## XCMSnExp - xod_x <- faahko_xod - chrs <- chromatogram(filterFile(xod_x, file = 2)) + xod_x <- filterFile(filterRt(faahko_xod, c(2500, 3000)), file = 2) + chrs <- chromatogram(xod_x) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) expect_equal(intensity(chrs[1, 1]), ints) expect_equal(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## BPC - chrs <- chromatogram(filterFile(xod_x, file = 2), - aggregationFun = "max") + chrs <- chromatogram(xod_x, aggregationFun = "max") ints <- unlist(lapply(spctr, function(z) - return(max(intensity(z))))) + return(max(intensity(z))))) expect_equal(intensity(chrs[1, 1]), ints) expect_equal(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## with adjusted retention times. chrs <- chromatogram(filterFile(xod_xgr, file = 2), adjustedRtime = FALSE, aggregationFun = "max") + spctr <- spectra(filterFile(xod_xgr, file = 2, keepAdjustedRtime = FALSE)) + ints <- unlist(lapply(spctr, function(z) + return(max(intensity(z))))) expect_equal(intensity(chrs[1, 1]), ints) expect_equal(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) chrs <- chromatogram(filterFile(xod_xgr, file = 2, @@ -47,18 +54,15 @@ test_that("chromatogram works", { adjusted = TRUE)[[2]]) ## Subset to certain mz range in all files. chrs_adj <- chromatogram(xod_xgr, mz = c(300, 330)) - chrs_raw <- chromatogram(xod_x, mz = c(300, 330)) - expect_true(sum(rtime(chrs_adj[1, 1]) != rtime(chrs_raw[1, 1])) > - length(chrs_raw[1, 1]) / 2) expect_equal(rtime(chrs_adj[1, 1]), rtime(xod_xgr, bySample = TRUE)[[1]]) expect_equal(rtime(chrs_adj[1, 2]), rtime(xod_xgr, bySample = TRUE)[[2]]) expect_equal(rtime(chrs_adj[1, 3]), rtime(xod_xgr, bySample = TRUE)[[3]]) - + ## Now subsetting for mz: - tmp <- filterFile(od_x, file = 2) + tmp <- filterRt(filterFile(od_x, file = 2), c(2500, 3000)) chrs <- chromatogram(tmp, mz = c(300, 400)) expect_equal(mz(chrs[1, 1], filter = TRUE), c(300, 400)) - expect_warning(spctr <- spectra(filterMz(tmp, mz = c(300, 400)))) + spctr <- spectra(filterMz(tmp, mz = c(300, 400))) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) ints2 <- intensity(chrs[1, 1]) @@ -69,13 +73,15 @@ test_that("chromatogram works", { chrs <- chromatogram(filterFile(xod_xgr, file = 2, keepAdjustedRtime = TRUE), mz = c(300, 400)) + spctr <- spectra(filterMz(filterFile(xod_xgr, file = 2), + mz = c(300, 400))) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) ints2 <- intensity(chrs[1, 1]) ints2[is.na(ints2)] <- 0 expect_equal(ints2, ints) expect_equal(rtime(chrs[1, 1]), rtime(xod_xgr, bySample = TRUE)[[2]]) - + ## Now subsetting for rt: chrs <- chromatogram(od_x, rt = c(2700, 2900)) expect_true(all(rtime(chrs[1, 1]) >= 2700 & rtime(chrs[1, 1]) <= 2900)) @@ -108,7 +114,7 @@ test_that("chromatogram works", { expect_equal(intensity(chrs2[1, 1]), intsL[[1]]) expect_equal(intensity(chrs2[1, 2]), intsL[[2]]) expect_equal(intensity(chrs2[1, 3]), intsL[[3]]) - + ## Now subsetting for rt and mz: chrs <- chromatogram(od_x, rt = c(2700, 2900), mz = 335) expect_true(all(rtime(chrs[1, 1]) >= 2700 & rtime(chrs[1, 1]) <= 2900)) @@ -142,7 +148,7 @@ test_that("chromatogram works", { expect_equal(rtime(chrs[1, 1]), rtime(tmp, bySample = TRUE)[[1]]) expect_equal(rtime(chrs[1, 2]), rtime(tmp, bySample = TRUE)[[2]]) expect_equal(rtime(chrs[1, 3]), rtime(tmp, bySample = TRUE)[[3]]) - + ## What if we're completely off? chrs <- chromatogram(od_x, rt = c(5000, 5500)) expect_true(nrow(chrs) == 0) @@ -160,7 +166,7 @@ test_that("chromatogram works", { rtr <- matrix(c(2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) mzr <- matrix(c(355, 355, 344, 344), ncol = 2, byrow = TRUE) chrs <- chromatogram(od_x, rt = rtr, mz = mzr) - + expect_true(all(rtime(chrs[1, 1]) >= 2700 & rtime(chrs[1, 1]) <= 2900)) expect_true(all(rtime(chrs[1, 2]) >= 2700 & rtime(chrs[1, 2]) <= 2900)) expect_true(all(rtime(chrs[1, 3]) >= 2700 & rtime(chrs[1, 3]) <= 2900)) @@ -198,7 +204,7 @@ test_that("chromatogram works", { chrs <- chromatogram(od_x, rt = rtr, mz = mzr) expect_true(nrow(chrs) == 3) expect_true(all(lengths(chrs[2, ]) == 0)) - + rtr <- matrix(c(2700, 2900, 2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) mzr <- matrix(c(355, 355, 100000, 100000, 344, 344), ncol = 2, byrow = TRUE) chrs <- chromatogram(od_x, rt = rtr, mz = mzr) diff --git a/tests/testthat/test_MsFeatureData.R b/tests/testthat/test_MsFeatureData.R index c44039a8b..23483392f 100644 --- a/tests/testthat/test_MsFeatureData.R +++ b/tests/testthat/test_MsFeatureData.R @@ -1,68 +1,102 @@ test_that("MsFeatureData class validation works", { + skip_on_os(os = "windows", arch = "i386") + fd <- new("MsFeatureData") ## Check error for wrong elements. fd$a <- 5 - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) rm("a", envir = fd) ## Check chromPeaks fd$chromPeaks <- 4 - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) fdm <- matrix(ncol = 3, nrow = 5) colnames(fdm) <- c("a", "b", "sample") fd$chromPeaks <- fdm - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) rm("chromPeaks", envir = fd) + ## chromPeaks + cp <- matrix(nrow = 3, ncol = length(.REQ_PEAKS_COLS)) + colnames(cp) <- .REQ_PEAKS_COLS + expect_true(is.character(.validChromPeaksMatrix(cp))) + cp[3] <- 3.4 + expect_true(.validChromPeaksMatrix(cp)) + rownames(cp) <- letters[1:nrow(cp)] + chromPeaks(fd) <- cp + expect_true(validObject(fd)) ## featureDefinitions + chromPeakData(fd) <- 5 + expect_error(validObject(fd), "is supposed to be a 'DataFrame'") + fdef <- DataFrame(ms_level = rep(1L, nrow(cp)), + is_filled = rep(FALSE, nrow(cp)), + row.names = rownames(cp)) + chromPeakData(fd) <- fdef + expect_true(validObject(fd)) + chromPeakData(fd)$ms_level <- "a" + expect_error(validObject(fd), "column 'ms_level' should contain") + chromPeakData(fd)$ms_level <- 1L + rownames(chromPeakData(fd)) <- 1:nrow(cp) + expect_error(validObject(fd), "rownames differ") + rm("chromPeaks", envir = fd) + expect_error(validObject(fd), "'chromPeakData' present but 'chromPeaks'") + rm("chromPeakData", envir = fd) + + ## Additional tests. fd$chromPeaks <- chromPeaks(xod_x) fd$featureDefinitions <- 4 - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) fg <- featureDefinitions(xod_xgrg) fd$featureDefinitions <- fg[, 1:8] - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) fg_2 <- fg fg_2$mzmin <- "a" fd$featureDefinitions <- fg_2 - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) fg_2 <- fg fg_2$peakidx[[1]] <- c(50000, 3) fd$featureDefinitions <- fg_2 - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) ## adjustedRtime fd$featureDefinitions <- fg fd$adjustedRtime <- 4 - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) fd$adjustedRtime <- list(1:5, "b") - expect_true(!is.logical(xcms:::validateMsFeatureData(fd))) + expect_true(!is.logical(validateMsFeatureData(fd))) ## Now check that we pass if we put all correct data into the object: fd <- new("MsFeatureData") fd$chromPeaks <- chromPeaks(xod_xgrg) - expect_true(length(xcms:::validateMsFeatureData(fd)) == 0) + expect_true(length(validateMsFeatureData(fd)) == 0) fd$adjustedRtime <- xod_xgrg@msFeatureData$adjustedRtime - expect_true(length(xcms:::validateMsFeatureData(fd)) == 0) + expect_true(length(validateMsFeatureData(fd)) == 0) fd$featureDefinitions <- featureDefinitions(xod_xgrg) - expect_true(length(xcms:::validateMsFeatureData(fd)) == 0) + expect_true(length(validateMsFeatureData(fd)) == 0) }) test_that("MsFeatureData class_accessors work", { + skip_on_os(os = "windows", arch = "i386") + fd <- new("MsFeatureData") expect_true(!hasChromPeaks(fd)) expect_true(!hasAdjustedRtime(fd)) expect_true(!hasFeatures(fd)) - expect_warning(expect_equal(chromPeaks(fd), NULL)) - expect_warning(expect_equal(featureDefinitions(fd), NULL)) + expect_equal(chromPeaks(fd), NULL) + expect_warning(expect_equal(featureDefinitions(fd), DataFrame())) expect_warning(expect_equal(adjustedRtime(fd), NULL)) ## chromPeaks chromPeaks(fd) <- chromPeaks(xod_xgrg) + chromPeakData(fd) <- chromPeakData(xod_xgrg) expect_true(hasChromPeaks(fd)) + expect_false(hasChromPeaks(fd, msLevel = 2L)) expect_equal(chromPeaks(fd), chromPeaks(xod_xgrg)) ## featureDefinitions featureDefinitions(fd) <- featureDefinitions(xod_xgrg) expect_true(hasFeatures(fd)) expect_equal(featureDefinitions(fd), featureDefinitions(xod_xgrg)) + expect_false(hasFeatures(fd, msLevel = 2L)) + expect_true(nrow(featureDefinitions(fd, msLevel = 2L)) == 0) ## adjustedRtime - expect_error(adjustedRtime(fd) <- adjustedRtime(xod_xgrg)) + adjustedRtime(fd) <- adjustedRtime(xod_xgrg) + expect_error(validObject(fd)) adjustedRtime(fd) <- xod_xgrg@msFeatureData$adjustedRtime expect_true(hasAdjustedRtime(fd)) expect_equal(adjustedRtime(fd), xod_xgrg@msFeatureData$adjustedRtime) }) - diff --git a/tests/testthat/test_Param_classes.R b/tests/testthat/test_Param_classes.R index 96fa00423..a20ff3554 100644 --- a/tests/testthat/test_Param_classes.R +++ b/tests/testthat/test_Param_classes.R @@ -1,4 +1,6 @@ test_that("CentWaveParam works", { + skip_on_os(os = "windows", arch = "i386") + p <- new("CentWaveParam") checkDefaultValues <- function(x) { expect_equal(x@ppm, 25) @@ -71,6 +73,8 @@ test_that("CentWaveParam works", { }) test_that("MatchedFilterParam works", { + skip_on_os(os = "windows", arch = "i386") + p <- new("MatchedFilterParam") checkDefaultValues <- function(x) { expect_equal(x@binSize, 0.1) @@ -156,7 +160,9 @@ test_that("MatchedFilterParam works", { expect_error(index(p) <- 1:3) }) -test_that("MassifquantParam works", { +test_that("MassifquantParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("MassifquantParam") ppm(p) <- 1 @@ -237,6 +243,8 @@ test_that("MassifquantParam works", { }) test_that("MSWParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("MSWParam", snthresh = 14) expect_equal(snthresh(p), 14) @@ -326,17 +334,19 @@ test_that("MSWParam works", { ## Check the .param2list method: p <- new("MSWParam", addParams = list(z = "z", bla = 1:4)) - L <- xcms:::.param2list(p) + L <- .param2list(p) expect_equal(L$z, "z") expect_equal(L$bla, 1:4) expect_equal(L$snthresh, 3) p <- new("MSWParam") - L <- xcms:::.param2list(p) + L <- .param2list(p) expect_true(!any(names(L) == "addParams")) expect_equal(L$snthresh, 3) }) test_that("CentWavePredIsoParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("CentWavePredIsoParam", ppm = 14) expect_equal(ppm(p), 14) @@ -468,11 +478,13 @@ test_that("CentWavePredIsoParam works", { ## Check the .param2list method: p <- new("CentWavePredIsoParam", snthresh = 123) - L <- xcms:::.param2list(p) + L <- .param2list(p) expect_equal(L$snthresh, 123) }) -test_that("PeakDensityParam works", { +test_that("PeakDensityParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("PeakDensityParam", sampleGroups = c(1, 1, 1, 2, 2, 3, 4)) expect_equal(sampleGroups(p), c(1, 1, 1, 2, 2, 3, 4)) @@ -484,7 +496,7 @@ test_that("PeakDensityParam works", { expect_error(sampleGroups(p) <- c(2, 2, NA)) expect_error(PeakDensityParam()) expect_error(PeakDensityParam(sampleGroups = c(1, 1, NA))) - + p <- new("PeakDensityParam", bw = 3) expect_equal(bw(p), 3) bw(p) <- 20 @@ -533,6 +545,8 @@ test_that("PeakDensityParam works", { }) test_that("MzClustParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("MzClustParam", sampleGroups = c(1, 1, 1, 2, 2, 3, 4)) expect_equal(sampleGroups(p), c(1, 1, 1, 2, 2, 3, 4)) @@ -556,7 +570,7 @@ test_that("MzClustParam works", { p <- MzClustParam(absMz = 33) expect_equal(absMz(p), 33) expect_error(MzClustParam(absMz = -4)) - + ## minFraction p <- new("MzClustParam", minFraction = 0.7) expect_equal(minFraction(p), 0.7) @@ -579,6 +593,8 @@ test_that("MzClustParam works", { }) test_that("NearestPeaksParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("NearestPeaksParam", sampleGroups = c(1, 1, 1, 2, 2, 3, 4)) expect_equal(sampleGroups(p), c(1, 1, 1, 2, 2, 3, 4)) @@ -613,7 +629,7 @@ test_that("NearestPeaksParam works", { expect_equal(absRt(p), 33) expect_error(NearestPeaksParam(absRt = -4)) expect_error(NearestPeaksParam(absRt = 1:3)) - + p <- new("NearestPeaksParam", kNN = 3) expect_equal(kNN(p), 3) kNN(p) <- 20 @@ -625,6 +641,8 @@ test_that("NearestPeaksParam works", { }) test_that("PeakGroupsParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("PeakGroupsParam", minFraction = 0.8) expect_equal(minFraction(p), 0.8) @@ -635,7 +653,7 @@ test_that("PeakGroupsParam works", { expect_error(minFraction(p) <- c(2, 2)) expect_error(minFraction(p) <- -1) expect_error(minFraction(p) <- 3) - + p <- new("PeakGroupsParam", extraPeaks = 2) expect_equal(extraPeaks(p), 2) extraPeaks(p) <- 0.3 @@ -679,9 +697,25 @@ test_that("PeakGroupsParam works", { expect_equal(peakGroupsMatrix(p), mt + 2) p <- PeakGroupsParam(peakGroupsMatrix = mt) expect_equal(peakGroupsMatrix(p), mt) + + p <- new("PeakGroupsParam", subset = 1:3) + expect_equal(subset(p), 1:3) + subset(p) <- 4:6 + expect_equal(subset(p), 4:6) + p <- PeakGroupsParam(subset = 6:9) + expect_equal(subset(p), 6:9) + + p <- new("PeakGroupsParam", subsetAdjust = "average") + expect_equal(subsetAdjust(p), "average") + subsetAdjust(p) <- "previous" + expect_equal(subsetAdjust(p), "previous") + p <- PeakGroupsParam(subsetAdjust = "average") + expect_equal(subsetAdjust(p), "average") }) test_that("ObiwarpParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("ObiwarpParam", binSize = 0.8) expect_equal(binSize(p), 0.8) @@ -691,7 +725,7 @@ test_that("ObiwarpParam works", { expect_equal(binSize(p), 0.7) expect_error(binSize(p) <- c(2, 2)) expect_error(binSize(p) <- -1) - + p <- new("ObiwarpParam", centerSample = 2L) expect_equal(centerSample(p), 2L) centerSample(p) <- 1 @@ -784,9 +818,25 @@ test_that("ObiwarpParam works", { expect_equal(initPenalty(p), 3.1) expect_error(factorGap(p) <- c(2, 2)) expect_error(factorGap(p) <- -1) + + p <- new("ObiwarpParam", subset = 1L) + expect_equal(subset(p), 1L) + subset(p) <- 1:6 + expect_equal(subset(p), 1:6) + p <- ObiwarpParam(subset = 1:5) + expect_equal(subset(p), 1:5) + + p <- new("ObiwarpParam", subsetAdjust = "previous") + expect_equal(subsetAdjust(p), "previous") + subsetAdjust(p) <- "average" + expect_equal(subsetAdjust(p), "average") + p <- ObiwarpParam(subsetAdjust = "previous") + expect_equal(subsetAdjust(p), "previous") }) test_that("GenericParam works", { + skip_on_os(os = "windows", arch = "i386") + prm <- GenericParam(fun = "mean") expect_equal(prm@fun, "mean") ## Errors @@ -795,6 +845,8 @@ test_that("GenericParam works", { }) test_that("FillChromPeaksParam works", { + skip_on_os(os = "windows", arch = "i386") + ## Check getter/setter methods: p <- new("FillChromPeaksParam", expandMz = 0.8) expect_equal(expandMz(p), 0.8) @@ -841,6 +893,8 @@ test_that("FillChromPeaksParam works", { }) test_that("CalibrantMassParam works", { + skip_on_os(os = "windows", arch = "i386") + p <- new("CalibrantMassParam") expect_true(validObject(p)) p@method <- "other" @@ -849,7 +903,7 @@ test_that("CalibrantMassParam works", { p <- new("CalibrantMassParam") p@mz <- list(mzs) expect_error(validObject(p)) - + ## Constructor. p <- CalibrantMassParam(mz = mzs, mzabs = 3, mzppm = 9, neighbors = 4, method = "shift") @@ -859,3 +913,88 @@ test_that("CalibrantMassParam works", { expect_equal(.neighbors(p), 4L) expect_equal(.method(p), "shift") }) + +test_that("CleanPeaksParam works", { + skip_on_os(os = "windows", arch = "i386") + + p <- new("CleanPeaksParam") + expect_true(validObject(p)) + p@maxPeakwidth <- c(1, 4) + expect_error(validObject(p), "positive number") + p <- CleanPeaksParam(13.2) + show(p) + expect_true(validObject(p)) + expect_equal(p@maxPeakwidth, 13.2) + expect_error(CleanPeaksParam(-1), "positive number") +}) + +test_that("MergeNeighboringPeaksParam works", { + skip_on_os(os = "windows", arch = "i386") + + p <- new("MergeNeighboringPeaksParam") + expect_true(validObject(p)) + p@expandRt <- c(1, 4) + expect_error(validObject(p), "numeric of length 1") + p@expandRt <- NA_real_ + expect_error(validObject(p), "numeric of length 1") + p@expandMz <- c(1, 4) + p@expandRt <- 1.3 + expect_error(validObject(p), "numeric of length 1") + p@expandMz <- NA_real_ + expect_error(validObject(p), "numeric of length 1") + p@expandMz <- 3.2 + p@ppm <- c(1, 3) + expect_error(validObject(p), "numeric of length 1") + p@ppm <- NA_real_ + expect_error(validObject(p), "numeric of length 1") + p@ppm <- 10.0 + p@minProp <- c(32.4, 14.5, 13.45) + expect_error(validObject(p), "number of length 1") + p@minProp <- NA_real_ + expect_error(validObject(p), "number of length 1") + p@minProp <- 1.4 + expect_true(validObject(p)) + p <- MergeNeighboringPeaksParam(expandMz = 0.1, expandRt = 5, + ppm = 20, minProp = 0.9) + show(p) + expect_true(validObject(p)) + expect_equal(p@expandMz, 0.1) + expect_equal(p@expandRt, 5) + expect_equal(p@ppm, 20) + expect_equal(p@minProp, 0.9) + expect_error(MergeNeighboringPeaksParam(c(1, 3)), "numeric of length 1") +}) + +test_that("ChromPeakAreaParam works", { + skip_on_os(os = "windows", arch = "i386") + + res <- ChromPeakAreaParam() + expect_true(is(res, "ChromPeakAreaParam")) + res <- ChromPeakAreaParam(mzmin = median) + expect_true(validObject(res)) +}) + +test_that("FilterIntensityParam works", { + skip_on_os(os = "windows", arch = "i386") + + res <- FilterIntensityParam() + expect_true(is(res, "FilterIntensityParam")) + res <- FilterIntensityParam(threshold = 100) + expect_equal(res@threshold, 100) + expect_equal(res@nValues, 1) + expect_equal(res@value, "maxo") + + res <- new("FilterIntensityParam") + res@nValues <- 0L + expect_error(validObject(res), "positive number") + res@nValues <- c(1L, 1L) + expect_error(validObject(res), "positive number") + res@nValues <- 1L + res@value <- c("a", "b") + expect_error(validObject(res), "length 1") + res@value <- "b" + res@threshold <- -10 + expect_error(validObject(res), "length 1") + res@threshold <- c(10, 20) + expect_error(validObject(res), "length 1") +}) diff --git a/tests/testthat/test_do_adjustRtime-functions.R b/tests/testthat/test_do_adjustRtime-functions.R index 75888f7b8..bf24cad14 100644 --- a/tests/testthat/test_do_adjustRtime-functions.R +++ b/tests/testthat/test_do_adjustRtime-functions.R @@ -1,10 +1,12 @@ test_that("getPeakGroupsRtMatrix works", { + skip_on_os(os = "windows", arch = "i386") + param <- PeakGroupsParam() nSamples <- length(fileNames(xod_xg)) pkGrp <- .getPeakGroupsRtMatrix( peaks = chromPeaks(xod_xg), peakIndex = .peakIndex(xod_xg), - nSamples = nSamples, + sampleIndex = seq_len(nSamples), missingSample = nSamples - (nSamples * minFraction(param)), extraPeaks = extraPeaks(param) ) @@ -16,76 +18,168 @@ test_that("getPeakGroupsRtMatrix works", { }) test_that("do_adjustRtime_peakGroups works", { - xs <- faahko - xsg <- group(xs) + skip_on_os(os = "windows", arch = "i386") + + xsa <- xod_xgr + xsg <- xod_xg misSamp <- 1 - xsa <- retcor(xsg, method = "peakgroups", missing = misSamp) - minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) - res <- do_adjustRtime_peakGroups(peaks = peaks(xs), - peakIndex = xsg@groupidx, - rtime = xsg@rt$raw, - minFraction = minFr) - expect_equal(xsa@rt$corrected, res) - ## Change settings. - misSamp <- 3 - xsa <- retcor(xsg, method = "peakgroups", missing = misSamp) - minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) - res <- do_adjustRtime_peakGroups(peaks = peaks(xs), - peakIndex = xsg@groupidx, - rtime = xsg@rt$raw, - minFraction = minFr) - expect_equal(xsa@rt$corrected, res) - misSamp <- 2 - xtr <- 2 - xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr) - minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) - res <- do_adjustRtime_peakGroups(peaks = peaks(xs), - peakIndex = xsg@groupidx, - rtime = xsg@rt$raw, - minFraction = minFr, extraPeaks = xtr) - expect_equal(xsa@rt$corrected, res) - xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, - smooth = "linear") - minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) - res <- do_adjustRtime_peakGroups(peaks = peaks(xs), - peakIndex = xsg@groupidx, - rtime = xsg@rt$raw, - minFraction = minFr, extraPeaks = xtr, - smooth = "linear") - expect_equal(xsa@rt$corrected, res) - xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, - family = "symmetric") - minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) - res <- do_adjustRtime_peakGroups(peaks = peaks(xs), - peakIndex = xsg@groupidx, - rtime = xsg@rt$raw, - minFraction = minFr, extraPeaks = xtr, - family = "symmetric") - expect_equal(xsa@rt$corrected, res) - xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, - span = 1) - minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) - res <- do_adjustRtime_peakGroups(peaks = peaks(xs), - peakIndex = xsg@groupidx, - rtime = xsg@rt$raw, - minFraction = minFr, extraPeaks = xtr, - span = 1) - expect_equal(xsa@rt$corrected, res) + minFr <- (length(fileNames(xsa)) - misSamp) / length(fileNames(xsa)) + expect_error(do_adjustRtime_peakGroups( + peaks = chromPeaks(xsg), peakIndex = featureDefinitions(xsg)$peakidx, + rtime = rtime(xsg, bySample = TRUE), minFraction = minFr, subset = "4"), + "expected to be an integer") + expect_error(do_adjustRtime_peakGroups( + peaks = chromPeaks(xsg), peakIndex = featureDefinitions(xsg)$peakidx, + rtime = rtime(xsg, bySample = TRUE), minFraction = minFr, + subset = 4L), "out of range") + expect_error(do_adjustRtime_peakGroups( + peaks = chromPeaks(xsg), peakIndex = featureDefinitions(xsg)$peakidx, + rtime = rtime(xsg, bySample = TRUE), minFraction = minFr, + subset = c(1, 2, 5, 14)), "out of range") + + res <- do_adjustRtime_peakGroups( + peaks = chromPeaks(xsg), peakIndex = featureDefinitions(xsg)$peakidx, + rtime = rtime(xsg, bySample = TRUE), minFraction = minFr) + res_orig <- do_adjustRtime_peakGroups_orig( + peaks = chromPeaks(xsg), + peakIndex = featureDefinitions(xsg)$peakidx, + rtime = rtime(xsg, bySample = TRUE), + minFraction = minFr) + expect_equal(res, res_orig) + expect_true(sum(unlist(res) != rtime(xsg)) > 3000) + ## Use only a subset. + res_sub <- do_adjustRtime_peakGroups( + peaks = chromPeaks(xsg), peakIndex = featureDefinitions(xsg)$peakidx, + rtime = rtime(xsg, bySample = TRUE), minFraction = minFr, + subset = c(1, 3)) }) test_that("applyRtAdjustment works", { + skip_on_os(os = "windows", arch = "i386") + xs <- faahko ## group em. - xsg <- group(xs) - ## align em. - xsa <- retcor(xsg, method = "peakgroups") - pksAdj <- .applyRtAdjToChromPeaks(peaks(xsg), - rtraw = xsa@rt$raw, - rtadj = xsa@rt$corrected) - expect_equal(pksAdj, peaks(xsa)) + ## xsg <- group(xs) + ## ## align em. + ## xsa <- retcor(xsg, method = "peakgroups") + pksAdj <- .applyRtAdjToChromPeaks(chromPeaks(xod_xg), + rtraw = rtime(xod_xg, bySample = TRUE), + rtadj = rtime(xod_xgr, bySample = TRUE)) + expect_equal(pksAdj, chromPeaks(xod_xgr)) ## Reset em. pksRaw <- .applyRtAdjToChromPeaks(pksAdj, - rtraw = xsa@rt$corrected, - rtadj = xsa@rt$raw) - expect_equal(pksRaw, peaks(xsg)) + rtraw = rtime(xod_xgr, bySample = TRUE), + rtadj = rtime(xod_xg, bySample = TRUE)) + expect_equal(pksRaw, chromPeaks(xod_xg)) + + rt_raw <- rtime(xod_xgr, adjusted = FALSE, bySample = TRUE)[[1]] + rt_adj <- rtime(xod_xgr, bySample = TRUE)[[1]] + + rt_new <- .applyRtAdjustment(rt_raw, rt_raw, rt_adj) + expect_equal(unname(rt_new), unname(rt_adj)) + + rt_new2 <- .applyRtAdjustment(rt_raw, rt_raw[200:1000], rt_adj[200:1000]) + + ## Artificial examples. + a_raw <- c(1, 2, 3, 5, 6, 7, 8, 10, 12, 13, 14, 16) + a_adj <- a_raw + 2 # shift by 2 + b <- .applyRtAdjustment(a_raw, a_raw, a_adj) + expect_equal(a_adj, b) + b_2 <- .applyRtAdjustment(a_raw, a_raw[4:8], a_adj[4:8]) + expect_equal(b, b_2) + + a_adj <- a_raw - 2 + b <- .applyRtAdjustment(a_raw, a_raw, a_adj) + expect_equal(a_adj, b) + b_2 <- .applyRtAdjustment(a_raw, a_raw[4:8], a_adj[4:8]) + expect_equal(b, b_2) +}) + +test_that(".get_closest_index works", { + skip_on_os(os = "windows", arch = "i386") + + expect_equal(.get_closest_index(2, c(1, 3, 5, 7)), 3) + expect_equal(.get_closest_index(2, c(1, 3, 5, 7), method = "previous"), 1) + expect_equal(.get_closest_index(2, c(1, 3, 5, 7), method = "closest"), 1) + expect_equal(.get_closest_index(6, c(1, 3, 5)), 5) + expect_equal(.get_closest_index(6, c(1, 3, 5), method = "previous"), 5) + expect_equal(.get_closest_index(6, c(1, 3, 5), method = "closest"), 5) + expect_equal(.get_closest_index(10, c(1, 3, 5)), 5) + expect_equal(.get_closest_index(10, c(1, 3, 5), method = "previous"), 5) + expect_equal(.get_closest_index(10, c(1, 3, 5), method = "closest"), 5) + expect_equal(.get_closest_index(2, c(5, 7, 9)), 5) + expect_equal(.get_closest_index(2, c(5, 7, 9), method = "previous"), 5) + expect_equal(.get_closest_index(2, c(5, 7, 9), method = "closest"), 5) + expect_equal(.get_closest_index(2, c(1, 5, 9)), 5) + expect_equal(.get_closest_index(2, c(1, 5, 9), method = "previous"), 1) + expect_equal(.get_closest_index(2, c(1, 5, 9), method = "closest"), 1) + expect_equal(.get_closest_index(3, c(1, 5, 9)), 5) + expect_equal(.get_closest_index(3, c(1, 5, 9), method = "previous"), 1) + expect_equal(.get_closest_index(3, c(1, 5, 9), method = "closest"), 1) + expect_equal(.get_closest_index(4, c(1, 5, 9)), 5) + expect_equal(.get_closest_index(4, c(1, 5, 9), method = "previous"), 1) + expect_equal(.get_closest_index(4, c(1, 5, 9), method = "closest"), 5) + expect_equal(.get_closest_index(6, c(1, 5, 9)), 9) + expect_equal(.get_closest_index(6, c(1, 5, 9), method = "previous"), 5) + expect_equal(.get_closest_index(6, c(1, 5, 9), method = "closest"), 5) + expect_equal(.get_closest_index(7, c(1, 5, 9)), 9) + expect_equal(.get_closest_index(7, c(1, 5, 9), method = "previous"), 5) + expect_equal(.get_closest_index(7, c(1, 5, 9), method = "closest"), 5) + expect_equal(.get_closest_index(8, c(1, 5, 9)), 9) + expect_equal(.get_closest_index(8, c(1, 5, 9), method = "previous"), 5) + expect_equal(.get_closest_index(8, c(1, 5, 9), method = "closest"), 9) +}) + +test_that(".match_trim_vectors and index works", { + skip_on_os(os = "windows", arch = "i386") + + res <- .match_trim_vectors(list(1:10, 3:10)) + expect_equal(res, list(3:10, 3:10)) + res <- .match_trim_vectors(list(3:10, 4:15)) + expect_equal(res, list(3:10, 4:11)) + res <- .match_trim_vectors(list(1:5, 1:20)) + expect_equal(res, list(1:5, 1:5)) + res <- .match_trim_vector_index(list(1:10, 3:10)) + expect_equal(res, list(3:10, 1:8)) + res <- .match_trim_vector_index(list(2:10, 2:8)) + expect_equal(res, list(1:7, 1:7)) +}) + +test_that("adjustRtimeSubset works", { + skip_on_os(os = "windows", arch = "i386") + + rt_raw <- rtime(xod_xgr, adjusted = FALSE, bySample = TRUE) + rt_adj <- rtime(xod_xgr, adjusted = TRUE, bySample = TRUE) + + res <- adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3), + method = "previous") + expect_equal(res[[1]], rt_adj[[1]]) + expect_equal(res[[3]], rt_adj[[3]]) + expect_true(all(res[[2]] != rt_adj[[2]])) + expect_equal(names(res[[2]]), names(rt_adj[[2]])) + expect_equal(unname(res[[2]]), unname(rt_adj[[1]])) + + a <- res[[1]] - rt_raw[[1]] + b <- res[[2]] - rt_raw[[2]] + c <- res[[3]] - rt_raw[[3]] + plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2, + ylim = range(a, b, c)) + points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1) + points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2) + + res <- adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3), + method = "average") + expect_equal(res[[1]], rt_adj[[1]]) + expect_equal(res[[3]], rt_adj[[3]]) + expect_true(all(res[[2]] != rt_adj[[2]])) + expect_true(all(res[[2]] != rt_adj[[1]])) + expect_true(all(res[[2]] != rt_adj[[3]])) + + a <- res[[1]] - rt_raw[[1]] + b <- res[[2]] - rt_raw[[2]] + c <- res[[3]] - rt_raw[[3]] + plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2, + ylim = range(a, b, c)) + points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1) + points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2) }) diff --git a/tests/testthat/test_do_findChromPeaks-functions.R b/tests/testthat/test_do_findChromPeaks-functions.R index 8005ea369..94aebf335 100644 --- a/tests/testthat/test_do_findChromPeaks-functions.R +++ b/tests/testthat/test_do_findChromPeaks-functions.R @@ -1,4 +1,6 @@ test_that("do_findPeaks_MSW works", { + skip_on_os(os = "windows", arch = "i386") + first_file <- filterFile(fticr, file = 1) spctr <- spectra(first_file) expect_true(length(spctr) == 1) @@ -14,130 +16,140 @@ test_that("do_findPeaks_MSW works", { }) test_that("do_findChromPeaks_centWave works", { - ## xr <- xcmsRaw(fs[1], profstep = 0) + skip_on_os(os = "windows", arch = "i386") + ## We expect that changing a parameter has an influence on the result. - xr <- deepCopy(faahko_xr_1) - mzVals <- xr@env$mz - intVals <- xr@env$intensity + mzVals <- mz(xod_x) + intVals <- unlist(intensity(xod_x), use.names = FALSE) ## Define the values per spectrum: - valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - res1 <- do_findChromPeaks_centWave(mz = mzVals, + valsPerSpect <- lengths(mzVals) + res1 <- do_findChromPeaks_centWave(mz = unlist(mzVals, use.names = FALSE), int = intVals, - scantime = xr@scantime, + scantime = rtime(xod_x), valsPerSpect, - snthresh = 200, - noise = 4000) + snthresh = 100, + noise = 4000, + prefilter = c(3, 10000)) ## Eventually disable the sleep option to improve speed! - res2 <- do_findChromPeaks_centWave(mz = mzVals, + res2 <- do_findChromPeaks_centWave(mz = unlist(mzVals, use.names = FALSE), int = intVals, - scantime = xr@scantime, + scantime = rtime(xod_x), valsPerSpect, snthresh = 500, - noise = 4000, sleep = 0.00) + noise = 4000, sleep = 0.00, + prefilter = c(3, 10000)) expect_true(nrow(res1) > nrow(res2)) - - ## Check scanrange on findPeaks.centWave. - res_1 <- findPeaks.centWave(xr, scanrange = c(90, 345), noise = 2000) - xr <- xr[90:345] - mzVals <- xr@env$mz - intVals <- xr@env$intensity - ## Define the values per spectrum: - valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - res_2 <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, - scantime = xr@scantime, valsPerSpect, - noise = 2000) - expect_equal(res_1@.Data, res_2) }) test_that("do_findChromPeaks_centWaveWithPredIsoROIs works", { - xr <- deepCopy(faahko_xr_1) - mzVals <- xr@env$mz - intVals <- xr@env$intensity + skip_on_os(os = "windows", arch = "i386") + + tmp <- filterFile(od_x, 1) + mzVals <- mz(tmp) + intVals <- unlist(intensity(tmp), use.names = FALSE) ## initial centWave: - valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - feats_1 <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, - scantime = xr@scantime, - valsPerSpect = valsPerSpect, - noise = 1500, verboseColumns = TRUE) - feats_2 <- do_findChromPeaks_addPredIsoROIs(mz = mzVals, - int = intVals, - scantime = xr@scantime, - valsPerSpect = valsPerSpect, - noise = 1500, - peaks. = feats_1) - all_f <- do_findChromPeaks_centWaveWithPredIsoROIs(mz = mzVals, - int = intVals, - scantime = xr@scantime, - valsPerSpect = valsPerSpect, - noise = 1500) - ## Comparisons. + valsPerSpect <- lengths(mzVals) + feats_1 <- do_findChromPeaks_centWave( + mz = unlist(mzVals, use.names = FALSE), int = intVals, + prefilter = c(3, 5000), + scantime = rtime(tmp), + valsPerSpect = valsPerSpect, noise = 1500, verboseColumns = TRUE) + feats_2 <- do_findChromPeaks_addPredIsoROIs( + mz = unlist(mzVals, use.names = FALSE), int = intVals, + scantime = rtime(tmp), valsPerSpect = valsPerSpect, noise = 1500, + prefilter = c(3, 5000), + peaks. = feats_1) + expect_true(nrow(feats_1) < nrow(feats_2)) + all_f <- do_findChromPeaks_centWaveWithPredIsoROIs( + mz = unlist(mzVals, use.names = FALSE), int = intVals, + scantime = rtime(tmp), prefilter = c(3, 5000), + valsPerSpect = valsPerSpect, noise = 1500) expect_equal(all_f, feats_2) - ## old_all <- xcms:::.centWaveWithPredictedIsotopeROIs(xr, noise = 1500) - ## checkEquals(all_f, old_all@.Data) }) test_that("do_findChromPeaks_massifquant works", { - xr <- deepCopy(faahko_xr_1) - res <- findPeaks.massifquant(xr, snthresh = 100) - mz <- xr@env$mz - int <- xr@env$intensity - valsPerSpect <- diff(c(xr@scanindex, length(mz))) - scantime <- xr@scantime + skip_on_os(os = "windows", arch = "i386") + + tmp <- filterFile(od_x, 1) + mz <- mz(tmp) + valsPerSpect <- lengths(mz) + mz <- unlist(mz, use.names = FALSE) + int <- unlist(intensity(tmp), use.names = FALSE) + rtime <- rtime(tmp) res_2 <- do_findChromPeaks_massifquant(mz = mz, int = int, valsPerSpect = valsPerSpect, - scantime = scantime) - expect_equal(res@.Data, res_2) + scantime = rtime) + expect_true(nrow(res_2) == 1542) ## With centWave: res_3 <- do_findChromPeaks_massifquant(mz = mz, int = int, valsPerSpect = valsPerSpect, - scantime = scantime, withWave = TRUE, + scantime = rtime, withWave = TRUE, snthresh = 100, noise = 4000) - res_4 <- findPeaks.massifquant(xr, withWave = 1, snthresh = 100, - noise = 4000) - expect_equal(res_3, res_4@.Data) expect_true(nrow(res_3) < nrow(res_2)) - - ## Subsetted data and scanrange: - res_1 <- findPeaks.massifquant(xr, scanrange = c(90, 345)) - xsub <- xr[90:345] - mz <- xsub@env$mz - int <- xsub@env$intensity - valsPerSpect <- diff(c(xsub@scanindex, length(mz))) - scantime <- xsub@scantime - res_2 <- do_findChromPeaks_massifquant(mz = mz, int = int, - valsPerSpect = valsPerSpect, - scantime = scantime) - expect_identical(res_1@.Data, res_2) }) test_that("do_findChromPeaks_matchedFilter works", { - xr <- deepCopy(faahko_xr_1) - ## We expect that changing a parameter has an influence on the result. - mzVals <- xr@env$mz - intVals <- xr@env$intensity - ## Define the values per spectrum: - valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) + skip_on_os(os = "windows", arch = "i386") + + tmp <- filterFile(od_x, 1) + mzVals <- mz(tmp) + valsPerSpect <- lengths(mzVals) + mzVals <- unlist(mzVals, use.names = FALSE) + intVals <- unlist(intensity(tmp), use.names = FALSE) res1 <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, - scantime = xr@scantime, + scantime = rtime(tmp), valsPerSpect, binSize = 10) res2 <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, - scantime = xr@scantime, + scantime = rtime(tmp), valsPerSpect, binSize = 10, snthresh = 100) expect_true(nrow(res1) > nrow(res2)) res2 <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, - scantime = xr@scantime, + scantime = rtime(tmp), valsPerSpect, binSize = 20) expect_true(nrow(res1) > nrow(res2)) + + ## with empty spectra - simulating issue #325 + od_sub <- filterMz(od_x, mz = c(334.9, 344.1)) + sps <- spectra(filterFile(od_sub, 1)) + ## Add an artificial peak at m/z 0 if spectrum is empty + sps <- lapply(sps, function(z) { + if (!length(z@mz)) { + z@mz <- 0.0 + z@intensity <- 0.0 + } + z + }) + mzs <- lapply(sps, mz) + n_peaks <- lengths(mzs, FALSE) + mzs <- unlist(mzs, use.names = FALSE) + ints <- unlist(lapply(sps, intensity), use.names = FALSE) + rtms <- vapply(sps, rtime, numeric(1)) + res3 <- do_findChromPeaks_matchedFilter(mz = mzs, int = ints, + scantime = rtms, + valsPerSpect = n_peaks) + full_data <- findChromPeaks(filterFile(od_x, 1), + param = MatchedFilterParam()) + pks_full <- chromPeaks(full_data, mz = c(335, 344)) + rownames(pks_full) <- NULL + rownames(res3) <- NULL + expect_equal(res3, pks_full[, colnames(res3)]) + res4 <- findChromPeaks(filterMz(filterFile(od_x, 1), mz = c(334.9, 344.1)), + param = MatchedFilterParam()) + res4 <- chromPeaks(res4) + rownames(res4) <- NULL + expect_equal(res4, pks_full) }) test_that("peaksWithMatchedFilter is working", { + skip_on_os(os = "windows", arch = "i386") + od <- filterFile(faahko_od, file = 1) od_mf <- findChromPeaks(od, param = MatchedFilterParam()) @@ -160,6 +172,8 @@ test_that("peaksWithMatchedFilter is working", { }) test_that(".getRtROI works", { + skip_on_os(os = "windows", arch = "i386") + od <- filterFile(faahko_od, file = 1) expect_error(.getRtROI()) expect_error(.getRtROI(1:3)) @@ -178,9 +192,48 @@ test_that(".getRtROI works", { expect_true(nrow(res_2) > nrow(res_3)) res_4 <- .getRtROI(int, rt, noise = 400, prefilter = c(100, 500)) expect_true(nrow(res_4) == 0) + + # Generate a nice-looking peak + # Values from table(cut(rnorm(20000), breaks = 40)) + model_peak <- c(3, 4, 4, 9, 26, 31, 65, 123, 196, 260, 404, 523, 743, 893, + 1188, 1329, 1505, 1540, 1705, 1592, 1535, 1371, 1255, 929, 790, + 652, 438, 336, 223, 138, 78, 50, 25, 15, 11, 6, 3, 0, 1, 1) + model_single_peak <- c(numeric(80), model_peak, numeric(80)) + single_peak_scans <- seq_along(model_single_peak)+200 + single_peak_rois <- .getRtROI(model_single_peak, single_peak_scans) + expect_true(is.matrix(single_peak_rois)) + expect_true(nrow(single_peak_rois)==1) + + model_triple_peak <- c(numeric(20), model_peak, numeric(100), + model_peak/5, numeric(100), + rev(model_peak)*2, numeric(20)) + triple_peak_scans <- seq_along(model_triple_peak)+200 + # Get ROIs for a chromatogram with 3 good peaks + triple_peak_rois <- .getRtROI(model_triple_peak, triple_peak_scans) + expect_true(nrow(triple_peak_rois)==3) + + # Get ROIs for a chromatogram with three peaks + # One of which doesn't pass prefilter check + skipped_peak_rois <- .getRtROI(model_triple_peak, triple_peak_scans, + prefilter = c(3, 500)) + expect_true(nrow(skipped_peak_rois)==2) + + # Get ROIs for a chromatogram with three peaks + # None of which pass stringent prefilter check + skipped_peak_rois <- .getRtROI(model_triple_peak, triple_peak_scans, + prefilter = c(3, 5000)) + expect_true(nrow(skipped_peak_rois)==0) + + # Get ROIs for a chromatogram with three peaks + # One of which passes stringent prefilter check + tall_peak_rois <- .getRtROI(model_triple_peak, triple_peak_scans, + prefilter = c(9, 1500)) + expect_true(nrow(tall_peak_rois)==1) }) test_that("peaksWithCentWave works", { + skip_on_os(os = "windows", arch = "i386") + od <- filterFile(faahko_od, file = 1) mzr <- c(272.1, 272.2) @@ -199,6 +252,48 @@ test_that("peaksWithCentWave works", { cwp <- CentWaveParam(fitgauss = TRUE) pks <- peaksWithCentWave(intensity(chr), rtime(chr), fitgauss = TRUE) + #Testing for Github issue #445 after introducing new CWP method + # Values from round((c(numeric(20), dnorm(seq(-3, 3, length.out = 20)), + # numeric(20))*100 +runif(60))*10000) + skinny_peak <- c(9107, 3326, 9523, 3245, 3429, 9394, 1123, 935, 5128, 8576, + 2711, 3427, 7294, 8109, 9288, 6997, 9756, 8034, 1317, 8866, 13877, + 14854, 28296, 57101, 92209, 151797, 222386, 299402, 365045, 394255, + 402680, 363996, 293985, 222989, 147007, 94947, 52924, 32438, + 11511, 10836, 8046, 601, 889, 5917, 2690, 5381, 9901, 8494, 3349, + 8283, 3410, 5935, 3332, 7041, 3284, 7478, 76, 3739, 2158, 5507) + skinny_peak_rt <- seq_along(skinny_peak)+100 + pks <- peaksWithCentWave(skinny_peak, rt=skinny_peak_rt, + snthresh = 0, peakwidth = c(20, 50), + extendLengthMSW = TRUE) + expect_true(nrow(pks)==1) + + # Reducing minimum peakwidth shouldn't affect peak detection + pks_widerpeakwidth <- peaksWithCentWave(skinny_peak, rt=skinny_peak_rt, + snthresh = 0, peakwidth = c(2, 50), + extendLengthMSW = TRUE) + expect_true(nrow(pks)==nrow(pks_widerpeakwidth)) + + # Test a wider peak + # Values from round((dnorm(seq(-3, 3, length.out = 60))*100+runif(60))*10000) + wider_peak <- c(5000, 12043, 15344, 12748, 20730, 20781, 24673, 36956, 44600, + 48596, 57698, 76937, 89422, 106482, 122977, 143989, 157769, 181563, + 206296, 226309, 251067, 283592, 307523, 324212, 341520, 368568, + 375716, 388428, 401694, 408352, 399415, 403964, 394144, 382952, + 368333, 341668, 330255, 301146, 276234, 254643, 231601, 211038, + 184239, 155817, 140996, 123284, 100121, 90280, 77303, 58708, + 52817, 44003, 36068, 24637, 20688, 14162, 14836, 16603, 8341, + 8307) + wider_peak_rt <- seq_along(wider_peak)+100 + pks <- peaksWithCentWave(wider_peak, rt=wider_peak_rt, + snthresh = 0, peakwidth = c(20, 80), + extendLengthMSW = TRUE) + expect_true(nrow(pks)==1) + pks_widerpeakwidth <- peaksWithCentWave(skinny_peak, rt=skinny_peak_rt, + snthresh = 0, peakwidth = c(2, 80), + extendLengthMSW = TRUE) + expect_true(nrow(pks)==nrow(pks_widerpeakwidth)) + + ## Check errors expect_error(peaksWithCentWave()) expect_error(peaksWithCentWave(int = 1:3, rt = 1:5)) @@ -207,8 +302,10 @@ test_that("peaksWithCentWave works", { }) test_that(".narrow_rt_boundaries works", { + skip_on_os(os = "windows", arch = "i386") + d <- c(0, 0, 1, 2, 1, 3, 4, 6, 4, 3, 2, 0, 1, 0, 2, 0) - + ## Full range lm <- c(1, length(d)) res <- .narrow_rt_boundaries(lm, d) @@ -217,7 +314,7 @@ test_that(".narrow_rt_boundaries works", { expect_equal(res, c(3, 16)) res <- .narrow_rt_boundaries(lm, d, thresh = 3) expect_equal(res, c(5, 11)) - + ## Subset (reflecting the real situation). lm <- c(3, 9) res <- .narrow_rt_boundaries(lm, d) @@ -226,13 +323,13 @@ test_that(".narrow_rt_boundaries works", { expect_equal(res, c(3, 9)) res <- .narrow_rt_boundaries(lm, d, thresh = 3) expect_equal(res, c(5, 9)) - + lm <- c(3, 13) res <- .narrow_rt_boundaries(lm, d) expect_equal(res, c(3, 13)) res <- .narrow_rt_boundaries(lm, d, thresh = 3) expect_equal(res, c(5, 11)) - + ## That's the fix for issue #300 expect_equal(.narrow_rt_boundaries(lm, d, thresh = 100), lm) expect_equal(.narrow_rt_boundaries(c(1, length(d)), d, thresh = 100), diff --git a/tests/testthat/test_do_groupChromPeaks-functions.R b/tests/testthat/test_do_groupChromPeaks-functions.R index da1e9330c..b65fdb9ea 100644 --- a/tests/testthat/test_do_groupChromPeaks-functions.R +++ b/tests/testthat/test_do_groupChromPeaks-functions.R @@ -1,29 +1,40 @@ test_that("do_groupChromPeaks_density works", { - fts <- peaks(faahko) - res <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko)) - res_2 <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko), + skip_on_os(os = "windows", arch = "i386") + + fts <- chromPeaks(xod_xg) + grps <- rep(1, 3) + res <- do_groupChromPeaks_density(fts, sampleGroups = grps) + res_2 <- do_groupChromPeaks_density(fts, sampleGroups = grps, minFraction = 0.9) - expect_true(nrow(res$featureDefinitions) > nrow(res_2$featureDefinitions)) + expect_true(nrow(res) > nrow(res_2)) }) test_that("do_groupPeaks_mzClust works", { - fts <- peaks(fticr_xs) + skip_on_os(os = "windows", arch = "i386") + + fts <- chromPeaks(fticr_xod) res <- do_groupPeaks_mzClust(peaks = fts, - sampleGroups = sampclass(fticr_xs)) + sampleGroups = c(1, 1)) res_2 <- do_groupPeaks_mzClust(peaks = fts, - sampleGroups = sampclass(fticr_xs), + sampleGroups = c(1, 1), minFraction = 0, absMz = 2) expect_true(nrow(res$featureDefinitions) > nrow(res_2$featureDefinitions)) - res_x <- group(fticr_xs, method = "mzClust") - expect_equal(res_x@groups, res$featureDefinitions) - expect_equal(res_x@groupidx, res$peakIndex) + ## Issue 416 + nas <- sample(1:nrow(fts), 10) + fts[nas, "mz"] <- NA + expect_warning(res <- .fix_mz_clust_peaks(fts), "Replaced them with the") + expect_false(any(is.na(res[, "mz"]))) + fts <- fts[, !(colnames(fts) %in% c("mzmin", "mzmax"))] + expect_error(res <- .fix_mz_clust_peaks(fts), "peaks with missing") }) test_that("do_groupChromPeaks_nearest works", { - xs <- faahko_xs - features <- peaks(xs) - sampleGroups <- sampclass(xs) + skip_on_os(os = "windows", arch = "i386") + + tmp <- filterFile(xod_xg) + features <- chromPeaks(tmp) + sampleGroups <- rep(1, length(fileNames(tmp))) mzVsRtBalance <- 10 mzCheck <- 0.2 rtCheck <- 15 @@ -32,6 +43,42 @@ test_that("do_groupChromPeaks_nearest works", { res <- do_groupChromPeaks_nearest(features, sampleGroups) res_2 <- do_groupChromPeaks_nearest(features, sampleGroups, absRt = 3) expect_true(nrow(res$featureDefinitions) < nrow(res_2$featureDefinitions)) - res_x <- group(xs, method = "nearest") - expect_equal(res_x@groups, res$featureDefinitions) +}) + +test_that(".group_peaks_density works", { + skip_on_os(os = "windows", arch = "i386") + + x <- rbind(c(rt = 3.1, mz = 3, index = 1, sample = 1, into = 120), + c(rt = 3.2, mz = 3, index = 2, sample = 2, into = 130), + c(rt = 3.15, mz = 3, index = 3, sample = 3, into = 29), + c(rt = 5, mz = 3, index = 4, sample = 4, into = 32), + c(rt = 3, mz = 3, index = 5, sample = 5, into = 43), + c(rt = 6, mz = 3, index = 6, sample = 6, into = 35)) + densFrom <- 1 + densTo <- 100 + densN <- 100 + sampleGroups <- c("b", "a", "b", "a", "b", "a") + sampleGroupTable <- table(sampleGroups) + bw <- 2 + maxFeatures <- 20 + minFraction <- 0.8 + minSamples <- 1 + res <- .group_peaks_density(x, bw = bw, densFrom = densFrom, + densTo = densTo, densN = densN, + sampleGroups = sampleGroups, + sampleGroupTable = sampleGroupTable, + minFraction = minFraction, + minSamples = minSamples, + maxFeatures = maxFeatures, sleep = 0) + expect_true(nrow(res) == 1) + expect_equal(res$peakidx, list(1:6)) + res <- .group_peaks_density(x, bw = bw, densFrom = densFrom, + densTo = densTo, densN = densN, + sampleGroups = sampleGroups, + sampleGroupTable = sampleGroupTable, + minFraction = minFraction, + minSamples = 7, + maxFeatures = maxFeatures, sleep = 0) + expect_true(nrow(res) == 0) + expect_true(is(res, "data.frame")) }) diff --git a/tests/testthat/test_functions-Chromatogram.R b/tests/testthat/test_functions-Chromatogram.R new file mode 100644 index 000000000..6616d20b7 --- /dev/null +++ b/tests/testthat/test_functions-Chromatogram.R @@ -0,0 +1,64 @@ +test_that(".chrom_merge_neighboring_peaks works", { + skip_on_os(os = "windows", arch = "i386") + + ints <- c(0.5, 1, 1, 3, 6, 9, 12, 13, 11, 6, 5, 3, 1, 1, 1.5, 1, 4, 6, + 8, 9, 8, 6, 3, 2, 1.3, 1, 0.7, 0.5, 1, 1, 1, 0.5, 3, 5, 8, + 12, 10, 9, 6, 3, 2, 1, 1, 1) + rts <- 1:length(ints) + chr <- Chromatogram(rts, ints) + cwp <- CentWaveParam(snthresh = 0, prefilter = c(1, 1), peakwidth = c(1, 4)) + xchr <- findChromPeaks(chr, param = cwp) + pkd <- chromPeakData(xchr) + pkd$index <- 1:nrow(pkd) + res <- .chrom_merge_neighboring_peaks(chr, chromPeaks(xchr), pkd) + expect_true(all(names(res) %in% c("chromPeaks", "chromPeakData"))) + rownames(res$chromPeaks) <- NULL + expect_equal(res$chromPeaks, chromPeaks(xchr)) + res <- .chrom_merge_neighboring_peaks(chr, chromPeaks(xchr), pkd, + diffRt = 5) + rownames(res$chromPeaks) <- NULL + expect_equal(res$chromPeaks, chromPeaks(xchr)) + res <- .chrom_merge_neighboring_peaks( + chr, chromPeaks(xchr)[1, , drop = FALSE], + pkd[1, ], + diffRt = 5) + rownames(res$chromPeaks) <- NULL + expect_equal(res$chromPeaks, chromPeaks(xchr)[1, , drop = FALSE]) + + set.seed(123) + ints <- c(0.5, 1, 1, 3, 6, 9, 12, 13, 11, 7, 6, 5.5, 5.2, 5, 5.2, 5.4, + 5.7, 6, 8, 9, 8, 6, 3, 2, 1.3, 1, 0.7, 0.5, 1, 1, 1, 0.5, 3, + 5, 8, 14, 10, 9, 6, 3, 2, 1, 1, 1) + rts <- 1:length(ints) + rnorm(length(ints), sd = 0.05) + chr <- Chromatogram(rts, ints) + xchr <- findChromPeaks(chr, param = cwp) + pks <- chromPeaks(xchr) + pkd <- chromPeakData(xchr) + pkd$index <- 1:nrow(pkd) + res <- .chrom_merge_neighboring_peaks( + chr, pks, pkd, diffRt = 5, minProp = 0.5) + expect_true(nrow(res$chromPeaks) == 2) + expect_equal(rownames(res$chromPeaks), c(NA_character_, "3")) + expect_equal(res$chromPeaks[1, "rtmin"], unname(pks[1, "rtmin"])) + expect_equal(res$chromPeaks[1, "rtmax"], unname(pks[2, "rtmax"])) + expect_equal(nrow(res$chromPeaks), nrow(res$chromPeakData)) + expect_equal(res$chromPeakData$index, c(1L, 3L)) + + res <- .chrom_merge_neighboring_peaks( + chr, pks, pkd, diffRt = 10, minProp = 0.01) + expect_true(nrow(res$chromPeaks) == 1) + expect_equal(res$chromPeaks[1, "rtmin"], unname(pks[1, "rtmin"])) + expect_equal(res$chromPeaks[1, "rtmax"], unname(pks[3, "rtmax"])) + expect_equal(res$chromPeakData$index, 3L) + + ## Check "into" calculation. + pks <- rbind(pks[-c(1, 2), ], + c(18, pks[2, "rtmin"], pks[2, "rtmin"] + 4, NA_real_, + NA_real_, 3, 3), + c(20, pks[2, "rtmax"] - 5, pks[2, "rtmax"], NA_real_, + NA_real_, 9, 8)) + res <- .chrom_merge_neighboring_peaks( + chr, pks, pkd, diffRt = 5, minProp = 0.75) + expect_equal(unname(res$chromPeaks[1, "into"]), + unname(chromPeaks(xchr)[2, "into"])) +}) diff --git a/tests/testthat/test_functions-OnDiskMSnExp.R b/tests/testthat/test_functions-OnDiskMSnExp.R index 72e4d6fae..bc720f182 100644 --- a/tests/testthat/test_functions-OnDiskMSnExp.R +++ b/tests/testthat/test_functions-OnDiskMSnExp.R @@ -1,20 +1,16 @@ test_that(".obiwarp works", { + skip_on_os(os = "windows", arch = "i386") + - xs <- faahko_xs od <- faahko_od xod <- faahko_xod - ## Feature alignment on those: - ## object <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 10000, - ## snthresh = 40)) prm <- ObiwarpParam(binSize = 1) - xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm)) - expect_equal(xs_2@rt$raw[[2]], xs_2@rt$corrected[[2]]) - expect_true(sum(xs_2@rt$raw[[1]] != xs_2@rt$corrected[[1]]) > 500) - expect_true(sum(xs_2@rt$raw[[3]] != xs_2@rt$corrected[[3]]) > 500) - + + raw_rt <- split(rtime(od), fromFile(od)) ## And the OnDiskMSnExp implementation: res <- .obiwarp(od, param = prm) - expect_equal(xs_2@rt$corrected, res) + expect_true(all(raw_rt[[1]] != res[[1]])) + expect_equal(res[[2]], unname(raw_rt[[2]])) res_2 <- adjustRtime(od, param = prm) res_3 <- adjustRtime(xod, param = prm) expect_equal(adjustedRtime(res_3), res_2) @@ -27,33 +23,37 @@ test_that(".obiwarp works", { expect_true(hasAdjustedRtime(res_3)) tmp <- dropAdjustedRtime(res_3) expect_equal(chromPeaks(tmp), chromPeaks(xod)) - + ## File issue on that! retcor.obiwarp does use round for the adjustment of ## the peak! ## -> issue #122 ## expect_equal(chromPeaks(res_3), peaks(xs_2)) - + ## Manually specify center Sample centerSample(prm) <- 3 - xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm), center = centerSample(prm)) - expect_equal(xs_2@rt$raw[[centerSample(prm)]], - xs_2@rt$corrected[[centerSample(prm)]]) res <- .obiwarp(od, param = prm) - expect_equal(xs_2@rt$corrected, res) - ## change some settings - gapInit(prm) <- 3.1 - gapExtend(prm) <- 0.9 - xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm), gapInit = gapInit(prm), - center = centerSample(prm), gapExtend = gapExtend(prm)) - expect_equal(xs_2@rt$raw[[centerSample(prm)]], - xs_2@rt$corrected[[centerSample(prm)]]) + expect_equal(res[[3]], unname(raw_rt[[3]])) + expect_true(all(res[[2]] != raw_rt[[2]])) + + ## With subset. + prm <- ObiwarpParam(binSize = 1, subset = c(1, 3)) + res <- .obiwarp(od, param = prm) + expect_equal(res[[1]], unname(raw_rt[[1]])) + expect_true(all(res[[2]] != unname(raw_rt[[2]]))) + expect_true(all(res[[3]] != unname(raw_rt[[3]]))) + + prm <- ObiwarpParam(binSize = 1, subset = c(2, 3)) res <- .obiwarp(od, param = prm) - expect_equal(xs_2@rt$corrected, res) + expect_equal(res[[2]], unname(raw_rt[[2]])) + expect_true(sum(res[[1]] == unname(raw_rt[[1]])) > 500) + expect_true(all(res[[3]] != unname(raw_rt[[3]]))) }) test_that(".concatenate_OnDiskMSnExp works", { - od1 <- readMSData(faahko_3_files[1], mode = "onDisk") - od2 <- readMSData(faahko_3_files[2:3], mode = "onDisk") + skip_on_os(os = "windows", arch = "i386") + + od1 <- filterFile(od_x, 1) + od2 <- filterFile(od_x, 2:3) res <- .concatenate_OnDiskMSnExp(od1, od2) expect_equal(fileNames(faahko_od), fileNames(res)) expect_equal(experimentData(faahko_od), experimentData(res)) @@ -80,4 +80,82 @@ test_that(".concatenate_OnDiskMSnExp works", { expect_equal(.concatenate_OnDiskMSnExp(od1), od1) }) +test_that(".split_by_file and .split_bu_file2 work", { + skip_on_os(os = "windows", arch = "i386") + + a <- lapply(seq_along(fileNames(od_x)), filterFile, object = od_x) + b <- .split_by_file(od_x, subsetFeatureData = FALSE) + expect_equal(a[[2]][[19]], b[[2]][[19]]) + expect_equal(spectra(a[[3]]), spectra(b[[3]])) + d <- .split_by_file2(od_x, subsetFeatureData = FALSE) + expect_equal(b, d) + + b_2 <- .split_by_file(od_x, subsetFeatureData = TRUE) + expect_true(ncol(fData(b_2[[1]])) < ncol(fData(b[[1]]))) + d_2 <- .split_by_file2(od_x, subsetFeatureData = TRUE) + + res <- .split_by_file(xod_xgr) + expect_true(length(res) == 3) + expect_equal(rtime(res[[2]]), + rtime(xod_xgr, bySample = TRUE, adjusted = TRUE)[[2]]) + expect_true(ncol(fData(res[[1]])) < ncol(fData(xod_xgr))) + res_2 <- .split_by_file(xod_xgr, subsetFeatureData = TRUE) + expect_equal(res, res_2) + expect_error(.split_by_file(xod_xgr, msLevel. = 2), "No MS level") + + a <- filterFile(xod_xgrg, 2, keepAdjustedRtime = TRUE) + b <- .split_by_file(xod_xgrg, to_class = "XCMSnExp") + expect_equal(rtime(a), rtime(b[[2]])) + expect_true(is(b[[1]], "XCMSnExp")) + expect_equal(chromPeaks(a), chromPeaks(b[[2]])) + expect_equal(chromPeakData(a), chromPeakData(b[[2]])) + d <- .split_by_file2(xod_xgrg, to_class = "XCMSnExp", + subsetFeatureData = TRUE) + expect_equal(b, d) +}) + +test_that(".estimate_prec_intensity works", { + skip_on_os(os = "windows", arch = "i386") + + tmp <- filterRt(as(pest_dda, "OnDiskMSnExp"), c(300, 400)) + res <- .estimate_prec_intensity(tmp, method = "previous") + expect_true(length(res) == length(tmp)) + expect_true(all(is.na(res[msLevel(tmp) == 1L]))) + + res <- .estimate_prec_intensity(tmp, method = "interpolation") + expect_true(length(res) == length(tmp)) + expect_true(all(is.na(res[msLevel(tmp) == 1L]))) +}) +test_that("estimatePrecursorIntensity,OnDiskMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + tmp <- filterRt(pest_dda, c(300, 400)) + res <- estimatePrecursorIntensity(tmp, BPPARAM = SerialParam()) + expect_true(length(res) == length(tmp)) + expect_true(all(is.na(res[msLevel(tmp) == 1L]))) + + expect_error(estimatePrecursorIntensity(od_x)) +}) + +test_that(".OnDiskMSnExp2MsBackendMzR works", { + skip_on_os(os = "windows", arch = "i386") + + if (requireNamespace("Spectra", quietly = TRUE)) { + res <- .OnDiskMSnExp2MsBackendMzR(xod_x) + expect_equal(Spectra::rtime(res), unname(rtime(xod_x))) + } +}) + +test_that(".fData2MsBackendMzR works", { + skip_on_os(os = "windows", arch = "i386") + + if (requireNamespace("Spectra", quietly = TRUE)) { + res <- .fData2MsBackendMzR(fData(xod_x), fileNames(xod_x)) + expect_true(is(res, "MsBackendMzR")) + expect_equal(Spectra::rtime(res), unname(rtime(xod_x))) + tmp <- xod_x[c(1, 45, 113)] + res <- .fData2MsBackendMzR(fData(tmp), fileNames(tmp)) + expect_equal(Spectra::rtime(res), unname(rtime(tmp))) + } +}) diff --git a/tests/testthat/test_functions-ProcessHistory.R b/tests/testthat/test_functions-ProcessHistory.R index c4947dfdf..f161d1437 100644 --- a/tests/testthat/test_functions-ProcessHistory.R +++ b/tests/testthat/test_functions-ProcessHistory.R @@ -1,4 +1,6 @@ test_that("ProcessHistory constructor and class works", { + skip_on_os(os = "windows", arch = "i386") + ph <- ProcessHistory() expect_true(inherits(ph, "ProcessHistory")) @@ -21,6 +23,8 @@ test_that("ProcessHistory constructor and class works", { }) test_that("XProcessHistory works", { + skip_on_os(os = "windows", arch = "i386") + ph <- XProcessHistory() expect_true(is(ph, "XProcessHistory")) expect_true(inherits(ph, "ProcessHistory")) @@ -42,6 +46,8 @@ test_that("XProcessHistory works", { }) test_that("GenericProcessHistory works", { + skip_on_os(os = "windows", arch = "i386") + xs <- list() xs <- c(xs, GenericProcessHistory(fun = "mean")) xs <- c(xs, GenericProcessHistory(fun = "median")) @@ -53,3 +59,25 @@ test_that("GenericProcessHistory works", { expect_true(length(xs) == 1) expect_equal(processParam(xs[[1]])@fun, "median") }) + +test_that(".process_history_subset_samples works", { + skip_on_os(os = "windows", arch = "i386") + + ph <- list(XProcessHistory(CentWaveParam())) + expect_equal(.process_history_subset_samples(ph), ph) + ph <- list(XProcessHistory(CentWaveParam(), fileIndex = 1:10)) + res <- .process_history_subset_samples(ph, 1:4) + expect_equal(res[[1]]@fileIndex, 1:4) + expect_true(is(res[[1]]@param, "CentWaveParam")) + ph <- list(XProcessHistory(CentWaveParam(), fileIndex = 1:7), + XProcessHistory(PeakDensityParam( + sampleGroups = c("a", "b", "a", "c", "c", "a", "b")))) + res <- .process_history_subset_samples(ph, c(2, 4, 1, 2)) + expect_equal(res[[1]]@fileIndex, 1:4) + expect_equal(res[[2]]@fileIndex, 1:4) + expect_equal(res[[2]]@param@sampleGroups, c("b", "c", "a", "b")) + res <- .process_history_subset_samples(ph, 5) + expect_equal(res[[1]]@fileIndex, 1) + expect_equal(res[[2]]@fileIndex, 1) + expect_equal(res[[2]]@param@sampleGroups, c("c")) +}) diff --git a/tests/testthat/test_functions-XCMSnExp.R b/tests/testthat/test_functions-XCMSnExp.R index 4319f5c88..064c4cec4 100644 --- a/tests/testthat/test_functions-XCMSnExp.R +++ b/tests/testthat/test_functions-XCMSnExp.R @@ -1,4 +1,6 @@ test_that("XCMSnExp new works", { + skip_on_os(os = "windows", arch = "i386") + ## Basic contructor. x <- new("XCMSnExp") x@.processHistory <- list("a") @@ -14,6 +16,8 @@ test_that("XCMSnExp new works", { }) test_that("adjustRtimePeakGroups works", { + skip_on_os(os = "windows", arch = "i386") + pkGrp <- adjustRtimePeakGroups(xod_xg, param = PeakGroupsParam(minFraction = 1)) expect_equal(colnames(pkGrp), basename(fileNames(xod_xg))) @@ -36,9 +40,15 @@ test_that("adjustRtimePeakGroups works", { ## rtime of the MS level 2 spectra are expected to be adjusted too expect_equal(rtime(xod_xgr), rtime(xod_mod_adj)) expect_true(all(rtime(xod_mod)[idx_ms2] != rtime(xod_mod_adj)[idx_ms2])) + + res <- adjustRtimePeakGroups(xod_xg, + param = PeakGroupsParam(subset = c(1, 3))) + expect_equal(colnames(res), basename(fileNames(xod_xg)[c(1, 3)])) }) test_that("plotAdjustedRtime works", { + skip_on_os(os = "windows", arch = "i386") + plotAdjustedRtime(xod_xgr, ylim = c(-20, 40)) plotAdjustedRtime(xod_xgrg) expect_warning(plotAdjustedRtime(xod_x)) @@ -46,19 +56,30 @@ test_that("plotAdjustedRtime works", { }) test_that("plotChromPeakDensity works", { + skip_on_os(os = "windows", arch = "i386") + mzr <- c(305.05, 305.15) + .plotChromPeakDensity(xod_x, mz = mzr) plotChromPeakDensity(xod_x, mz = mzr) ## Use the full range. + .plotChromPeakDensity(xod_x) plotChromPeakDensity(xod_x) - + + .plotChromPeakDensity(xod_x, mz = c(0, 1)) plotChromPeakDensity(xod_x, mz = c(0, 1)) - plotChromPeakDensity(xod_x, mz = c(300, 310), pch = 16, xlim = c(2500, 4000)) + .plotChromPeakDensity(xod_x, mz = c(300, 310), pch = 16, + xlim = c(2500, 4000)) + plotChromPeakDensity(xod_x, mz = c(300, 310), pch = 16, + xlim = c(2500, 4000)) expect_error(plotChromPeakDensity(xod_x, mz = c(0, 1), type = "dunno")) + expect_error(.plotChromPeakDensity(xod_x, mz = c(0, 1), type = "dunno")) }) test_that("plotChromPeaks works", { + skip_on_os(os = "windows", arch = "i386") + ## Plot the full range. plotChromPeaks(xod_x) @@ -68,6 +89,8 @@ test_that("plotChromPeaks works", { }) test_that("plotChromPeakImage works", { + skip_on_os(os = "windows", arch = "i386") + plotChromPeakImage(xod_x, binSize = 30, log = FALSE) ## Check that it works if no peaks were found in one sample. tmp <- xod_x @@ -80,6 +103,8 @@ test_that("plotChromPeakImage works", { }) test_that("applyAdjustedRtime works", { + skip_on_os(os = "windows", arch = "i386") + expect_error(applyAdjustedRtime(faahko_od)) expect_equal(applyAdjustedRtime(faahko_xod), faahko_xod) ## Now really replacing the stuff. @@ -94,15 +119,11 @@ test_that("applyAdjustedRtime works", { }) test_that(".concatenate_XCMSnExp works", { - od1 <- readMSData(faahko_3_files[1], mode = "onDisk") - od2 <- readMSData(faahko_3_files[2], mode = "onDisk") - od3 <- readMSData(faahko_3_files[3], mode = "onDisk") - xod1 <- findChromPeaks(od1, param = CentWaveParam(noise = 10000, - snthresh = 40)) - xod2 <- findChromPeaks(od2, param = CentWaveParam(noise = 10000, - snthresh = 40)) - xod3 <- findChromPeaks(od3, param = CentWaveParam(noise = 10000, - snthresh = 40)) + skip_on_os(os = "windows", arch = "i386") + + xod1 <- filterFile(faahko_xod, 1) + xod2 <- filterFile(faahko_xod, 2) + xod3 <- filterFile(faahko_xod, 3) res <- .concatenate_XCMSnExp(xod1, xod2, xod3) expect_equal(pData(res), pData(faahko_xod)) expect_equal(fData(res), fData(faahko_xod)) @@ -114,6 +135,8 @@ test_that(".concatenate_XCMSnExp works", { }) test_that("filterFeatureDefinitions works", { + skip_on_os(os = "windows", arch = "i386") + tmp <- xod_xgrg expect_error(filterFeatureDefinitions("a")) expect_error(filterFeatureDefinitions(xod_xgr, 1:3)) @@ -134,11 +157,13 @@ test_that("filterFeatureDefinitions works", { }) test_that("featureSummary works", { + skip_on_os(os = "windows", arch = "i386") + expect_error(featureSummary(1:3)) expect_error(featureSummary(xod_xgrg, group = 1:5)) expect_error(featureSummary(xod_xgr)) - res <- xcms:::featureSummary(xod_xgrg) + res <- featureSummary(xod_xgrg) expect_equal(colnames(res), c("count", "perc", "multi_count", "multi_perc", "rsd")) expect_equal(rownames(res), rownames(featureDefinitions(xod_xgrg))) @@ -148,7 +173,7 @@ test_that("featureSummary works", { sd(z, na.rm = TRUE) / mean(z, na.rm = TRUE) }) expect_equal(rsds, res[, "rsd"]) - + res <- featureSummary(xod_xgrg, group = c(2, 1, 1)) expect_equal(colnames(res), c("count", "perc", "multi_count", "multi_perc", "rsd", "2_count", "2_perc", "2_multi_count", @@ -168,7 +193,7 @@ test_that("featureSummary works", { expect_equal(rsds, res[, "1_rsd"]) ## Columns except rsd are not allowed to have NAs expect_true(sum(is.na(res[, -grep(colnames(res), pattern = "rsd")])) == 0) - + res <- featureSummary(xod_xgrg, perSampleCounts = TRUE) expect_equal(colnames(res), c("count", "perc", "multi_count", "multi_perc", "rsd", @@ -176,6 +201,8 @@ test_that("featureSummary works", { }) test_that("overlappingFeatures works", { + skip_on_os(os = "windows", arch = "i386") + ## Errors expect_error(overlappingFeatures()) expect_error(overlappingFeatures(4)) @@ -187,6 +214,8 @@ test_that("overlappingFeatures works", { }) test_that("exportMetaboAnalyst works", { + skip_on_os(os = "windows", arch = "i386") + expect_error(exportMetaboAnalyst(xod_x)) expect_error(exportMetaboAnalyst(4)) expect_error(exportMetaboAnalyst(xod_xg)) @@ -207,4 +236,569 @@ test_that("exportMetaboAnalyst works", { res3 <- read.table(fl, sep = ",", row.names = 1, as.is = TRUE) colnames(res3) <- colnames(res) expect_equal(as.matrix(res3), res) + + res <- exportMetaboAnalyst(xod_xg, label = c("a", "a", "a"), + groupnames = TRUE) + expect_equal(rownames(res), c("Sample", "Label", groupnames(xod_xg))) +}) + +test_that("chromPeakSpectra works", { + skip_on_os(os = "windows", arch = "i386") + + ## For now we don't have MS1/MS2 data, so we have to stick to errors etc. + expect_error(ms2_mspectrum_for_all_peaks(xod_x, method = "other")) + expect_error(res <- chromPeakSpectra(od_x)) + expect_warning(res <- chromPeakSpectra(xod_x, return.type = "list")) + expect_true(length(res) == nrow(chromPeaks(xod_x))) + expect_equal(names(res), rownames(chromPeaks(xod_x))) + expect_warning(res <- chromPeakSpectra(xod_x, return.type = "MSpectra")) + expect_true(is(res, "MSpectra")) + expect_true(length(res) == 0) + expect_warning(res <- chromPeakSpectra(xod_x, msLevel = 1L)) + expect_true(length(res) == 0) + + dta <- pest_dda + + ## ms2_mspectrum_for_peaks_from_file + pks <- chromPeaks(dta) + pks[, "sample"] <- 5 + res_all <- ms2_mspectrum_for_peaks_from_file(dta, pks) + expect_equal(length(res_all), nrow(pks)) + expect_equal(names(res_all), rownames(pks)) + expect_true(any(lengths(res_all) > 1)) + tmp <- unlist(res_all) + expect_true(all(vapply(tmp, fromFile, integer(1)) == 5L)) + + res_sub <- ms2_mspectrum_for_peaks_from_file(dta, pks, method = "closest_rt") + expect_true(all(lengths(res_sub) <= 1)) + pks[, "mz"] <- NA + res_na <- ms2_mspectrum_for_peaks_from_file(dta, pks) + expect_true(all(lengths(res_na) == 0)) + + ## ms2_mspectrum_for_all_peaks + res_all <- ms2_mspectrum_for_all_peaks(dta) + expect_equal(rownames(chromPeaks(dta)), names(res_all)) + + ## With subset. + subs <- sample(1:nrow(chromPeaks(dta)), 20) + res_subs <- ms2_mspectrum_for_all_peaks(dta, subset = subs) + expect_true(all(lengths(res_subs[-subs]) == 0)) + + ## With Spectra + if (requireNamespace("Spectra", quietly = TRUE)) { + res <- chromPeakSpectra(pest_dda, msLevel = 1L, return.type = "Spectra", + method = "closest_rt") + expect_true(is(res, "Spectra")) + expect_equal(rtime(res), unname(chromPeaks(pest_dda)[, "rt"])) + + res <- chromPeakSpectra(pest_dda, msLevel = 2L, return.type = "List") + expect_true(is(res, "List")) + expect_true(length(res) == nrow(chromPeaks(pest_dda))) + } +}) + +test_that("featureSpectra works", { + skip_on_os(os = "windows", arch = "i386") + + ## For now we don't have MS1/MS2 data, so we have to stick to errors etc. + expect_error(ms2_mspectrum_for_features(xod_x, method = "other")) + expect_error(res <- featureSpectra(xod_x)) + expect_warning(res <- featureSpectra(xod_xg, return.type = "list")) + expect_true(length(res) == nrow(featureDefinitions(xod_xg))) + expect_equal(names(res), rownames(featureDefinitions(xod_xg))) + expect_warning(res <- featureSpectra(xod_xg, return.type = "MSpectra")) + expect_true(is(res, "MSpectra")) + expect_true(length(res) == 0) + expect_warning(res <- featureSpectra(xod_xg, msLevel = 1L)) + expect_true(length(res) == 0) + + res <- featureSpectra(xod_xg, method = "closest_rt", msLevel = 1L, + return.type = "List") + expect_equal(length(res), nrow(featureDefinitions(xod_xg))) + for (i in seq_along(res)) { + expect_true(is(res[[i]], "Spectra")) + } + + res2 <- featureSpectra(xod_xg, msLevel = 2L, return.type = "Spectra") + expect_true(length(res2) == 0) +}) + +test_that("featureChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + expect_error(featureChromatograms(xod_x)) + + fts <- rownames(featureDefinitions(xod_xgrg)) + chrs <- featureChromatograms(xod_xgrg, features = fts[c(1, 2, 1)]) + expect_true(ncol(chrs) == 3) + expect_equal(featureDefinitions(chrs)$row, 1:3) + expect_equal(featureValues(chrs), + featureValues(xod_xgrg)[fts[c(1, 2, 1)], ]) + chrs_ext <- featureChromatograms(xod_xgrg, expandRt = 2, + features = fts[c(1, 2, 1)]) + rts <- do.call(rbind, lapply(chrs, function(z) range(rtime(z)))) + rts_ext <- do.call(rbind, lapply(chrs_ext, function(z) range(rtime(z)))) + expect_true(all(rts[, 1] > rts_ext[, 1])) + expect_true(all(rts[, 2] < rts_ext[, 2])) + + expect_warning(res_n <- featureChromatograms(xod_xgrg, expandRt = 2, + features = fts[c(1, 2, 1)], + n = 1)) + expect_true(ncol(res_n) == 1) + fvals <- featureValues(xod_xgrg, value = "maxo", method = "maxint", + intensity = "maxo")[fts[c(1, 2, 1)], ] + fvals_sum <- apply(fvals, MARGIN = 2, sum, na.rm = TRUE) + expect_true(colnames(res_n) == "ko15.CDF") + expect_equal(chromPeaks(chrs[, 1])[, 1:11], chromPeaks(res_n)[, 1:11]) + expect_equal(featureValues(chrs[, 1]), featureValues(res_n)) + + res_n <- featureChromatograms(xod_xgrg, expandRt = 2, + features = fts[c(1, 2, 1)], n = 2) + expect_true(ncol(res_n) == 2) + expect_equal(featureValues(res_n), featureValues(chrs[, c(1, 3)])) + + res_2 <- featureChromatograms(xod_xgrg, features = c(1, 5)) + expect_equal(featureDefinitions(res_2)$row, 1:nrow(res_2)) + res_1 <- featureChromatograms(xod_xgrg, features = fts[c(1, 5)]) + expect_equal(res_1[1, ], res_2[1, ]) + expect_equal(res_1[2, 1], res_2[2, 1]) + expect_equal(res_1[2, 2], res_2[2, 2]) + expect_equal(res_1[2, 3], res_2[2, 3]) + + res_3 <- featureChromatograms(xod_xgrg, features = c("FT01", "FT05")) + expect_equal(res_2, res_3) + + res <- featureChromatograms(xod_xgrg, features = character()) + expect_true(nrow(res) == 0) + expect_error(featureChromatograms(xod_xgrg, + features = c(TRUE, FALSE, FALSE))) + expect_error(featureChromatograms(xod_xgrg, features = c(100000, 1000002))) + expect_error(featureChromatograms(xod_xgrg, features = c("a", "FT02"))) + + ## expandMz + res_4 <- featureChromatograms(xod_xgrg, features = c("FT01", "FT05"), + expandMz = 2) + expect_equal(mz(res_4)[1, ], mz(res_3)[1, ] + c(-2, 2)) + expect_true(sum(is.na(intensity(res_4[1, 2]))) < + sum(is.na(intensity(res_3[1, 2])))) + + ## Test with filled-in peaks. + xod_tmp <- groupChromPeaks( + xod_xgr, param = PeakDensityParam(sampleGroups = rep(1, 3), + minFraction = 0.25)) + xod_tmpf <- fillChromPeaks( + xod_tmp, param = FillChromPeaksParam(fixedRt = 30)) + fts <- c("FT036", "FT042") + fchrs <- featureChromatograms(xod_tmp, features = fts, filled = TRUE) + fchrsf <- featureChromatograms(xod_tmpf, features = fts, filled = TRUE) + expect_equal(nrow(chromPeaks(fchrs)), 4) + expect_equal(nrow(chromPeaks(fchrsf)), 6) + expect_equal(chromPeakData(fchrsf)$is_filled, c(TRUE, FALSE, TRUE, FALSE, + FALSE, FALSE)) + expect_equal(featureDefinitions(fchrs)$peakidx, list(1, c(2, 3, 4))) + expect_equal(featureDefinitions(fchrsf)$peakidx, list(1:3, 4:6)) + fchrsf2 <- featureChromatograms(xod_tmpf, features = fts, filled = FALSE) + expect_equal(chromPeaks(fchrsf2), chromPeaks(fchrs)) + expect_equal(featureDefinitions(fchrsf2), featureDefinitions(fchrs)) + expect_equal(featureValues(fchrsf2), featureValues(fchrs)) +}) + +test_that("highlightChromPeaks works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- c(279, 279) + rtr <- c(2700, 2850) + chr <- chromatogram(xod_xgrg, mz = mzr, rt = rtr) + plot(chr) + pks <- chromPeaks(xod_xgrg, mz = mzr, rt = rtr, type = "apex_within") + highlightChromPeaks(xod_xgrg, mz = mzr, rt = rtr) + highlightChromPeaks(xod_xgrg, mz = mzr, rt = rtr, type = "polygon", + col = c("#ff000020", "#00ff0020", "#0000ff20")) + expect_error(highlightChromPeaks(xod_xgrg, peakIds = c("a", "b"))) + + highlightChromPeaks(xod_xgrg, mz = mzr, rt = c(rtr[1] - 20, rtr[2] + 20), + type = "rect", col = c("#00000040")) + plot(chr) + highlightChromPeaks(xod_xgrg, mz = mzr, rt = c(rtr[1] - 30, rtr[2] + 20), + type = "polygon", + col = c("#ff000020", "#00ff0020", "#0000ff20")) +}) + +test_that(".swath_collect_chrom_peaks works", { + skip_on_os(os = "windows", arch = "i386") + + obj <- dropChromPeaks(pest_swth) + msf <- new("MsFeatureData") + ## msf@.xData <- .copy_env(obj@msFeatureData) + cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, + peakwidth = c(3, 30), prefilter = c(3, 1000)) + x <- lapply(split(obj, f = isolationWindowTargetMz(obj)), + findChromPeaks, msLevel = 2L, param = cwp) + res <- .swath_collect_chrom_peaks(x, msf, fileNames(obj)) + expect_equal(names(res), c("chromPeakData", "chromPeaks")) + + x_mod <- x + x_mod[[2]] <- dropChromPeaks(x_mod[[2]]) + chromPeaks(x_mod[[2]]) <- chromPeaks(x_mod[[3]])[integer(), ] + msf <- new("MsFeatureData") + ## msf@.xData <- .copy_env(obj@msFeatureData) + res_mod <- .swath_collect_chrom_peaks(x_mod, msf, fileNames(obj)) + + a <- chromPeaks(res_mod) + b <- chromPeaks(res)[chromPeakData(res)$isolationWindowTargetMZ != 208.95, ] + expect_equal(unname(a), unname(b)) + + ## obj <- findChromPeaks(obj, param = cwp) + ## msf <- new("MsFeatureData") + ## msf@.xData <- .copy_env(obj@msFeatureData) + ## x <- lapply(split(obj, f = isolationWindowTargetMz(obj)), + ## findChromPeaks, msLevel = 2L, param = cwp) + ## res_2 <- .swath_collect_chrom_peaks(x, msf, fileNames(obj)) + ## expect_true(nrow(chromPeaks(res_2)) > nrow(chromPeaks(res))) + ## expect_equal(chromPeaks(obj), + ## chromPeaks(res_2)[1:nrow(chromPeaks(obj)), ]) + ## expect_equal(nrow(chromPeaks(res_2)), + ## nrow(chromPeaks(obj)) + nrow(chromPeaks(res))) + + ## expect_equal(colnames(chromPeakData(res_2)), + ## c("ms_level", "is_filled", "isolationWindow", + ## "isolationWindowTargetMZ", + ## "isolationWindowLowerMz", + ## "isolationWindowUpperMz")) + + ## expect_true(hasChromPeaks(obj)) + ## msf <- new("MsFeatureData") + ## msf@.xData <- xcms:::.copy_env(obj@msFeatureData) + ## cwp <- CentWaveParam(snthresh = 200, noise = 1000, ppm = 10, + ## peakwidth = c(3, 30)) + ## x <- lapply(split(obj, f = isolationWindowTargetMz(obj)), + ## findChromPeaks, msLevel = 2L, param = cwp) + ## res_3 <- xcms:::.swath_collect_chrom_peaks(x, msf, fileNames(obj)) + ## ## First two isolation windows do not have any peaks. + ## target_mz <- unique(isolationWindowTargetMz(obj)) + ## target_mz <- target_mz[!is.na(target_mz)] + + ## expect_equal( + ## sort(intersect(res_3$chromPeakData$isolationWindowTargetMZ, target_mz)), + ## sort(target_mz)[-1]) + ## expect_equal(chromPeaks(obj), + ## chromPeaks(res_3)[1:nrow(chromPeaks(obj)), ]) + + ## No chromPeaks found: + cwp <- CentWaveParam(snthresh = 10000, noise = 1e6, + prefilter = c(4, 10000)) + x <- lapply(split(obj, f = isolationWindowTargetMz(obj)), + findChromPeaks, msLevel = 2L, param = cwp) + res_5 <- .swath_collect_chrom_peaks(x, msf, fileNames(obj)) + expect_equal(res_5, msf) +}) + +test_that("findChromPeaksIsolationWindow works", { + skip_on_os(os = "windows", arch = "i386") + + obj <- filterRt(as(pest_swth, "OnDiskMSnExp"), rt = c(0, 300)) + cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, + peakwidth = c(3, 30), prefilter = c(2, 1000)) + res <- findChromPeaksIsolationWindow(obj, param = cwp) + expect_true(is(res, "XCMSnExp")) + expect_equal(length(processHistory(res)), 1) + expect_true(all(c("isolationWindow", "isolationWindowTargetMZ") %in% + colnames(chromPeakData(res)))) + expect_true(all(chromPeakData(res)$ms_level == 2L)) + + ## Add to existing peaks + obj <- findChromPeaks(obj, param = cwp) + res_2 <- findChromPeaksIsolationWindow(obj, param = cwp) + expect_equal(chromPeaks(res_2)[1:nrow(chromPeaks(obj)), , drop = FALSE], + chromPeaks(obj)) + expect_true(length(processHistory(res_2)) == 2) + + ## no isolation window/add isolation window + expect_error(findChromPeaksIsolationWindow(od_x), "are NA") + tmp <- od_x + cwp <- CentWaveParam(noise = 10000, snthresh = 40, prefilter = c(3, 10000)) + fData(tmp)$my_win <- 1 + res_3 <- findChromPeaksIsolationWindow( + tmp, param = cwp, isolationWindow = fData(tmp)$my_win, msLevel = 1L) + expect_equal(chromPeaks(xod_x), chromPeaks(res_3)) + expect_true(all(chromPeakData(res_3)$isolationWindow == 1)) + + res_4 <- findChromPeaksIsolationWindow( + xod_x, param = cwp, isolationWindow = rep(1, length(xod_x)), + msLevel = 1L) + expect_equal(chromPeaks(res_4)[1:nrow(chromPeaks(xod_x)), ], + chromPeaks(xod_x)) +}) + +test_that("reconstructChromPeakSpectra works", { + skip_on_os(os = "windows", arch = "i386") + + res <- reconstructChromPeakSpectra( + pest_swth, peakId = rownames(chromPeaks(pest_swth))[5:6]) + expect_true(length(res) == 2) + expect_s4_class(res, "Spectra") + expect_true(length(intensity(res)[[2]]) == 2) + + ## errors + expect_error(reconstructChromPeakSpectra(od_x), "object with") + + ## peakId + res_3 <- reconstructChromPeakSpectra(pest_swth, peakId = c("CP06")) + expect_identical(intensity(res_3), intensity(res[2])) + + expect_warning(res <- reconstructChromPeakSpectra( + pest_swth, peakId = c("CP06", "other"))) + expect_identical(mz(res_3), mz(res)) + expect_error(reconstructChromPeakSpectra(pest_swth, peakId = c("a", "b")), + "None of the provided") +}) + +test_that(".plot_XIC works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- c(453, 453.5) + rtr <- c(2400, 2700) + + tmp <- filterMz(filterRt(xod_x, rtr), mzr) + .plot_XIC(tmp) + + mzr <- c(301.9, 302.1) + rtr <- c(2500, 2650) + tmp <- filterMz(filterRt(xod_x, rtr), mzr) + .plot_XIC(tmp, peakCol = "#ff0000", lwd = 10) +}) + +test_that(".group_overlapping_peaks works", { + skip_on_os(os = "windows", arch = "i386") + + mzmin <- c(123.3, 123.35, 123.5, 341, 342.1, 343.2, 564, 564.3) + mzmax <- c(123.4, 123.5, 124, 342, 343, 344, 564.1, 566) + pks <- cbind(mzmin, mzmax) + rownames(pks) <- letters[1:nrow(pks)] + + res <- .group_overlapping_peaks(pks) + expect_true(is.list(res)) + expect_true(all(lengths(res) > 0)) + expect_equal(res[[1]], c("a", "b", "c")) + + res <- .group_overlapping_peaks(pks, expand = 0.05) + expect_true(length(res) == 5) + expect_equal(res[[1]], c("a", "b", "c")) + expect_equal(res[[2]], c("d", "e")) + + res <- .group_overlapping_peaks(pks, expand = 0.1) + expect_true(length(res) == 3) + expect_equal(res[[1]], c("a", "b", "c")) + expect_equal(res[[2]], c("d", "e", "f")) + expect_equal(res[[3]], c("g", "h")) +}) + +test_that(".merge_neighboring_peaks works", { + skip_on_os(os = "windows", arch = "i386") + + xod_x1 <- filterFile(xod_x, 1L) + res <- .merge_neighboring_peaks(xod_x1, expandRt = 4) + expect_true(is.list(res)) + expect_true(is.matrix(res$chromPeaks)) + expect_true(is(res$chromPeakData, "DataFrame")) + expect_true(nrow(res$chromPeakData) == nrow(res$chromPeaks)) + expect_true(nrow(res$chromPeaks) < nrow(chromPeaks(xod_x1))) + + mz_groups <- .group_overlapping_peaks(chromPeaks(xod_x1), ppm = 10) + mz_groups <- mz_groups[lengths(mz_groups) > 1] + + ## mz of 305.1: nice example of a split peak. + tmp <- chromPeaks(xod_x1)[mz_groups[[1]], ] + mzr <- range(tmp[, c("mzmin", "mzmax")]) + chr <- chromatogram(xod_x1, mz = mzr) + ## plot(chr) + pks <- res$chromPeaks + pks <- pks[pks[, "mzmin"] >= mzr[1] & pks[, "mzmax"] <= mzr[2], ] + expect_true(nrow(pks) == 2) + expect_true(nrow(pks) < nrow(chromPeaks(xod_x1, mz = mzr))) + ## rect(pks[, "rtmin"], 0, pks[, "rtmax"], pks[, "maxo"], border = "red") + + ## mz of 462.2: + tmp <- chromPeaks(xod_x1)[mz_groups[[4]], ] + mzr <- range(tmp[, c("mzmin", "mzmax")]) + chr <- chromatogram(xod_x1, mz = mzr) + ## plot(chr) + pks <- res$chromPeaks + res_mzr <- pks[pks[, "mzmin"] >= mzr[1] & pks[, "mzmax"] <= mzr[2], , drop = FALSE] + ## rect(res_mzr[, "rtmin"], 0, res_mzr[, "rtmax"], res_mzr[, "maxo"], border = "red") + expect_true(nrow(res_mzr) == 1) + + ## mz of 496.2: two peaks that DON'T get merged (and that's OK). + tmp <- chromPeaks(xod_x1)[mz_groups[[5]], ] + mzr <- range(tmp[, c("mzmin", "mzmax")]) + mzr <- mzr + c(-0.01, 0.01) + chr <- chromatogram(xod_x1, mz = mzr) + pks <- res$chromPeaks + pks <- pks[pks[, "mzmin"] >= mzr[1] & pks[, "mzmax"] <= mzr[2], ] + ## plot(chr) + ## rect(res_mzr[, "rtmin"], 0, res_mzr[, "rtmax"], res_mzr[, "maxo"], border = "red") + expect_true(nrow(pks) == 2) + expect_true(nrow(pks) == nrow(chromPeaks(xod_x1, mz = mzr))) + expect_equal(rownames(pks), rownames(chromPeaks(chr))) +}) + +test_that(".XCMSnExp2SummarizedExperiment works", { + skip_on_os(os = "windows", arch = "i386") + + expect_error(.XCMSnExp2SummarizedExperiment(xod_x), "No correspondence") + + res <- .XCMSnExp2SummarizedExperiment(xod_xgrg) + expect_equal(SummarizedExperiment::assay(res), featureValues(xod_xgrg)) + + res <- .XCMSnExp2SummarizedExperiment(xod_xgrg, value = "maxo") + expect_equal(SummarizedExperiment::assay(res), + featureValues(xod_xgrg, value = "maxo")) + + res <- quantify(xod_xgrg, value = "intb") + expect_equal(SummarizedExperiment::assay(res), + featureValues(xod_xgrg, value = "intb")) +}) + +test_that(".features_ms_region works", { + skip_on_os(os = "windows", arch = "i386") + + res <- .features_ms_region(xod_xgrg, msLevel = 1L) + expect_equal(nrow(res), nrow(featureDefinitions(xod_xgrg))) + expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax")) + expect_true(all(res[, "mzmin"] <= res[, "mzmax"])) + expect_true(all(res[, "rtmin"] < res[, "rtmax"])) + + expect_error(.features_ms_region(xod_xgrg, msLevel = 1L, + features = c("a", "b")), "not available") +}) + +test_that(".which_peaks_above_threshold works", { + skip_on_os(os = "windows", arch = "i386") + + xsub <- filterRt(filterFile(xod_x, 1L), rt = c(2500, 3500)) + pks <- chromPeaks(xsub) + res <- .chrom_peaks_above_threshold(xsub, threshold = 100, nValues = 4) + expect_equal(res, rep(TRUE, nrow(pks))) + res <- .chrom_peaks_above_threshold(xsub, threshold = 50000, nValues = 1) + expect_equal(res, unname(pks[, "maxo"] >= 50000)) + + res <- .chrom_peaks_above_threshold(xsub, threshold = 50000, nValues = 1, + msLevel = 2L) + expect_equal(res, rep(TRUE, nrow(pks))) +}) + +test_that("manualChromPeaks works", { + skip_on_os(os = "windows", arch = "i386") + + cp <- cbind(mzmin = c(453, 301.9, 100), + mzmax = c(453.5, 302.1, 102), + rtmin = c(2400, 2500, 2460), + rtmax = c(2700, 2650, 2500)) + ## Errors + expect_error(manualChromPeaks(xod_x, msLevel = 1:2), "can only add") + expect_error(manualChromPeaks(1:2), "either an OnDiskMSnExp") + expect_error(manualChromPeaks(xod_x, 1:2), "lacks one or more of the") + expect_error(manualChromPeaks(xod_x, cp, samples = 10), "out of bounds") + ## With an XCMSnExp + res <- manualChromPeaks(xod_x, cp) + expect_true(nrow(chromPeaks(res)) > nrow(chromPeaks(xod_x))) + expect_equal(chromPeaks(res)[!is.na(chromPeaks(res)[, "intb"]), ], + chromPeaks(xod_x)) + ## With an OnDiskMSnExp + res2 <- manualChromPeaks(od_x, cp) + expect_true(is(res2, "XCMSnExp")) + expect_true(hasChromPeaks(res2)) + + res3 <- manualChromPeaks(od_x, cp, samples = 2) + expect_true(all(chromPeaks(res3)[, "sample"] == 2)) +}) + +test_that(".spectra_for_peaks works", { + skip_on_os(os = "windows", arch = "i386") + + if (requireNamespace("Spectra", quietly = TRUE)) { + res_all <- .spectra_for_peaks(pest_dda, method = "all") + expect_true(length(res_all) == nrow(chromPeaks(pest_dda))) + res_1 <- .spectra_for_peaks(pest_dda, method = "closest_rt") + expect_true(all(lengths(res_1) < 2)) + + res <- .spectra_for_peaks(pest_dda, msLevel = 3) + expect_true(all(lengths(res) == 0)) + + res <- .spectra_for_peaks(pest_dda, msLevel = 1L, method = "closest_rt") + res <- do.call(c, unname(res)) + expect_equal(unname(rtime(res)), unname(chromPeaks(pest_dda)[, "rt"])) + + expect_warning(res <- .spectra_for_peaks(pest_dda, msLevel = 1L, + method = "signal"), "Changing") + + res_56 <- .spectra_for_peaks(pest_dda, method = "all", peaks = c(5, 6)) + expect_equal(res_56[[1]], res_all[[5]]) + expect_equal(res_56[[2]], res_all[[6]]) + } +}) + +test_that(".spectra_for_features works", { + skip_on_os(os = "windows", arch = "i386") + + if (requireNamespace("Spectra", quietly = TRUE)) { + res <- .spectra_for_features(xod_xgrg, method = "closest_rt", + msLevel = 2L) + expect_true(length(res) == + nrow(featureDefinitions(xod_xgrg))) + expect_true(all(lengths(res) == 0)) + + res <- .spectra_for_features(xod_xgrg, method = "closest_rt", + msLevel = 1L) + expect_true(all(vapply(res, is, logical(1), "Spectra"))) + fds <- featureDefinitions(xod_xgrg) + for (i in seq_len(nrow(fds))) { + expect_true(all(res[[i]]$feature_id == rownames(fds)[i])) + } + + ## with subset + idx <- c(1, 400) + expect_error(.spectra_for_features( + xod_xg, msLevel = 1L, features = idx), "out of bounds") + res_all <- .spectra_for_features(xod_xg, msLevel = 1L) + res_sub <- .spectra_for_features(xod_xg, msLevel = 1L, + features = c(5, 12, 45)) + res_sub2 <- .spectra_for_features( + xod_xg, msLevel = 1L, + features = rownames(featureDefinitions(xod_xg))[c(5, 12, 45)]) + expect_equal(length(res_sub), 3) + expect_equal(rtime(res_sub[[1L]]), rtime(res_all[[5L]])) + expect_equal(rtime(res_sub[[2L]]), rtime(res_all[[12L]])) + expect_equal(rtime(res_sub[[3L]]), rtime(res_all[[45L]])) + + expect_equal(length(res_sub), length(res_sub2)) + expect_equal(rtime(res_sub[[1L]]), rtime(res_sub2[[1L]])) + expect_equal(rtime(res_sub[[2L]]), rtime(res_sub2[[2L]])) + expect_equal(rtime(res_sub[[3L]]), rtime(res_sub2[[3L]])) + } +}) + +test_that("manualFeatures works", { + skip_on_os(os = "windows", arch = "i386") + + idx <- list(1:4, c(4, "a")) + expect_error(manualFeatures(od_x, idx), "XCMSnExp") + ## Add features to an XCMSnExp without features. + expect_error(manualFeatures(xod_x, idx), "out of bounds") + idx <- list(1:4, c(5, 500, 500)) + expect_error(manualFeatures(xod_x, idx), "out of bounds") + idx <- list(1:5, c(6, 34, 234)) + res <- manualFeatures(xod_x, idx) + expect_true(hasFeatures(res)) + expect_true(nrow(featureDefinitions(res)) == 2) + expect_equal(featureDefinitions(res)$peakidx, idx) + ## Append features to an XCMSnExp. + idx <- featureDefinitions(xod_xg)$peakidx[c(3, 5, 7)] + res <- manualFeatures(xod_xg, idx) + nfd <- nrow(featureDefinitions(xod_xg)) + expect_true(nrow(featureDefinitions(res)) == nfd + 3) + expect_equal(featureDefinitions(res)[nfd + 1, "mzmed"], + featureDefinitions(xod_xg)[3, "mzmed"]) + expect_equal(featureDefinitions(res)[nfd + 2, "rtmin"], + featureDefinitions(xod_xg)[5, "rtmin"]) }) diff --git a/tests/testthat/test_functions-XChromatogram.R b/tests/testthat/test_functions-XChromatogram.R new file mode 100644 index 000000000..145d254a8 --- /dev/null +++ b/tests/testthat/test_functions-XChromatogram.R @@ -0,0 +1,112 @@ +test_that(".validXChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + xc <- new("XChromatogram") + expect_true(.validXChromatogram(xc)) + xc@chromPeaks <- matrix(1:10, ncol = 5) + expect_true(is.character(.validXChromatogram(xc))) + mat <- matrix("A", ncol = 6, nrow = 2) + xc@chromPeaks <- mat + expect_true(is.character(.validXChromatogram(xc))) + mat <- matrix(ncol = 6, nrow = 2) + colnames(mat) <- .CHROMPEAKS_REQ_NAMES + mat[, "rtmin"] <- c(3, 3) + mat[, "rtmax"] <- c(3, 4) + xc@chromPeaks <- mat + expect_true(is.character(.validXChromatogram(xc))) + xc@chromPeakData <- DataFrame(ms_level = c(1L, 1L), is_filled = FALSE) + expect_true(.validXChromatogram(xc)) + xc@chromPeakData <- DataFrame(ms_level = 1L, is_filled = TRUE) + expect_true(is.character(.validXChromatogram(xc))) + xc@chromPeakData <- DataFrame(ms_level = c(1L, 1L), other = "a") + expect_true(is.character(.validXChromatogram(xc))) + xc@chromPeaks[, "rtmin"] <- c(4, 3) + expect_true(is.character(.validXChromatogram(xc))) +}) + +test_that("XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + xc <- XChromatogram(rtime = 1:10, intensity = 1:10) + expect_true(nrow(xc@chromPeaks) == 0) + expect_true(nrow(xc@chromPeakData) == 0) + expect_equal(rtime(xc), 1:10) + expect_true(validObject(xc)) + + xc <- XChromatogram() + expect_true(nrow(xc@chromPeaks) == 0) + expect_true(nrow(xc@chromPeakData) == 0) + expect_error(XChromatogram(chr, 4)) + expect_true(validObject(xc)) + + pks <- matrix(nrow = 2, ncol = length(.CHROMPEAKS_REQ_NAMES), + dimnames = list(character(), .CHROMPEAKS_REQ_NAMES)) + pks[, "rtmin"] <- c(2, 4) + pks[, "rtmax"] <- c(3, 5) + xc <- XChromatogram(rtime = 1:10, intensity = 1:10, chromPeaks = pks) + expect_equal(xc@chromPeaks[, "rtmin"], c(2, 4)) + expect_true(validObject(xc)) + expect_true(is.logical(xc@chromPeakData$is_filled)) + expect_true(is.integer(xc@chromPeakData$ms_level)) + expect_equal(xc@chromPeakData$ms_level, c(1L, 1L)) + + df <- DataFrame(ms_level = c(1L, 2L)) + xc <- XChromatogram(rtime = 1:10, intensity = 1:10, chromPeaks = pks, + chromPeakData = df) + expect_true(validObject(xc)) + expect_equal(xc@chromPeakData$ms_level, c(1L, 2L)) +}) + +test_that(".add_chromatogram_peaks works", { + skip_on_os(os = "windows", arch = "i386") + + xc <- XChromatogram(rtime = 1:10, intensity = c(2, 5, 12, 32, 38, 21, 13, + 5, 5, 9)) + plot(xc) + pks <- matrix(c(5, 3, 7, NA, 38, NA), nrow = 1, + dimnames = list(character(), c("rt", "rtmin", "rtmax", + "into", "maxo", "sn"))) + .add_chromatogram_peaks(xc, pks, type = "point", pch = 16, + col = "red", bg = "black") + .add_chromatogram_peaks(xc, pks, type = "rectangle", pch = 16, + col = "red", bg = NA) + .add_chromatogram_peaks(xc, pks, type = "polygon", col = "#00ff0020", + bg = "#00ff0060") +}) + +test_that(".xchrom_merge_neighboring_peaks and refineChromPeaks works", { + skip_on_os(os = "windows", arch = "i386") + + tmp <- filterFile(xod_x, 1) + mzr <- 305.1 + c(-0.01, 0.01) + chr <- chromatogram(tmp, mz = mzr) + res <- .xchrom_merge_neighboring_peaks(chr[1, 1], diffRt = 6) + expect_true(nrow(chromPeaks(res)) == 2) + expect_true(sum(chromPeakData(res)$merged) == 1) + + res_2 <- refineChromPeaks(chr[1, 1], + param = MergeNeighboringPeaksParam(expandRt = 3)) + expect_true(validObject(res_2)) + expect_equal(res, res_2) +}) + +test_that(".filter_chrom_peaks_keep_top works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- chromatogram(xod_x, mz = rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04))) + a <- chrs[1, 1] + res <- .filter_chrom_peaks_keep_top(a, decreasing = TRUE, n = 2) + expect_true(all(res@chromPeaks[, "maxo"] > min(a@chromPeaks[, "maxo"]))) + + res <- .filter_chrom_peaks_keep_top(a, decreasing = TRUE, n = 5) + expect_equal(a, res) + + a <- chrs[1, 2] + res <- .filter_chrom_peaks_keep_top(a, decreasing = TRUE, n = 2) + expect_true(nrow(chromPeaks(res)) == 0) + + a <- chrs[1, 3] + res <- .filter_chrom_peaks_keep_top(a, decreasing = TRUE, n = 2) + expect_equal(chromPeaks(res), chromPeaks(a)[c(1, 3), ]) +}) diff --git a/tests/testthat/test_functions-XChromatograms.R b/tests/testthat/test_functions-XChromatograms.R new file mode 100644 index 000000000..58d03415d --- /dev/null +++ b/tests/testthat/test_functions-XChromatograms.R @@ -0,0 +1,185 @@ +test_that("XChromatograms, as, validator, hasChromPeaks work", { + skip_on_os(os = "windows", arch = "i386") + + chr1 <- Chromatogram(rtime = 1:8, + intensity = c(3, 24.2, 343, 32, 3.3, 5, 2, 9)) + chr2 <- Chromatogram(rtime = 1:4, intensity = c(45, 3, 34, 2)) + chr3 <- Chromatogram(rtime = 1:7, intensity = c(12, 34, 54, 34, 23, 2, NA)) + chr4 <- Chromatogram(rtime = 1:3, intensity = c(3, 4, 1)) + chr5 <- Chromatogram(rtime = 1:6, intensity = c(3, 4, 6, 7, 2, 4)) + chr6 <- Chromatogram(rtime = 2:5, intensity = c(3, 65, 43, 12)) + chrs <- MChromatograms(list(chr1, chr2, chr3, chr4, chr5, chr6), nrow = 2) + + expect_error(new("XChromatograms", matrix(list(chr1, chr2), nrow = 1))) + + res <- as(chrs, "XChromatograms") + expect_true(validObject(res)) + expect_true(is(res, "XChromatograms")) + + colnames(chrs) <- c("A", "B", "C") + res <- as(chrs, "XChromatograms") + expect_equal(colnames(res), colnames(chrs)) + + xchrs <- XChromatograms(list(chr1, chr2, chr3), ncol = 3) + expect_equal(ncol(xchrs), 3) + expect_true(is(xchrs, "XChromatograms")) + expect_true(all(vapply(xchrs, is, logical(1), "XChromatogram"))) + + pks1 <- matrix(c(3, 2, 4, 339.2, 343, NA), nrow = 1, + dimnames = list(NULL, .CHROMPEAKS_REQ_NAMES)) + pks3 <- matrix(c(3, 2, 4, 145, 54, NA), nrow = 1, + dimnames = list(NULL, .CHROMPEAKS_REQ_NAMES)) + pks6 <- matrix(c(2, 2, 3, 108, 65, NA), nrow = 1, + dimnames = list(NULL, .CHROMPEAKS_REQ_NAMES)) + ## With peak matrix. + xchrs1 <- XChromatograms(list(chr1, chr2, chr3, chr4, chr5, chr6), ncol = 3, + chromPeaks = list(pks1, NULL, pks3, NULL, NULL, + pks6)) + expect_true(is(xchrs1, "XChromatograms")) + expect_equal(unname(hasChromPeaks(xchrs1)[1, ]), c(TRUE, TRUE, FALSE)) + expect_equal(unname(hasChromPeaks(xchrs1)[2, ]), c(FALSE, FALSE, TRUE)) + + xchrs1 <- XChromatograms(list(chr1, chr2, chr3, chr4, chr5, chr6), ncol = 3, + chromPeaks = list(pks1, NULL, pks3, NULL, NULL, + pks6), byrow = TRUE) + expect_true(is(xchrs1, "XChromatograms")) + expect_equal(unname(hasChromPeaks(xchrs1)[1, ]), c(TRUE, FALSE, TRUE)) + expect_equal(unname(hasChromPeaks(xchrs1)[2, ]), c(FALSE, FALSE, TRUE)) + expect_equal(intensity(chr3), intensity(xchrs1[1, 3])) + + ## With XChromatogram objects + xchr1 <- as(chr1, "XChromatogram") + xchr3 <- as(chr3, "XChromatogram") + xchr4 <- as(chr4, "XChromatogram") + xchr6 <- as(chr6, "XChromatogram") + chromPeaks(xchr1) <- pks1 + chromPeaks(xchr3) <- pks3 + chromPeaks(xchr6) <- pks6 + + xchrs2 <- XChromatograms(list(xchr1, xchr4, xchr3, xchr6), ncol = 2) + expect_equal(unname(hasChromPeaks(xchrs2)[1, ]), c(TRUE, TRUE)) + expect_equal(unname(hasChromPeaks(xchrs2)[2, ]), c(FALSE, TRUE)) + expect_equal(chromPeaks(xchrs2[1, 2]), chromPeaks(xchr3)) +}) + +test_that(".subset_chrom_peaks_xchromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + ## Matrix is: with elements + ## A B C D 2 1 3 1 + ## E F G H 1 4 0 2 + ## I J K L 3 2 1 3 + + testm <- data.frame(el = c("A", "A", "E", "I", "I", "I", + "B", "F", "F", "F", "F", "J", "J", + "C", "C", "C", "K", + "D", "H", "H", "L", "L", "L"), + row = c(1, 1, 2, 3, 3, 3, + 1, 2, 2, 2, 2, 3, 3, + 1, 1, 1, 3, + 1, 2, 2, 3, 3, 3), + column = c(1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4), + stringsAsFactors = FALSE) + + res <- .subset_chrom_peaks_xchromatograms(testm, i = 2:3, j = 2:4) + expect_equal(res$el, c("F", "F", "F", "F", "H", "H", + "J", "J", "K", "L", "L", "L")) + expect_equal(res$row, c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2)) + expect_equal(res$column, c(1, 1, 1, 1, 3, 3, 1, 1, 2, 3, 3, 3)) + + res <- .subset_chrom_peaks_xchromatograms(testm, i = c(3, 1), j = 2:4) + expect_equal(res$el, c("J", "J", "K", "L", "L", "L", + "B", "C", "C", "C", "D")) + expect_equal(res$row, c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2)) + expect_equal(res$column, c(1, 1, 2, 3, 3, 3, 1, 2, 2, 2, 3)) + + res <- .subset_chrom_peaks_xchromatograms(testm, i = 2, j = c(3, 2)) + expect_equal(res$el, c("F", "F", "F", "F")) + expect_equal(res$row, c(1, 1, 1, 1)) + expect_equal(res$column, c(2, 2, 2, 2)) + + res <- .subset_chrom_peaks_xchromatograms(testm, i = 2, j = c(3, 2, 4)) + expect_equal(res$el, c("F", "F", "F", "F", "H", "H")) + expect_equal(res$row, c(1, 1, 1, 1, 1, 1)) + expect_equal(res$column, c(2, 2, 2, 2, 3, 3)) + + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + chrs <- chromatogram(od_x, mz = mzr) + chrs <- findChromPeaks(chrs, param = CentWaveParam()) + prm <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + chrs <- groupChromPeaks(chrs, param = prm) + + pks <- chromPeaks(chrs) + rownames(pks) <- letters[1:nrow(pks)] + res <- .subset_chrom_peaks_xchromatograms(pks, j = c(3, 1, 2)) + expect_equal(rownames(res), c("f", "g", "a", "b", "c", "d", "e", + "l", "m", "h", "i", "j", "k")) + res <- .subset_chrom_peaks_xchromatograms(pks, i = c(2, 1), j = c(2, 1, 3)) + expect_equal(rownames(res), c("j", "k", "h", "i", "l", "m", + "e", "a", "b", "c", "d", "f", "g")) +}) + +test_that(".subset_features_on_chrom_peaks works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- as(od_chrs, "XChromatograms") + chrs <- findChromPeaks(chrs, param = CentWaveParam()) + prm <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + chrs <- groupChromPeaks(chrs, param = prm) + + fts <- featureDefinitions(chrs) + pks <- chromPeaks(chrs) + res <- .subset_features_on_chrom_peaks(fts, pks, pks) + expect_equal(fts, res) + + pks_sub <- pks[1:2, ] + res <- .subset_features_on_chrom_peaks(fts, pks, pks_sub) + expect_equal(rownames(res), "FT1") + expect_equal(res$peakidx, list(1)) + + pks_sub <- pks[c(8, 10, 12), ] + res <- .subset_features_on_chrom_peaks(fts, pks, pks_sub) + expect_equal(rownames(res), "FT3") + expect_equal(res$peakidx, list(1:3)) + + pks_sub <- pks[2, , drop = FALSE] + res <- .subset_features_on_chrom_peaks(fts, pks, pks_sub) + expect_true(nrow(res) == 0) + + pks_sub <- rbind(pks, pks[pks[, "row"] == 1, ]) + pks_sub[(nrow(pks)+1):nrow(pks_sub), "row"] <- 3 + fts <- rbind(fts, fts[fts$row == 1, ]) + fts$row[5:6] <- 3 + res <- .subset_features_on_chrom_peaks(fts, pks, pks_sub) + expect_true(nrow(res) == 6) + expect_equal(pks_sub[res$peakidx[[1]], "into"], + pks_sub[res$peakidx[[5]], "into"]) + expect_equal(pks_sub[res$peakidx[[2]], "into"], + pks_sub[res$peakidx[[6]], "into"]) +}) + +test_that(".plot_chrom_peak_density works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- as(od_chrs, "XChromatograms") + chrs <- findChromPeaks(chrs, param = CentWaveParam()) + + pks_1 <- chromPeaks(chrs[1, ]) + prm <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + .plot_chrom_peak_density(pks_1, param = prm, lwd = 2) + expect_warning(.plot_chrom_peak_density(pks_1, param = prm, + peakCol = c(1, 2))) + ## An individual color for each point. + .plot_chrom_peak_density(pks_1, param = prm, peakCol = 1:nrow(pks_1), + peakPch = 16) + + chrs <- groupChromPeaks(chrs, param = prm) + pks_1 <- chromPeaks(chrs[1, ]) + fts_1 <- featureDefinitions(chrs[1, ]) + .plot_chrom_peak_density(pks_1, fts = fts_1, param = prm, + peakCol = 1:nrow(pks_1), peakPch = 16, + simulate = FALSE) +}) diff --git a/tests/testthat/test_functions-binning.R b/tests/testthat/test_functions-binning.R index fa358b859..8a28fb3d2 100644 --- a/tests/testthat/test_functions-binning.R +++ b/tests/testthat/test_functions-binning.R @@ -1,4 +1,6 @@ test_that("binYonX NA handling works", { + skip_on_os(os = "windows", arch = "i386") + ## NA values in y should be ignored internally. X <- 1:10 Y <- X @@ -21,12 +23,14 @@ test_that("binYonX NA handling works", { }) test_that("binYonX max works", { + skip_on_os(os = "windows", arch = "i386") + X <- 1:10 Y <- 1:10 breakMidPoint <- function(x) { return((x[-1L] + x[-length(x)])/2) } - + ## o nBins res <- binYonX(X, Y, nBins = 5L) expect_equal(res$y, c(2, 4, 6, 8, 10)) @@ -141,67 +145,68 @@ test_that("binYonX max works", { expect_equal(res$x, breakMidPoint(brks)) ## Test on real data: - xr <- deepCopy(faahko_xr_1) - X <- xr@env$mz - Y <- xr@env$intensity + xr <- filterFile(faahko_od, 1) + X <- mz(xr) + Y <- intensity(xr) xRangeFull <- range(X) - scanidx <- xr@scanindex ## Get the data from the first spectrum: - X1 <- X[1:scanidx[2]] - Y1 <- Y[1:scanidx[2]] + X1 <- X[[1]] + Y1 <- Y[[2]] ## #### ## Define the number of bins. - step <- 0.1 - shift <- TRUE - mass <- seq(floor(min(xRangeFull)/step)*step, - ceiling(max(xRangeFull)/step)*step, by = step) - nBins <- length(mass) - resR <- xcms:::profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), - toX = max(xRangeFull), shiftByHalfBinSize = shift) - res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), - binToX = max(xRangeFull), shiftByHalfBinSize = shift, - baseValue = 0) - expect_equal(res$y, resR) - - ## Next - step <- 0.2 - shift <- TRUE - mass <- seq(floor(min(xRangeFull)/step)*step, - ceiling(max(xRangeFull)/step)*step, by = step) - nBins <- length(mass) - resR <- xcms:::profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), - toX = max(xRangeFull), shiftByHalfBinSize = shift) - res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), - binToX = max(xRangeFull), shiftByHalfBinSize = shift, - baseValue = 0) - expect_equal(res$y, resR) - shift <- FALSE - mass <- seq(floor(min(xRangeFull)/step)*step, - ceiling(max(xRangeFull)/step)*step, by = step) - nBins <- length(mass) - resR <- xcms:::profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), - toX = max(xRangeFull), shiftByHalfBinSize = shift) - res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), - binToX = max(xRangeFull), shiftByHalfBinSize = shift, - baseValue = 0) - expect_equal(res$y, resR) - - step <- 0.13 - shift <- TRUE - mass <- seq(floor(min(xRangeFull)/step)*step, - ceiling(max(xRangeFull)/step)*step, by = step) - nBins <- length(mass) - resR <- xcms:::profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), - toX = max(xRangeFull), shiftByHalfBinSize = shift) - res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), - binToX = max(xRangeFull), shiftByHalfBinSize = shift, - baseValue = 0) - expect_equal(res$y, resR) + ## step <- 0.1 + ## shift <- TRUE + ## mass <- seq(floor(min(xRangeFull)/step)*step, + ## ceiling(max(xRangeFull)/step)*step, by = step) + ## nBins <- length(mass) + ## resR <- profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), + ## toX = max(xRangeFull), shiftByHalfBinSize = shift) + ## res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), + ## binToX = max(xRangeFull), shiftByHalfBinSize = shift, + ## baseValue = 0) + ## expect_equal(res$y, resR) + + ## ## Next + ## step <- 0.2 + ## shift <- TRUE + ## mass <- seq(floor(min(xRangeFull)/step)*step, + ## ceiling(max(xRangeFull)/step)*step, by = step) + ## nBins <- length(mass) + ## resR <- profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), + ## toX = max(xRangeFull), shiftByHalfBinSize = shift) + ## res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), + ## binToX = max(xRangeFull), shiftByHalfBinSize = shift, + ## baseValue = 0) + ## expect_equal(res$y, resR) + ## shift <- FALSE + ## mass <- seq(floor(min(xRangeFull)/step)*step, + ## ceiling(max(xRangeFull)/step)*step, by = step) + ## nBins <- length(mass) + ## resR <- profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), + ## toX = max(xRangeFull), shiftByHalfBinSize = shift) + ## res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), + ## binToX = max(xRangeFull), shiftByHalfBinSize = shift, + ## baseValue = 0) + ## expect_equal(res$y, resR) + + ## step <- 0.13 + ## shift <- TRUE + ## mass <- seq(floor(min(xRangeFull)/step)*step, + ## ceiling(max(xRangeFull)/step)*step, by = step) + ## nBins <- length(mass) + ## resR <- profBinR(X1, Y1, nBins = nBins, fromX = min(xRangeFull), + ## toX = max(xRangeFull), shiftByHalfBinSize = shift) + ## res <- binYonX(X1, Y1, nBins = nBins, binFromX = min(xRangeFull), + ## binToX = max(xRangeFull), shiftByHalfBinSize = shift, + ## baseValue = 0) + ## expect_equal(res$y, resR) }) ## Test binning using min test_that("binYonX min works", { + skip_on_os(os = "windows", arch = "i386") + X <- 1:10 breakMidPoint <- function(x) { return((x[-1L] + x[-length(x)])/2) @@ -258,6 +263,8 @@ test_that("binYonX min works", { ## Test binning using sum test_that("binYonX sum works", { + skip_on_os(os = "windows", arch = "i386") + X <- 1:10 breakMidPoint <- function(x) { return((x[-1L] + x[-length(x)])/2) @@ -313,6 +320,8 @@ test_that("binYonX sum works", { ## Test binning using mean test_that("binYonX mean works", { + skip_on_os(os = "windows", arch = "i386") + X <- 1:10 breakMidPoint <- function(x) { return((x[-1L] + x[-length(x)])/2) @@ -343,6 +352,8 @@ test_that("binYonX mean works", { }) test_that("breaks defining functions work", { + skip_on_os(os = "windows", arch = "i386") + ## Test generation of breaks for binning. ## o nBins res <- breaks_on_nBins(1, 10, 4) @@ -379,7 +390,7 @@ test_that("breaks defining functions work", { binSize = 0.2) expect_equal(brks, brksR) ## - ## Ultimate fix for issue #118 + ## Ultimate fix for issue #118 brksR <- seq((200 - 0.1), (600), by = 0.2) brks <- breaks_on_binSize((200 - 0.1), (600), binSize = 0.2) ## Compare them up to the last value, since in R that will be 600-01, while @@ -394,6 +405,8 @@ test_that("breaks defining functions work", { }) test_that("binYonX with imputation_lin works", { + skip_on_os(os = "windows", arch = "i386") + X <- 1:11 brks <- breaks_on_nBins(1, 11, 5L) Y <- c(1, NA, NA, NA, 5, 6, NA, NA, 9, 10, 11) @@ -464,6 +477,8 @@ test_that("binYonX with imputation_lin works", { }) test_that("binYonX with imputation_linbin works", { + skip_on_os(os = "windows", arch = "i386") + doPlot <- FALSE ## Construct example: ## We're using the same test than we did for profBinLinBase. @@ -560,6 +575,8 @@ test_that("binYonX with imputation_linbin works", { }) ## test_that("testIntegerInput and testDoubleInput works", { + skip_on_os(os = "windows", arch = "i386") + ## xcms:::testIntegerInput(4) ## xcms:::testIntegerInput(c(4, 5)) @@ -570,6 +587,8 @@ test_that("binYonX with imputation_linbin works", { ## }) test_that("binYonX on subsets works", { + skip_on_os(os = "windows", arch = "i386") + ## Simple test without actually needing subsets. X <- 1:11 Y <- 1:11 diff --git a/tests/testthat/test_functions-imputation.R b/tests/testthat/test_functions-imputation.R index 8cc28514c..1d864ff1e 100644 --- a/tests/testthat/test_functions-imputation.R +++ b/tests/testthat/test_functions-imputation.R @@ -1,22 +1,34 @@ test_that("imputeRowMin works", { + skip_on_os(os = "windows", arch = "i386") + mat <- cbind(c(4, 2, 4, NA, NA), c(NA, NA, NA, NA, NA), c(4, NA, 6, 3, 9), c(6, 3, NA, 6, NA)) mat_imp <- imputeRowMin(mat) - expect_equal(mat_imp[, 2], c(4, 2, 4, 3, 9)) + expect_equal(mat_imp[, 2], c(2, 1, 2, 1.5, 4.5)) mat_imp <- imputeRowMin(mat[, -3]) - expect_equal(mat_imp[, 2], c(4, 2, 4, 6, NA)) + expect_equal(mat_imp[, 2], c(2, 1, 2, 3, NA)) }) test_that("imputeRowMinRand works", { + skip_on_os(os = "windows", arch = "i386") + set.seed(123) mat <- cbind(c(4, 2, 4, NA, NA), c(NA, NA, NA, NA, NA), c(4, NA, 6, 3, 9), c(6, 3, NA, 6, NA)) mat_imp <- imputeRowMinRand(mat) rmin <- apply(mat, 1, min, na.rm = TRUE) rmin_imp <- apply(mat_imp, 1, min, na.rm = TRUE) - expect_true(all(rmin_imp < rmin / 4)) - + expect_true(all(rmin_imp < rmin)) + + mat_2 <- imputeRowMinRand(mat, method = "from_to") + expect_true(all(!is.na(mat_2))) + rmin_imp2 <- apply(mat_2, 1, min, na.rm = TRUE) + expect_true(all(rmin_imp2 < rmin)) + expect_true(all(rmin_imp2 < rmin / 2)) + expect_true(all(rmin_imp2 > rmin / 1000)) + expect_true(all(rmin_imp != rmin_imp2)) + mat_imp <- imputeRowMinRand(mat[, -3]) expect_true(all(is.na(mat_imp[5, ]))) }) diff --git a/tests/testthat/test_functions-normalization.R b/tests/testthat/test_functions-normalization.R index dab777ab1..891476fc7 100644 --- a/tests/testthat/test_functions-normalization.R +++ b/tests/testthat/test_functions-normalization.R @@ -1,7 +1,9 @@ test_that("fitModel works", { + skip_on_os(os = "windows", arch = "i386") + vals <- featureValues(xod_xgrg) dat <- data.frame(injection_idx = 1:length(fileNames(xod_xgrg))) - fits <- xcms:::rowFitModel(formula = y ~ injection_idx, y = vals, + fits <- rowFitModel(formula = y ~ injection_idx, y = vals, minVals = 3, data = dat) ## Check that we've got NA for features with less than 3 values. nas <- apply(vals, MARGIN = 1, function(z) any(is.na(z))) @@ -9,12 +11,14 @@ test_that("fitModel works", { ## Check that robustbase would work if (requireNamespace("robustbase", quietly = TRUE)) - fits <- xcms:::rowFitModel(formula = y ~ injection_idx, y = vals, + fits <- rowFitModel(formula = y ~ injection_idx, y = vals, minVals = 3, data = dat, method = "lmrob") }) test_that("model adjustment with batch works", { + skip_on_os(os = "windows", arch = "i386") + ## Here we test that linear model based adjustment with a batch is ## working. y <- c(2, 3, 2.7, 3.5, 3.8, 4.6, 5.9, 8, 4, 5.1, 5.6, 6.8, 7.1, 8.1, 8.9, @@ -28,7 +32,7 @@ test_that("model adjustment with batch works", { btch = btch)) ## y_new <- y - prd + mean(y) y_new <- y - prd + mean(lmod$fitted.values + lmod$residuals) - + points(inj_idx, y_new, pch = 16, col = "grey") expect_equal(mean(y), mean(y_new)) @@ -43,49 +47,51 @@ test_that("model adjustment with batch works", { ## Check if we have only NAs in one batch y[btch == "a"] <- NA - mdl <- xcms:::fitModel(y ~ inj_idx + btch, y = y, data = dta) + mdl <- fitModel(y ~ inj_idx + btch, y = y, data = dta) expect_true(is.na(mdl)) ## plot(x = dta$inj_idx, y_2) ## abline(sum(mdl$coefficients[c(1, 3)]), mdl$coefficients[2]) ## ## Adjust. - ## res <- xcms:::applyModelAdjustment(y = y_2, data = dta, lmod = mdl) + ## res <- applyModelAdjustment(y = y_2, data = dta, lmod = mdl) ## points(dta$inj_idx, res, pch = 16) ## mean(res, na.rm = TRUE) ## mean(y_2, na.rm = TRUE) }) test_that("fitModel, rowFitModel works on matrix and vector", { + skip_on_os(os = "windows", arch = "i386") + y <- c(2, 3, 2.7, 3.5, 3.8, 4.6, 5.9, 8, 4, 5.1, 5.6, 6.8, 7.1) inj_idx <- 1:length(y) dta <- data.frame(inj_idx = inj_idx) - expect_error(xcms:::fitModel()) - expect_error(xcms:::fitModel(formula = ~ y)) - expect_error(xcms:::fitModel(formula = ~ y, data = dta)) - expect_error(xcms:::fitModel(formula = ~ inj_idx, data = dta, y = y)) - expect_error(xcms:::fitModel(y ~ inj_idx, data = dta, y = y, method = "adfd")) - expect_error(xcms:::fitModel(y ~ inj_idx, data = dta, y = y, weights = 3)) - - res <- xcms:::fitModel(y ~ inj_idx, data = dta, y = y) + expect_error(fitModel()) + expect_error(fitModel(formula = ~ y)) + expect_error(fitModel(formula = ~ y, data = dta)) + expect_error(fitModel(formula = ~ inj_idx, data = dta, y = y)) + expect_error(fitModel(y ~ inj_idx, data = dta, y = y, method = "adfd")) + expect_error(fitModel(y ~ inj_idx, data = dta, y = y, weights = 3)) + + res <- fitModel(y ~ inj_idx, data = dta, y = y) expect_equal(res$coefficients, lm(y ~ inj_idx)$coefficients) - rres <- xcms:::fitModel(y ~ inj_idx, data = dta, y = y, method = "lmrob") + rres <- fitModel(y ~ inj_idx, data = dta, y = y, method = "lmrob") expect_true(all(res$coefficients != rres$coefficients)) ## Test with weights - res2 <- xcms:::fitModel(y ~ inj_idx, data = dta, y = y, + res2 <- fitModel(y ~ inj_idx, data = dta, y = y, weights = abs(rnorm(length(y)))) expect_true(all(res$coefficients != res2$coefficients)) - rres2 <- xcms:::fitModel(y ~ inj_idx, data = dta, y = y, method = "lmrob", + rres2 <- fitModel(y ~ inj_idx, data = dta, y = y, method = "lmrob", weights = abs(rnorm(length(y)))) expect_true(all(rres$coefficients != rres2$coefficients)) - + ymat <- matrix(rep(y, 5), nrow = 5, byrow = TRUE) - res_3 <- xcms:::fitModel(y ~ inj_idx, data = dta, y = ymat) + res_3 <- fitModel(y ~ inj_idx, data = dta, y = ymat) expect_equal(res_3$coefficients, res$coefficients) ## rowFitModel ymat[2, ] <- ymat[2, ] + 3 - res <- xcms:::rowFitModel(y ~ inj_idx, data = dta, y = ymat) + res <- rowFitModel(y ~ inj_idx, data = dta, y = ymat) expect_true(length(res) == nrow(ymat)) expect_equal(res[[1]]$coefficients, res_3$coefficients) slps <- vapply(res, function(z) z$coefficients[2], numeric(1)) @@ -98,42 +104,44 @@ test_that("fitModel, rowFitModel works on matrix and vector", { ## y being a matrix, weights a vector. wght <- abs(rnorm(length(y))) - res <- xcms:::fitModel(y ~ inj_idx, data = dta, y = y, weights = wght) - res_mat <- xcms:::rowFitModel(y ~ inj_idx, data = dta, y = ymat, + res <- fitModel(y ~ inj_idx, data = dta, y = y, weights = wght) + res_mat <- rowFitModel(y ~ inj_idx, data = dta, y = ymat, weights = wght) expect_equal(res$coefficients, res_mat[[1]]$coefficients) expect_equal(res$coefficients, res_mat[[3]]$coefficients) expect_equal(res$coefficients, res_mat[[4]]$coefficients) expect_equal(res$coefficients, res_mat[[5]]$coefficients) - + ## y being a matrix, weights a matrix. wght <- rbind(wght, abs(rnorm(length(y))), abs(rnorm(length(y))), wght, abs(rnorm(length(y)))) - res_mat <- xcms:::rowFitModel(y ~ inj_idx, data = dta, y = ymat, + res_mat <- rowFitModel(y ~ inj_idx, data = dta, y = ymat, weights = wght) expect_equal(res_mat[[1]]$coefficients, res$coefficients) expect_equal(res_mat[[4]]$coefficients, res$coefficients) expect_true(all(res_mat[[2]]$coefficients != res$coefficients)) expect_true(all(res_mat[[3]]$coefficients != res$coefficients)) expect_true(all(res_mat[[5]]$coefficients != res$coefficients)) - + ## Errors for weights. - expect_error(xcms:::rowFitModel(y ~ inj_idx, data = dta, y = ymat, + expect_error(rowFitModel(y ~ inj_idx, data = dta, y = ymat, weights = 1:3)) - expect_error(xcms:::rowFitModel(y ~ inj_idx, data = dta, y = ymat, + expect_error(rowFitModel(y ~ inj_idx, data = dta, y = ymat, weights = wght[1:3, ])) }) test_that("applyModelAdjustment works", { + skip_on_os(os = "windows", arch = "i386") + y <- c(2, 3, 2.7, 3.5, 3.8, 4.6, 5.9, 8, 4, 5.1, 5.6, 6.8, 7.1) inj_idx <- 1:length(y) btch <- c(rep("a", 8), rep("b", 5)) dta <- data.frame(inj_idx = inj_idx, batch = btch) - mdl <- xcms:::fitModel(y ~ ii, data = data.frame(ii = inj_idx), y = y) + mdl <- fitModel(y ~ ii, data = data.frame(ii = inj_idx), y = y) ## A single model, single row. - res <- xcms:::applyModelAdjustment(y, data.frame(ii = inj_idx), lmod = mdl) + res <- applyModelAdjustment(y, data.frame(ii = inj_idx), lmod = mdl) plot(inj_idx, y) abline(mdl) points(inj_idx, res, col = "grey", pch = 16) @@ -141,17 +149,17 @@ test_that("applyModelAdjustment works", { expect_true(lm(res ~ inj_idx)$coefficients[2] < 1e-7) ## Model with only batch - mdl <- xcms:::fitModel(y ~ batch, data = dta, y = y) - res <- xcms:::applyModelAdjustment(y, dta, lmod = mdl) + mdl <- fitModel(y ~ batch, data = dta, y = y) + res <- applyModelAdjustment(y, dta, lmod = mdl) plot(inj_idx, y) abline(mdl) points(inj_idx, res, col = "grey", pch = 16) expect_equal(mean(res), mean(y)) expect_equal(mean(res[dta$batch == "a"]), mean(res[dta$batch == "b"])) - + ## Model with batch-specific slope - mdl <- xcms:::fitModel(y ~ inj_idx * batch, data = dta, y = y) - res <- xcms:::applyModelAdjustment(y, dta, lmod = mdl) + mdl <- fitModel(y ~ inj_idx * batch, data = dta, y = y) + res <- applyModelAdjustment(y, dta, lmod = mdl) plot(inj_idx, y) points(inj_idx, res, col = "grey", pch = 16) expect_equal(mean(res), mean(y)) @@ -160,17 +168,17 @@ test_that("applyModelAdjustment works", { ## A single model on a matrix. ymat <- matrix(rep(y, 5), nrow = 5, byrow = TRUE) ymat[2, ] <- ymat[2, ] + 3 - mdl <- xcms:::fitModel(y ~ ii, data = data.frame(ii = inj_idx), y = y) - res <- xcms:::applyModelAdjustment(y, data.frame(ii = inj_idx), lmod = mdl) - resm <- xcms:::applyModelAdjustment(ymat, data.frame(ii = inj_idx), + mdl <- fitModel(y ~ ii, data = data.frame(ii = inj_idx), y = y) + res <- applyModelAdjustment(y, data.frame(ii = inj_idx), lmod = mdl) + resm <- applyModelAdjustment(ymat, data.frame(ii = inj_idx), lmod = mdl) expect_equal(resm[1, ], res) expect_true(lm(resm[2, ] ~ inj_idx)$coefficients[2] < 1e-7) ## multiple models with a matrix. - mdls <- xcms:::rowFitModel(y ~ ii, data = data.frame(ii = inj_idx), + mdls <- rowFitModel(y ~ ii, data = data.frame(ii = inj_idx), y = ymat) - resm <- xcms:::applyModelAdjustment(ymat, data.frame(ii = inj_idx), + resm <- applyModelAdjustment(ymat, data.frame(ii = inj_idx), lmod = mdls) expect_equal(resm[1, ], res) expect_equal(resm[2, ], res + 3) @@ -186,6 +194,8 @@ test_that("applyModelAdjustment works", { }) test_that("replaceNaOnEnds works", { + skip_on_os(os = "windows", arch = "i386") + x <- c(NA, 3, 4, 6, 4, 2, NA, 3, NA, 4, 5, 6, NA) expect_equal(replaceNaOnEnds(x), c(3, 3, 4, 6, 4, 2, NA, 3, NA, 4, 5, 6, 6)) diff --git a/tests/testthat/test_functions-utils.R b/tests/testthat/test_functions-utils.R index 2f429cb34..b19111260 100644 --- a/tests/testthat/test_functions-utils.R +++ b/tests/testthat/test_functions-utils.R @@ -1,8 +1,11 @@ test_that(".createProfileMatrix works", { - xr <- deepCopy(faahko_xr_1) - mz <- xr@env$mz - int <- xr@env$intensity - numPerSc <- diff(c(xr@scanindex, length(xr@env$mz))) + skip_on_os(os = "windows", arch = "i386") + + xr <- filterFile(faahko_od, 1) + mz <- mz(xr) + int <- unlist(intensity(xr), use.names = FALSE) + numPerSc <- lengths(mz) + mz <- unlist(mz, use.names = FALSE) ## Testing all properties. ## o bin pm <- .createProfileMatrix(mz = mz, int = int, @@ -44,11 +47,15 @@ test_that(".createProfileMatrix works", { }) test_that("plotMsData works", { + skip_on_os(os = "windows", arch = "i386") + msd <- extractMsData(faahko_od, mz = c(334.9, 335.1), rt = c(2700, 2900)) plotMsData(msd[[1]]) }) test_that(".featureIDs works", { + skip_on_os(os = "windows", arch = "i386") + res <- .featureIDs(200) expect_equal(length(res), 200) expect_true(length(unique(res)) == 200) @@ -57,6 +64,8 @@ test_that(".featureIDs works", { }) test_that("rla, rowRla work", { + skip_on_os(os = "windows", arch = "i386") + x <- c(3, 4, 5, 1, 2, 3, 7, 8, 9) grp <- c(1, 1, 1, 2, 2, 2, 3, 3, 3) res <- rla(x, grp) @@ -78,6 +87,8 @@ test_that("rla, rowRla work", { }) test_that(".rect_overlap works", { + skip_on_os(os = "windows", arch = "i386") + xl <- c(1, 3, 1.5, 4, 4, 5.5, 7, 6) xr <- c(2, 4, 3.5, 5, 5, 6.5, 8, 7.5) yb <- c(1, 2, 3.5, 4.5, 7, 8, 9.5, 10.5) @@ -150,7 +161,7 @@ test_that(".rect_overlap works", { labels = names(xl_2)) res <- .rect_overlap(xl_2, xr_2, yb_2, yt_2) expect_equal(res, list(c(1:4), 6:8)) - + idx <- sample(1:length(xl_2), length(xl_2)) xl_2 <- xl_2[idx] xr_2 <- xr_2[idx] @@ -170,3 +181,278 @@ test_that(".rect_overlap works", { (all(names(xl_2)[res[[2]]] %in% c("f", "g", "h"))))) }) + +test_that(".insertColumn works", { + skip_on_os(os = "windows", arch = "i386") + + mat <- matrix(1:100, ncol = 5) + expect_equal(.insertColumn(mat), mat) + + expect_error(.insertColumn(mat, 3)) + expect_error(.insertColumn(mat, 3, 3:4)) + + res <- .insertColumn(mat, 3, 5) + expect_true(all(res[, 3] == 5)) + expect_equal(res[, -3], mat) + + res <- .insertColumn(mat, c(2, 4), 6) + expect_true(ncol(res) == ncol(mat) + 2) + expect_equal(mat, res[, -c(2, 4)]) + expect_true(all(res[, 2] == 6)) + expect_true(all(res[, 4] == 6)) + + res <- .insertColumn(mat, c(2, 4), list(101:120)) + expect_true(ncol(res) == ncol(mat) + 2) + expect_equal(res[, 2], 101:120) + expect_equal(res[, 4], 101:120) +}) + +test_that(".ppm_range works", { + skip_on_os(os = "windows", arch = "i386") + + res <- .ppm_range(100) + expect_equal(res[1], 100) + expect_equal(res[2], 100) + res <- .ppm_range(100, 100) + expect_equal(res[1], 100 - 5000 / 1e6) + expect_equal(res[2], 100 + 5000 / 1e6) +}) + +test_that(".update_feature_definitions works", { + skip_on_os(os = "windows", arch = "i386") + + cps <- matrix(nrow = 22, ncol = 3) + rownames(cps) <- 1:22 + fts <- DataFrame(a = letters[1:6]) + pidx <- list( + c(1, 2, 3, 6, 9, 12), + c(5, 10, 22), + c(4, 9, 13, 14, 15, 16, 17), + c(11, 15, 18, 19), + c(17, 20, 21, 22), + c(5, 13, 17) + ) + fts$peakidx <- pidx + cps_sub <- cps[c(4, 6, 17, 19), ] + res <- .update_feature_definitions(fts, rownames(cps), rownames(cps_sub)) + expect_equal(res$a, c("a", "c", "d", "e", "f")) + expect_equal(res$peakidx[[1]], c(2)) + expect_equal(res$peakidx[[2]], c(1, 3)) + expect_equal(res$peakidx[[3]], c(4)) + expect_equal(res$peakidx[[4]], c(3)) + cps_sub <- cps[1:10, ] + res <- .update_feature_definitions(fts, rownames(cps), rownames(cps_sub)) + expect_equal(res$a, c("a", "b", "c", "f")) + expect_equal(res$peakidx[[1]], c(1, 2, 3, 6, 9)) + expect_equal(res$peakidx[[2]], c(5, 10)) + expect_equal(res$peakidx[[3]], c(4, 9)) + expect_equal(res$peakidx[[4]], 5) + + ## Real data set: + orig_names <- rownames(chromPeaks(xod_xgrg)) + sub_names <- sample(orig_names, (length(orig_names) / 2)) + fts <- featureDefinitions(xod_xgrg) + res <- xcms:::.update_feature_definitions(fts, orig_names, sub_names) + expect_s4_class(res, "DataFrame") + expect_true(all(lengths(res$peakidx) > 0)) + tmp <- lapply(res$peakidx, function(z) sub_names[z]) + tmp <- unlist(tmp, use.names = FALSE) + + onames <- intersect(orig_names[unlist(fts$peakidx, use.names = FALSE)], + sub_names) + expect_true(all(onames %in% tmp)) + expect_true(all(tmp %in% sub_names)) +}) + +## test_that(".chrom_peak_id works", { + ## skip_on_os(os = "windows", arch = "i386") + +## res <- .chrom_peak_id(matrix(nrow = 0, ncol = 5)) +## expect_equal(res, character()) +## cpks <- rbind(c(3, 2, 4, 12, 13), +## c(4, 2, 4, 123, 43), +## c(3, 2, 4, 12, 13), +## c(5, 4, 6, 123, 45)) +## colnames(cpks) <- c("rt", "rtmin", "rtmax", "into", "maxo") +## expect_error(.chrom_peak_id(cpks)) +## res <- .chrom_peak_id(cpks[-3, ]) +## expect_equal(res, c("3-2-4-12-13", "4-2-4-123-43", "5-4-6-123-45")) +## cpks <- chromPeaks(xod_x) +## res <- .chrom_peak_id(cpks) +## }) + +test_that(".rbind_fill works", { + skip_on_os(os = "windows", arch = "i386") + + ## matrix + a <- matrix(1:9, nrow = 3, ncol = 3) + colnames(a) <- c("a", "b", "c") + b <- matrix(1:12, nrow = 3, ncol = 4) + colnames(b) <- c("b", "a", "d", "e") + res <- .rbind_fill(a, b) + expect_equal(colnames(res), c("a", "b", "c", "d", "e")) + expect_equal(class(res), class(a)) + expect_equal(res[, "a"], c(a[, "a"], b[, "a"])) + expect_equal(res[, "b"], c(a[, "b"], b[, "b"])) + expect_equal(res[, "d"], c(NA, NA, NA, b[, "d"])) + + res <- .rbind_fill(a, b[, c("b", "a")]) + expect_equal(colnames(res), c("a", "b", "c")) + expect_equal(res[, "a"], c(a[, "a"], b[, "a"])) + + ## DataFrame + a <- DataFrame(a = 1:4, b = FALSE, c = letters[1:4]) + b <- DataFrame(d = 1:4, b = TRUE) + res <- .rbind_fill(a, b) + expect_equal(colnames(res), c("a", "b", "c", "d")) + expect_equal(res$a, c(1:4, NA, NA, NA, NA)) + expect_equal(res$b, rep(c(FALSE, TRUE), each = 4)) +}) + +test_that(".reduce works", { + skip_on_os(os = "windows", arch = "i386") + + a <- c(1.23, 1.431, 2.43, 5.44, 6) + b <- c(1.33, 2.43, 5, 6, 7) + res <- .reduce(a, b) + expect_true(nrow(res) == 3) + expect_equal(res[, 1], c(1.23, 1.431, 5.44)) + expect_equal(res[, 2], c(1.33, 5, 7)) + + idx <- sample(1:length(a)) + res_2 <- .reduce(a[idx], b[idx]) + expect_identical(res, res_2) + + res <- .reduce(a[1], b[1]) + expect_equal(res, cbind(start = a[1], end = b[1])) + + res <- .reduce(numeric(), numeric()) + expect_equal(nrow(res), 0) + + res <- .reduce(a - 0.1, b + 0.1) + expect_equal(res[, 1], c(1.13, 5.34)) + expect_equal(res[, 2], c(5.1, 7.1)) + + a <- c(4, 4) + b <- c(5, 5) + res <- .reduce(a, b) + expect_true(nrow(res) == 1) + expect_equal(res[1, 1], c(start = 4)) + expect_equal(res[1, 2], c(end = 5)) + + a <- c(3, 4, 8) + b <- c(7, 5, 10) + res <- .reduce(a, b) + expect_equal(res[, 1], c(3, 8)) + expect_equal(res[, 2], c(7, 10)) + + a <- c(3, 4, 6) + b <- c(7, 5, 10) + res <- .reduce(a, b) + expect_equal(unname(res[, 1]), 3) + expect_equal(unname(res[, 2]), 10) +}) + +test_that("groupOverlaps works", { + skip_on_os(os = "windows", arch = "i386") + + x <- c(12.2, 13, 5) + y <- c(16, 15, 6) + res <- groupOverlaps(x, y) + expect_true(is.list(res)) + expect_equal(length(res), 2) + expect_equal(res, list(3, 1:2)) + + expect_error(groupOverlaps(x, 1:2), "lengths differ") +}) + +test_that(".require_spectra works", { + skip_on_os(os = "windows", arch = "i386") + + if (requireNamespace("Spectra", quietly = TRUE)) + expect_true(.require_spectra()) + else expect_error("installed.") +}) + +test_that(".i2index works", { + skip_on_os(os = "windows", arch = "i386") + + ids <- c("a", "b", "c", "d") + res <- .i2index(c("c", "d"), ids) + expect_equal(res, c(3L, 4L)) + expect_error(.i2index(12, ids), "out of bounds") +}) + +test_that(".chromatograms_for_peaks works", { + ## Purely MS1 data. + x <- filterFile(faahko_xod, 1L) + pd <- spectra(x, BPPARAM = SerialParam()) + pd <- lapply(pd, function(z) cbind(mz = z@mz, intensity = z@intensity)) + + ## out of range + pks <- cbind(mzmin = c(200, 301), mzmax = c(202, 303), + rtmin = c(10, 20), rtmax = c(40, 50)) + res <- .chromatograms_for_peaks(pd, rtime(x), msl = msLevel(x), + pks = pks, pks_msl = rep(1L, 2)) + expect_true(length(res) == 2) + expect_s4_class(res[[1L]], "Chromatogram") + expect_s4_class(res[[2L]], "Chromatogram") + expect_equal(rtime(res[[1L]]), numeric()) + expect_equal(rtime(res[[2L]]), numeric()) + expect_equal(intensity(res[[1L]]), numeric()) + expect_equal(intensity(res[[2L]]), numeric()) + + pks <- chromPeaks(x) + res <- .chromatograms_for_peaks(pd, rtime(x), msLevel(x), + pks = chromPeaks(x), + pks_msl = chromPeakData(x)$ms_level) + expect_true(length(res) == nrow(pks)) + expect_true(all(vapply(res, inherits, logical(1), "Chromatogram"))) + ## Expected results. + ref <- chromatogram(as(x, "OnDiskMSnExp"), rt = pks[, c("rtmin", "rtmax")], + mz = pks[, c("mzmin", "mzmax")]) + expect_equal(lapply(res, intensity), lapply(ref, intensity)) + expect_equal(lapply(res, rtime), lapply(ref, rtime)) + + ## MS1 and MS2 swath data. + x <- pest_swth + pd <- spectra(x, BPPARAM = SerialParam()) + pd <- lapply(pd, function(z) cbind(mz = z@mz, intensity = z@intensity)) + + pks <- chromPeaks(x) + res <- .chromatograms_for_peaks( + pd, rtime(x), msLevel(x), tmz = isolationWindowTargetMz(x), + pks = pks, pks_msl = chromPeakData(x)$ms_level, + pks_tmz = chromPeakData(x)$isolationWindowTargetMZ) + expect_true(length(res) == nrow(pks)) + expect_true(all(vapply(res, inherits, logical(1), "Chromatogram"))) + msl <- vapply(res, msLevel, integer(1)) + expect_equal(msl, chromPeakData(x)$ms_level) + ## old code. need to do separately for MS levels and isolation window. + ref <- chromatogram(x, rt = pks[msl == 1L, c("rtmin", "rtmax")], + msLevel = 1L, mz = pks[msl == 1L, c("mzmin", "mzmax")]) + expect_equal(lapply(ref, intensity), lapply(res[msl == 1L], intensity)) + expect_equal(lapply(ref, rtime), lapply(res[msl == 1L], rtime)) + + tmz <- chromPeakData(x)$isolationWindowTargetMZ + tmp <- filterIsolationWindow(x, mz = 163.75) + idx <- which(tmz == 163.75) + ref <- chromatogram(tmp, rt = pks[idx, c("rtmin", "rtmax")], + mz = pks[idx, c("mzmin", "mzmax")], msLevel = 2L) + expect_equal(lapply(ref, intensity), lapply(res[idx], intensity)) + expect_equal(lapply(ref, rtime), lapply(res[idx], rtime)) + + tmp <- filterIsolationWindow(x, mz = 208.95) + idx <- which(tmz == 208.95) + ref <- chromatogram(tmp, rt = pks[idx, c("rtmin", "rtmax")], + mz = pks[idx, c("mzmin", "mzmax")], msLevel = 2L) + expect_equal(lapply(ref, intensity), lapply(res[idx], intensity)) + expect_equal(lapply(ref, rtime), lapply(res[idx], rtime)) + + tmp <- filterIsolationWindow(x, mz = 299.1) + idx <- which(tmz == 299.1) + ref <- chromatogram(tmp, rt = pks[idx, c("rtmin", "rtmax")], + mz = pks[idx, c("mzmin", "mzmax")], msLevel = 2L) + expect_equal(lapply(ref, intensity), lapply(res[idx], intensity)) + expect_equal(lapply(ref, rtime), lapply(res[idx], rtime)) +}) diff --git a/tests/testthat/test_functions-xcmsSwath.R b/tests/testthat/test_functions-xcmsSwath.R new file mode 100644 index 000000000..7b7b87507 --- /dev/null +++ b/tests/testthat/test_functions-xcmsSwath.R @@ -0,0 +1,40 @@ +test_that(".which_mz_in_range works", { + skip_on_os(os = "windows", arch = "i386") + + mz <- 3.4 + lowerMz <- c(1, 3, 8, 12) + upperMz <- c(3, 7, 11, 15) + res <- .which_mz_in_range(mz, lowerMz, upperMz) + expect_equal(res, 2L) + res <- .which_mz_in_range(c(3, 3.4, 9), lowerMz, upperMz) + expect_equal(res, list(c(1L, 2L), 2L, 3L)) +}) + +test_that(".which_chrom_peak_overlap_rt works", { + skip_on_os(os = "windows", arch = "i386") + + pks <- cbind(rtmin = c(1, 2, 3, 4, 5), rtmax = c(2, 3, 4, 5, 6)) + res <- .which_chrom_peak_overlap_rt(c(rtmin = 1.1, rtmax = 2.2), pks) + expect_equal(res, c(1L, 2L)) + res <- .which_chrom_peak_overlap_rt(c(rtmin = 3.1, rtmax = 3.2), pks) + expect_equal(res, 3L) +}) + +test_that(".which_chrom_peak_diff_rt works", { + skip_on_os(os = "windows", arch = "i386") + + pks <- cbind(rt = c(1, 2, 3, 4, 5), rtmax = c(2, 3, 4, 5, 6)) + res <- .which_chrom_peak_diff_rt(c(rt = 1.1), pks, diffRt = 2) + expect_equal(res, c(1L, 2L, 3L)) + res <- .which_chrom_peak_diff_rt(c(rt = 3.1), pks, diffRt = 0.101) + expect_equal(res, 3L) +}) + +test_that(".reconstruct_dia_ms2 works", { + res <- .reconstruct_dia_ms2(pest_swth) + expect_true(is(res, "Spectra")) + expect_equal(length(res), nrow(chromPeaks(pest_swth, msLevel = 1L))) + expect_equal(res$peak_id, rownames(chromPeaks(pest_swth, msLevel = 1L))) + expect_equal( + res$precursorMz, unname(chromPeaks(pest_swth, msLevel = 1L)[, "mz"])) +}) diff --git a/tests/testthat/test_matchpeaks.R b/tests/testthat/test_matchpeaks.R index 4b42a62b9..a5ce01d20 100644 --- a/tests/testthat/test_matchpeaks.R +++ b/tests/testthat/test_matchpeaks.R @@ -1,12 +1,15 @@ test_that("matchpeaks doesn't fail", { + skip_on_os(os = "windows", arch = "i386") + faahko_file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") - + faahko_xs <- xcmsSet(faahko_file, profparam = list(step = 0), - method = "centWave", noise = 10000, snthresh = 40) + method = "centWave", noise = 10000, snthresh = 40, + prefilter = c(6, 10000)) pks <- peaks(faahko_xs) - + calibs <- pks[c(3, 5, 7, 13, 17, 29), "mz"] - + res <- xcms:::matchpeaks(pks, calibs) res_2 <- xcms:::matchpeaks(pks[order(pks[, "mz"]), ], calibs) }) diff --git a/tests/testthat/test_methods-Chromatogram.R b/tests/testthat/test_methods-Chromatogram.R index 3d28702f5..2746b6efa 100644 --- a/tests/testthat/test_methods-Chromatogram.R +++ b/tests/testthat/test_methods-Chromatogram.R @@ -1,4 +1,6 @@ test_that("findChromPeaks,Chromatogram,MatchedFilterParam works", { + skip_on_os(os = "windows", arch = "i386") + od <- filterFile(faahko_od, file = 1) mzr <- c(272.1, 272.2) @@ -6,17 +8,18 @@ test_that("findChromPeaks,Chromatogram,MatchedFilterParam works", { mfp <- MatchedFilterParam() res <- findChromPeaks(chr, mfp) - expect_true(is.matrix(res)) - expect_true(nrow(res) == 2) + expect_true(is(res, "XChromatogram")) + expect_true(nrow(chromPeaks(res)) == 2) mfp <- MatchedFilterParam(fwhm = 60) res_2 <- findChromPeaks(chr, mfp) - expect_true(all(res_2[, "rtmin"] < res[, "rtmin"])) - expect_true(all(res_2[, "rtmax"] > res[, "rtmax"])) - expect_true(all(xcms:::.CPEAKS_CHROMPEAKS_REQ_NAMES[-(1:2)] %in% - colnames(res))) + expect_true(all(chromPeaks(res_2)[, "rtmin"] < chromPeaks(res)[, "rtmin"])) + expect_true(all(chromPeaks(res_2)[, "rtmax"] > chromPeaks(res)[, "rtmax"])) + expect_true(validObject(res)) }) test_that("findChromPeaks,Chromatogram,CentWaveParam works", { + skip_on_os(os = "windows", arch = "i386") + od <- filterFile(faahko_od, file = 1) mzr <- c(272.1, 272.2) @@ -24,16 +27,16 @@ test_that("findChromPeaks,Chromatogram,CentWaveParam works", { cwp <- CentWaveParam() res <- findChromPeaks(chr, cwp) - expect_true(is.matrix(res)) - expect_true(nrow(res) == 2) + expect_true(is(res, "XChromatogram")) + expect_true(nrow(chromPeaks(res)) == 2) cwp <- CentWaveParam(peakwidth = c(10, 60)) res_2 <- findChromPeaks(chr, cwp) - expect_true(nrow(res_2) > nrow(res)) + expect_true(nrow(chromPeaks(res_2)) > nrow(chromPeaks(res))) cwp <- CentWaveParam(snthresh = 5) res_2 <- findChromPeaks(chr, cwp) - expect_true(nrow(res_2) > nrow(res)) - expect_true(all(xcms:::.CPEAKS_CHROMPEAKS_REQ_NAMES[-(1:2)] %in% - colnames(res))) + expect_true(nrow(chromPeaks(res_2)) > nrow(chromPeaks(res))) + res <- chromPeaks(res) + res_2 <- chromPeaks(res_2) expect_equal(res[, "rtmin"], res_2[1:2, "rtmin"]) expect_equal(res[, "rtmax"], res_2[1:2, "rtmax"]) expect_equal(res[, "rt"], res_2[1:2, "rt"]) @@ -41,3 +44,14 @@ test_that("findChromPeaks,Chromatogram,CentWaveParam works", { expect_equal(res[, "into"], res_2[1:2, "into"]) }) +test_that("removeIntensity,Chromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + chr <- Chromatogram(rtime = c(1, 2, 3, 4, 5, 6, 7), + intensity = c(NA_real_, 13, 16, 22, 34, 15, 6)) + res <- removeIntensity(chr) + expect_equal(chr, res) + res <- removeIntensity(chr, threshold = 20) + expect_equal(intensity(res), c(NA_real_, NA_real_, NA_real_, 22, 34, + NA_real_, NA_real_)) +}) diff --git a/tests/testthat/test_methods-MChromatograms.R b/tests/testthat/test_methods-MChromatograms.R new file mode 100644 index 000000000..977422c77 --- /dev/null +++ b/tests/testthat/test_methods-MChromatograms.R @@ -0,0 +1,232 @@ +test_that("findChromPeaks,MChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + chrs <- chromatogram(od_x, mz = mzr) + res <- findChromPeaks(chrs, param = CentWaveParam()) + expect_true(is(res, "XChromatograms")) + expect_equal(intensity(res[1, 2]), intensity(chrs[1, 2])) + expect_equal(intensity(res[2, 3]), intensity(chrs[2, 3])) + expect_true(length(res@.processHistory) == 1) + res_2 <- findChromPeaks(chrs, param = CentWaveParam(sn = 50)) + expect_true(nrow(chromPeaks(res)) > nrow(chromPeaks(res_2))) + + ## MatchedFilter + res_m <- findChromPeaks(chrs, param = MatchedFilterParam()) + expect_true(is(res_m, "XChromatograms")) + expect_true(nrow(chromPeaks(res_m)) < nrow(chromPeaks(res))) + + ## on a XChromatograms + res_3 <- findChromPeaks(res_2, param = CentWaveParam()) + expect_true(length(res_3@.processHistory) == 1) + expect_equal(chromPeaks(res), chromPeaks(res_3)) +}) + +test_that("correlate,MChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + chrs <- MChromatograms(list(chr1, chr2, chr3)) + + res <- correlate(chrs) + expect_true(nrow(res) == 3) + expect_true(ncol(res) == 3) + expect_true(res[1, 3] > 0.9) + expect_true(res[1, 2] < 0.5) + + res_2 <- correlate(chrs, chrs) + expect_equal(res_2, res) + + res <- correlate(chrs) + expect_equal(res[2, 1], res[1, 2]) + expect_equal(res[3, 1], res[1, 3]) + +}) + +test_that("removeIntensity,MChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + chrs <- MChromatograms(list(chr1, chr2, chr3)) + + res <- removeIntensity(chrs) + expect_equal(res, chrs) + + res <- removeIntensity(chrs, threshold = 20) + expect_equal(intensity(res[1, 1]), c(NA_real_, 29, 50, NA_real_, 100, + NA_real_, NA_real_, NA_real_, NA_real_, + NA_real_)) + expect_equal(intensity(res[3, 1]), c(53, 80, 130, NA_real_, NA_real_, + NA_real_, NA_real_)) + + chrs <- MChromatograms(list(chr1, chr2, chr2, chr3), ncol = 2) + res <- removeIntensity(chrs, threshold = 20) + expect_equal(intensity(res[2, 2]), c(53, 80, 130, NA_real_, NA_real_, + NA_real_, NA_real_)) +}) + +test_that("filterColumnsIntensityAbove,MChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + chrs <- MChromatograms(list(chr1, chr2, chr3, chr1, chr2, chr3), ncol = 3) + + expect_error(filterColumnsIntensityAbove(chrs, threshold = c(1.1, 1.4)), + "should be") + expect_error(filterColumnsIntensityAbove(chrs, threshold = TRUE), + "should be") + + res <- filterColumnsIntensityAbove(chrs) + expect_equal(res, chrs) + + res <- filterColumnsIntensityAbove(chrs, threshold = 90) + expect_equal(res, chrs) + + res <- filterColumnsIntensityAbove(chrs, threshold = 90, which = "all") + expect_equal(res, chrs[, 2]) + + res <- filterColumnsIntensityAbove(chrs, threshold = 200, which = "any", + value = "tic") + expect_equal(res, chrs) + + res <- filterColumnsIntensityAbove(chrs, threshold = 200, which = "all", + value = "tic") + expect_equal(res, chrs[, 2]) +}) + +test_that("filterChromatogramsKeepTop,MChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + chr4 <- Chromatogram(rtime = 1:10, + intensity = c(NA, NA, 4, NA, NA, 9, NA, 10, 9, 1)) + chr5 <- Chromatogram(rtime = 1:4, intensity = c(345, 5554, 323, 2000)) + chr6 <- Chromatogram(rtime = 1:3, intensity = c(400, 244, 133)) + + + chrs <- MChromatograms(list(chr1, chr2, chr3, chr4, chr5, chr6), ncol = 3) + + expect_error(filterColumnsKeepTop(chrs, n = c(1, 2)), "of length 1") + expect_error(filterColumnsKeepTop(chrs, n = "b"), "of length 1") + expect_error(filterColumnsKeepTop(chrs, n = 10), "number of columns") + + res <- filterColumnsKeepTop(chrs, n = 1) + expect_equal(res, chrs[, 3]) + + res <- filterColumnsKeepTop(chrs, n = 2) + expect_equal(res[, 1], chrs[, 1]) + expect_equal(res[, 2], chrs[, 3]) + + res <- filterColumnsKeepTop(chrs, n = 2, aggregationFun = max) + expect_equal(res[, 1], chrs[, 2]) + expect_equal(res[, 2], chrs[, 3]) + + res <- filterColumnsKeepTop(chrs, n = 0) + expect_true(ncol(res) == 0) + expect_true(nrow(res) == 2) + + res <- filterColumnsKeepTop(chrs, n = 1, sortBy = "tic") + expect_equal(res, chrs[, 3]) + + res <- filterColumnsKeepTop(chrs, n = 1, aggregationFun = mean) + expect_equal(res, chrs[, 3]) +}) + +test_that("normalize,MChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + chr4 <- Chromatogram(rtime = 1:10, + intensity = c(NA, NA, 4, NA, NA, 9, NA, 10, 9, 1)) + chrs <- MChromatograms(list(chr1, chr2, chr3, chr4), ncol = 2) + res <- normalize(chrs) + + expect_true(ncol(res) == ncol(chrs)) + expect_true(nrow(res) == nrow(chrs)) + + expect_equal(intensity(res[1, 2]) * max(intensity(chrs[1, 2]), na.rm = TRUE), + intensity(chrs[1, 2])) +}) + +test_that(".plot_xchromatograms_overlay works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chrs <- MChromatograms(list(chr1, chr2), ncol = 1) + .plot_xchromatograms_overlay(chrs) + .plot_xchromatograms_overlay(chrs, xlim = c(-10, 20), + ylim = c(0, 150), fill = "red") + .plot_xchromatograms_overlay(chrs, xlim = c(-10, 20), ylim = c(0, 150), + yoffset = 10, fill = c("red", "blue")) + +}) + +test_that("plotChromatogramsOverlay,MChromatograms,XChromatograms work", { + skip_on_os(os = "windows", arch = "i386") + + data(xdata) + dirname(xdata) <- c(rep(system.file("cdf", "KO", package = "faahKO"), 4), + rep(system.file("cdf", "WT", package = "faahKO"), 4)) + fts <- c("FT097", "FT163", "FT165") + xdata <- filterFile(xdata, file = 1:2, keepFeatures = TRUE) + chrs <- featureChromatograms(xdata, features = fts) + + plotChromatogramsOverlay(chrs) + plotChromatogramsOverlay(chrs, transform = log10) + plotChromatogramsOverlay(chrs, peakType = "rectangle", peakBg = NA) + plotChromatogramsOverlay(chrs, peakType = "rectangle", peakBg = NA, + transform = log2) + plotChromatogramsOverlay( + chrs, peakType = "rectangle", peakBg = NA, yoffset = 100000, + fill = c("#ff000040", "#00ff0040", "#0000ff40")) + + res <- plotChromatogramsOverlay(chrs, stacked = 0.5, bty = "n") + expect_equal(length(res), ncol(chrs)) + res <- plotChromatogramsOverlay(chrs, stacked = 0.5, bty = "n", + transform = log2) + res <- plotChromatogramsOverlay(chrs, stacked = 0.1, bty = "n") + + plotChromatogramsOverlay(chrs[1, ]) + + chr <- chrs[, 1] + plotChromatogramsOverlay(chr, peakBg = c("red", "blue")) + plotChromatogramsOverlay(chr, peakBg = c("blue", "red")) + + chrs <- as(chrs, "MChromatograms") + plotChromatogramsOverlay(chrs) + plotChromatogramsOverlay(chrs, transform = log2) +}) diff --git a/tests/testthat/test_methods-OnDiskMSnExp.R b/tests/testthat/test_methods-OnDiskMSnExp.R index 5ce87ae02..59a22d727 100644 --- a/tests/testthat/test_methods-OnDiskMSnExp.R +++ b/tests/testthat/test_methods-OnDiskMSnExp.R @@ -1,16 +1,10 @@ test_that("profMat,OnDiskMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## Get it from all 3 files in one go. - res <- profMat(faahko_od, step = 2) - res_2 <- profMat(xcmsRaw(faahko_3_files[2], profstep = 0), step = 2) - expect_equal(res_2, res[[2]]) - res_2 <- profMat(xcmsRaw(faahko_3_files[3], profstep = 0), step = 2) - expect_equal(res_2, res[[3]]) - res_2 <- profMat(faahko_xod, step = 2) + res <- profMat(filterRt(faahko_od, c(2500, 3000)), step = 2) + res_2 <- profMat(filterRt(faahko_xod, c(2500, 3000)), step = 2) expect_equal(res, res_2) - res <- profMat(faahko_od, step = 2, method = "binlin", fileIndex = 2) - res_2 <- profMat(xcmsRaw(faahko_3_files[2], profstep = 0), step = 2, - method = "binlin") - expect_equal(res_2, res[[1]]) ## Simulating issue #312 od_1 <- filterFile(microtofq_od, 1) @@ -18,29 +12,19 @@ test_that("profMat,OnDiskMSnExp works", { res_clnd <- profMat(od_1_clnd) }) -test_that( - "findChromPeaks,OnDiskMSnExp,CentWaveParam works with multiple MS levels", { - msn_file <- system.file( - package = "msdata", - "proteomics/MS3TMT10_01022016_32917-33481.mzML.gz") - msn_file <- system.file( - package = "msdata", - "proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz") - msn_data <- readMSData(msn_file, mode = "onDisk") - msn_xdata <- findChromPeaks(pickPeaks(msn_data), param = CentWaveParam()) - expect_equal(msLevel(msn_data), msLevel(msn_xdata)) - }) - test_that("findChromPeaks,OnDiskMSnExp,CentWaveParam variants", { + skip_on_os(os = "windows", arch = "i386") + ## Reproduce with msdata files: fl <- system.file("microtofq/MM14.mzML", package = "msdata") raw <- readMSData(fl, mode = "onDisk") options(originalCentWave = TRUE) - tmp <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) + tmp <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10), + prefilter = c(3, 500))) ## ## Use the getPeakInt2 which uses the rawMat function. - ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## pkI2 <- .getPeakInt2(tmp, chromPeaks(tmp)) ## ## Use the getPeakInt3 which uses the getEIC C function. - ## pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + ## pkI3 <- .getPeakInt3(tmp, chromPeaks(tmp)) ## ## These fail for the original centWave code. ## expect_true(sum(pkI2 != chromPeaks(tmp)[, "into"]) > length(pkI2) / 2) ## ## expect_equal(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) @@ -48,7 +32,8 @@ test_that("findChromPeaks,OnDiskMSnExp,CentWaveParam variants", { ## expect_equal(pkI2, pkI3) ## Try with new implementation. options(originalCentWave = FALSE) - tmp2 <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) + tmp2 <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10), + prefilter = c(3, 500))) ## Find different number of peaks: expect_true(nrow(chromPeaks(tmp2)) != nrow(chromPeaks(tmp))) ## Are the peaks similar? @@ -65,40 +50,41 @@ test_that("findChromPeaks,OnDiskMSnExp,CentWaveParam variants", { rownames(cp2) <- NULL expect_equal(cp2[, cn], pks[, cn]) ## Are the values related? - plot(cp2[, "into"], pks[, "into"]) ## Very similar - plot(cp2[, "intb"], pks[, "intb"]) ## Very similar - plot(cp2[, "rtmin"], pks[, "rtmin"]) ## Very similar - plot(cp2[, "rtmax"], pks[, "rtmax"]) ## Very similar + ## plot(cp2[, "into"], pks[, "into"]) ## Very similar + ## plot(cp2[, "intb"], pks[, "intb"]) ## Very similar + ## plot(cp2[, "rtmin"], pks[, "rtmin"]) ## Very similar + ## plot(cp2[, "rtmax"], pks[, "rtmax"]) ## Very similar ## Use the getPeakInt3 which uses the getEIC C function. - ## pkI2_2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) - ## pkI3_2 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + ## pkI2_2 <- .getPeakInt2(tmp2, chromPeaks(tmp2)) + ## pkI3_2 <- .getPeakInt3(tmp2, chromPeaks(tmp2)) ## ## These fail for the original centWave code. ## expect_equal(unname(pkI2_2), unname(chromPeaks(tmp2)[, "into"])) ## expect_equal(unname(pkI3_2), unname(chromPeaks(tmp2)[, "into"])) ## expect_equal(pkI2_2, pkI3_2) - + ## The same for one of the test files; this works even with the original ## centWave code options(originalCentWave = TRUE) - tmp <- filterFile(xod_xgrg, file = 3) + tmp <- filterFile(xod_xgrg, file = 3, keepAdjustedRtime = FALSE) ## ## Use the getPeakInt2 which uses the rawMat function. - ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## pkI2 <- .getPeakInt2(tmp, chromPeaks(tmp)) ## ## Use the getPeakInt3 which uses the getEIC C function. - ## pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + ## pkI3 <- .getPeakInt3(tmp, chromPeaks(tmp)) ## expect_equal(pkI2, pkI3) ## expect_equal(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) ## expect_equal(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) ## New modified centWave. options(originalCentWave = FALSE) tmp2 <- findChromPeaks(filterFile(faahko_od, file = 3), - CentWaveParam(noise = 10000, snthresh = 40)) + CentWaveParam(noise = 10000, snthresh = 40, + prefilter = c(3, 10000))) ## Even the identified peaks are identical! expect_equal(unname(chromPeaks(tmp)), unname(chromPeaks(tmp2))) ## Use the getPeakInt2 which uses the rawMat function. - ## pkI2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) + ## pkI2 <- .getPeakInt2(tmp2, chromPeaks(tmp2)) ## ## Use the getPeakInt3 which uses the getEIC C function. - ## pkI3 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + ## pkI3 <- .getPeakInt3(tmp2, chromPeaks(tmp2)) ## expect_equal(pkI2, pkI3) ## expect_equal(unname(pkI2), unname(chromPeaks(tmp2)[, "into"])) ## expect_equal(unname(pkI3), unname(chromPeaks(tmp2)[, "into"])) @@ -106,41 +92,20 @@ test_that("findChromPeaks,OnDiskMSnExp,CentWaveParam variants", { }) test_that("findChromPeaks,OnDiskMSnExp,CentWaveParam works", { - fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") - xr <- deepCopy(faahko_xr_1) + skip_on_os(os = "windows", arch = "i386") + onDisk <- filterFile(faahko_od, file = 1) ppm <- 40 snthresh <- 40 - res_x <- findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh, - noise = 100000)@.Data - ## Bypass xcmsRaw - xs <- xcmsSet(fs[1], profparam = list(profstep = 0), ppm = ppm, - snthresh = snthresh, method = "centWave", - noise = 100000) - expect_equal(xs@peaks[, colnames(res_x)], res_x) - ## OnDiskMSnExp - ## onDisk <- readMSData(fs[1], msLevel. = 1, mode = "onDisk") - cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh, noise = 100000) - res <- findChromPeaks(onDisk, param = cwp, return.type = "list") - expect_equal(res[[1]], peaks(xs)@.Data) + cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh, noise = 100000, + prefilter = c(3, 10000)) + res <- findChromPeaks(onDisk, param = cwp) + expect_true(hasChromPeaks(res)) + expect_equal(nrow(chromPeaks(res)), 6) expect_error(findChromPeaks(onDisk, param = cwp, msLevel = 2)) - ## returning an xcmsSet - res <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") - pks <- peaks(res) - rownames(pks) <- NULL - expect_equal(pks[, colnames(peaks(xs))], peaks(xs)) - expect_true(is(res, "xcmsSet")) - - ## Return type XCMSnExp - res <- findChromPeaks(onDisk, param = cwp) - expect_true(hasChromPeaks(res)) - expect_true(!hasAdjustedRtime(res)) - expect_true(!hasFeatures(res)) pks <- chromPeaks(res) - rownames(pks) <- NULL - expect_equal(peaks(xs)@.Data, pks[, -ncol(chromPeaks(res))]) ## check that rownames are set expect_true(!is.null(rownames(chromPeaks(res)))) @@ -149,71 +114,54 @@ test_that("findChromPeaks,OnDiskMSnExp,CentWaveParam works", { }) test_that("findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam works", { - fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") - xr <- deepCopy(faahko_xr_1) - snth <- 20 - ns <- 2500 - snthIso <- 5 - res_x <- findPeaks.centWaveWithPredictedIsotopeROIs(xr, noise = ns, - snthresh = snth, - snthreshIsoROIs = snthIso)@.Data - ## Bypass xcmsRaw - xs <- xcmsSet(fs[1], profparam = list(profstep = 0), snthresh = snth, - method = "centWaveWithPredictedIsotopeROIs", noise = ns, - snthreshIsoROIs = snthIso) - expect_equal(xs@peaks[, colnames(res_x)], res_x) + skip_on_os(os = "windows", arch = "i386") + ## OnDiskMSnExp - onDisk <- readMSData(fs[1], msLevel. = 1, mode = "onDisk") - cwp <- CentWavePredIsoParam(snthresh = snth, noise = ns, - snthreshIsoROIs = snthIso) - res <- findChromPeaks(onDisk, param = cwp, return.type = "list") - expect_equal(res[[1]], peaks(xs)@.Data) + onDisk <- filterFile(faahko_od, file = 1) + cwp <- CentWavePredIsoParam(snthresh = 20, noise = 2500, + snthreshIsoROIs = 5, prefilter = c(5, 10000)) expect_error(findChromPeaks(onDisk, param = cwp, msLevel = 2)) - ## returning an xcmsSet - res <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") - pks <- peaks(res) - rownames(pks) <- NULL - expect_equal(pks[, colnames(peaks(xs))], peaks(xs)) - expect_true(is(res, "xcmsSet")) - ## Return an XCMSnExp res <- findChromPeaks(onDisk, param = cwp) expect_true(hasChromPeaks(res)) expect_true(!hasAdjustedRtime(res)) expect_true(!hasFeatures(res)) pks <- chromPeaks(res) - rownames(pks) <- NULL - expect_equal(peaks(xs)@.Data, pks[, colnames(peaks(xs)@.Data)]) }) test_that("findChromPeaks,OnDiskMSnExp,MassifquantParam works", { - mzf <- system.file("microtofq/MM14.mzML", package = "msdata") - mqp <- MassifquantParam(ppm = 20, criticalValue = 1.2) - res <- xcmsSet(mzf[1], method = "massifquant", ppm = 20, - criticalValue = 1.2) - ## onDisk - onDisk <- readMSData(mzf[1], mode = "onDisk") - res_o <- findChromPeaks(onDisk, param = mqp, return.type = "xcmsSet") - expect_equal(unname(peaks(res_o)[, colnames(peaks(res))]), - unname(peaks(res))) - expect_equal(unname(res_o@rt$raw[[1]]), unname(res@rt$raw[[1]])) + skip_on_os(os = "windows", arch = "i386") + + onDisk <- filterFile(microtofq_od, 1) + res_o <- findChromPeaks(onDisk, param = MassifquantParam(prefilter = c(5, 5000))) + expect_true(hasChromPeaks(res_o)) + expect_equal(nrow(chromPeaks(res_o)), 15) expect_error(findChromPeaks(onDisk, param = mqp, msLevel = 2)) }) test_that("findChromPeaks,OnDiskMSnExp,MatchedFilterParam works", { - fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") + skip_on_os(os = "windows", arch = "i386") + mfp <- MatchedFilterParam(binSize = 20, impute = "lin") - res <- xcmsSet(fs[1], method = "matchedFilter", profmethod = "binlin", - step = binSize(mfp)) - ## onDisk - ## onDisk <- readMSData(fs[1], mode = "onDisk") onDisk <- filterFile(faahko_od, file = 1) - res_o <- findChromPeaks(onDisk, param = mfp, return.type = "xcmsSet") - expect_equal(unname(peaks(res_o)[, colnames(peaks(res))]), - unname(peaks(res))) - expect_equal(unname(res_o@rt$raw[[1]]), unname(res@rt$raw[[1]])) + res_o <- findChromPeaks(onDisk, param = mfp) + expect_true(hasChromPeaks(res_o)) + expect_equal(nrow(chromPeaks(res_o)), 54) expect_error(findChromPeaks(onDisk, param = mfp, msLevel = 2)) }) + +test_that("isolationWindowTargetMz,OnDiskMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + res <- isolationWindowTargetMz(xod_x) + expect_true(all(is.na(res))) + expect_true(length(res) == length(xod_x)) + + f <- proteomics(full.names = TRUE)[5] + tmt <- readMSData(f, mode = "onDisk") + res <- isolationWindowTargetMz(tmt) + expect_true(!all(is.na(res))) +}) diff --git a/tests/testthat/test_methods-XCMSnExp.R b/tests/testthat/test_methods-XCMSnExp.R index 8484c3c8f..30303c345 100644 --- a/tests/testthat/test_methods-XCMSnExp.R +++ b/tests/testthat/test_methods-XCMSnExp.R @@ -1,4 +1,6 @@ test_that("XCMSnExp, XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + rts <- rtime(faahko_od) rts_2 <- rtime(od_x) expect_equal(rts, rts_2) @@ -10,43 +12,37 @@ test_that("XCMSnExp, XCMSnExp works", { expect_equal(rts_4, rts_3[[2]]) rts_4 <- rtime(filterFile(faahko_od, file = 3)) expect_equal(rts_4, rts_3[[3]]) - ## Compare with the values we get from an xcmsSet: - rtx <- faahko_xs@rt$raw - expect_equal(unlist(rtx, use.names = FALSE), - unlist(rtime(faahko_xod, bySample = TRUE), use.names = FALSE)) }) test_that("mz,XCMSnExp works", { - mzs <- mz(faahko_od) + skip_on_os(os = "windows", arch = "i386") + + tmp_od <- filterRt(faahko_od, rt = c(3500, 3800)) + tmp_xod <- filterRt(xod_x, rt = c(3500, 3800)) + mzs <- mz(tmp_od) ## The check below has to work, since we're calling the mz,OnDiskMSnExp. - ## mzs_2 <- mz(od_x) - ## expect_equal(mzs, mzs_2) - mzs_2 <- mz(xod_x, bySample = TRUE) - tmp <- split(mzs, fromFile(faahko_od)) + mzs_2 <- mz(tmp_xod, bySample = TRUE) + tmp <- split(mzs, fromFile(tmp_od)) expect_equal(lapply(tmp, unlist, use.names = FALSE), mzs_2) - ## Check if mz are correctly ordered for bySample - mzs_3 <- mz(filterFile(faahko_od, file = 2)) - expect_equal(unlist(mzs_3, use.names = FALSE), mzs_2[[2]]) }) test_that("intensity,XCMSnExp works", { - ints <- intensity(faahko_od) + skip_on_os(os = "windows", arch = "i386") + + tmp_od <- filterRt(faahko_od, rt = c(3500, 3800)) + tmp_xod <- filterRt(xod_x, rt = c(3500, 3800)) + ints <- intensity(tmp_od) ## The check below has to work, since we're calling the intensity,OnDiskMSnExp. ## ints_2 <- intensity(od_x) ## expect_equal(ints, ints_2) - ints_2 <- intensity(xod_x, bySample = TRUE) - tmp <- split(ints, fromFile(faahko_od)) + ints_2 <- intensity(tmp_xod, bySample = TRUE) + tmp <- split(ints, fromFile(tmp_od)) expect_equal(lapply(tmp, unlist, use.names = FALSE), ints_2) - ## Check if mz are correctly ordered for bySample - ints_3 <- intensity(filterFile(faahko_od, file = 2)) - expect_equal(unlist(ints_3, use.names = FALSE), ints_2[[2]]) }) test_that("spectra,XCMSnExp works", { - xod <- as(faahko_od, "XCMSnExp") - res <- spectra(xod) - res_2 <- spectra(xod, bySample = TRUE) - expect_equal(split(res, fromFile(xod)), res_2) + skip_on_os(os = "windows", arch = "i386") + ## xod_x tmp <- filterRt(xod_x, rt = c(2700, 2900)) res <- spectra(tmp) @@ -67,7 +63,7 @@ test_that("spectra,XCMSnExp works", { expect_true(sum(unlist(lapply(res2, rtime)) == unlist(lapply(res, rtime))) < length(rtime(tmp)) / 4) res3 <- spectra(tmp2, adjusted = FALSE) - expect_equal(res, res3) + expect_equal(res, res3) ## adjusted rt tmp <- filterFile(xod_xgr, file = 2, keepAdjustedRtime = TRUE) expect_true(hasAdjustedRtime(tmp)) @@ -80,9 +76,11 @@ test_that("spectra,XCMSnExp works", { }) test_that("XCMSnExp accessors work", { + skip_on_os(os = "windows", arch = "i386") + ## Filling with data... xod <- as(faahko_od, "XCMSnExp") - ## peaks + ## peaks expect_true(!hasChromPeaks(xod)) chromPeaks(xod) <- chromPeaks(xod_x) expect_true(hasChromPeaks(xod)) @@ -95,6 +93,11 @@ test_that("XCMSnExp accessors work", { pks <- chromPeaks(xod) rownames(pks) <- NULL expect_equal(tmp, pks) + ## chromPeaks with isFilledColumn + expect_true(all(colnames(pks) != "is_filled")) + pks <- chromPeaks(xod_x, isFilledColumn = TRUE) + expect_true(any(colnames(pks) == "is_filled")) + expect_true(all(pks[, "is_filled"] == 0)) ## chromPeaks with rt all_pks <- chromPeaks(xod_x) pks <- chromPeaks(xod_x, rt = c(2000, 2600), type = "within") @@ -139,13 +142,18 @@ test_that("XCMSnExp accessors work", { expect_error(chromPeaks(xod) <- pks) ## featureDefinitions expect_true(!hasFeatures(xod)) - library(S4Vectors) - fd <- DataFrame(faahko_xsg@groups) - fd$peakidx <- faahko_xsg@groupidx + fd <- featureDefinitions(xod_xg) + fd$ms_level <- 2L featureDefinitions(xod) <- fd expect_true(hasChromPeaks(xod)) expect_true(hasFeatures(xod)) + expect_true(hasFeatures(xod, msLevel = 2L)) expect_equal(featureDefinitions(xod), fd) + expect_true(nrow(featureDefinitions(xod, msLevel = 2L)) > 0) + fd$ms_level <- 1L + featureDefinitions(xod) <- fd + expect_true(nrow(featureDefinitions(xod, msLevel = 2L)) == 0) + expect_false(hasFeatures(xod, msLevel = 2L)) ## featureDefinitions with mz and/or rt range: obj <- xod_xgrg feat_def <- featureDefinitions(obj) @@ -179,7 +187,7 @@ test_that("XCMSnExp accessors work", { feat_def[keep_rt, , drop = FALSE]) expect_equal(featureDefinitions(obj, rt = rtr, mz = mzr, type = "any"), feat_def[keep_rt & keep_mz, , drop = FALSE]) - + ## adjustedRtime expect_true(!hasAdjustedRtime(xod)) expect_true(hasAdjustedRtime(xod_r)) @@ -209,13 +217,18 @@ test_that("XCMSnExp accessors work", { }) test_that("findChromPeaks,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## Call findChromPeaks on an XCMSnExp - tmp <- findChromPeaks(xod_x, param = CentWaveParam(noise = 10000, - snthresh = 40)) - expect_equal(chromPeaks(tmp), chromPeaks(xod_x)) + tmp <- findChromPeaks(filterFile(xod_x, 1), + param = CentWaveParam(noise = 10000, + snthresh = 40, + prefilter = c(3, 10000))) + expect_equal(unname(chromPeaks(tmp)), unname(chromPeaks(filterFile(xod_x, 1)))) ## Check that it works also on adjusted retention times: tmp <- findChromPeaks(xod_r, param = CentWaveParam(noise = 10000, - snthresh = 40)) + snthresh = 40, + prefilter = c(3, 10000))) expect_true(hasAdjustedRtime(tmp)) expect_equal( length(processHistory(tmp, type = .PROCSTEP.RTIME.CORRECTION)),1) @@ -237,33 +250,22 @@ test_that("findChromPeaks,XCMSnExp works", { rownames(pks) <- NULL pks <- pks[pks[, "sample"] == 1, colnames(res_2)] expect_equal(res_2, pks) - ## Second try: - tmp <- findChromPeaks(xod_xgrg, param = CentWaveParam(noise = 10000, - snthresh = 40)) - expect_true(hasAdjustedRtime(tmp)) - expect_equal( - length(processHistory(tmp, type = .PROCSTEP.RTIME.CORRECTION)),1) - expect_true(sum(chromPeaks(tmp)[, "rt"] != chromPeaks(xod_x)[, "rt"]) > - ncol(chromPeaks(tmp))) - tmp_sub <- filterFile(xod_xgrg, file = 3, keepAdjustedRtime = TRUE) - expect_equal(unname(rtime(tmp_sub, adjusted = TRUE)), - unname(rtime(xod_xgrg, bySample = TRUE, adjusted = TRUE)[[3]])) - spctr <- spectra(tmp_sub) - mz_values <- lapply(spctr, mz) - int_values <- unlist(lapply(spctr, intensity)) - res_2 <- do_findChromPeaks_centWave(mz = unlist(mz_values), - int = int_values, - scantime = rtime(tmp_sub, - adjusted = TRUE), - valsPerSpect = lengths(mz_values), - noise = 10000, snthresh = 40) - pks <- chromPeaks(tmp) - rownames(pks) <- NULL - pks <- pks[pks[, "sample"] == 3, colnames(res_2)] - expect_equal(res_2, pks) + + ## Adding peak detection results + res <- findChromPeaks( + xod_x, param = CentWaveParam(noise = 8000, snthresh = 40, + prefilter = c(3, 8000), + verboseColumns = TRUE), add = TRUE) + expect_true(length(processHistory(res)) == + (length(processHistory(xod_x)) + 1)) + expect_true(nrow(chromPeaks(res)) > nrow(chromPeaks(xod_x))) + expect_true(ncol(chromPeaks(res)) > ncol(chromPeaks(xod_x))) + expect_true(length(unique(rownames(chromPeaks(res)))) == nrow(chromPeaks(res))) }) test_that("processHistory,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ph <- ProcessHistory(fileIndex. = 2, info. = "For file 2") ph_2 <- ProcessHistory(fileIndex. = 1:2, info. = "For files 1 to 2") xod <- as(od_x, "XCMSnExp") @@ -281,128 +283,81 @@ test_that("processHistory,XCMSnExp works", { expect_true(validObject(xod)) }) -test_that("XCMSnExp droppers work", { - ## How are the drop functions expected to work? - type_feat_det <- .PROCSTEP.PEAK.DETECTION - type_feat_algn <- .PROCSTEP.PEAK.GROUPING - type_rt_adj <- .PROCSTEP.RTIME.CORRECTION - ## Perform alignment. - ## xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) - expect_true(hasFeatures(xod_xg)) - expect_true(hasChromPeaks(xod_x)) - expect_true(hasChromPeaks(xod_xg)) - expect_true(!hasAdjustedRtime(xod_xg)) - expect_true(length(processHistory(xod_xg, type = type_feat_algn)) == 1) - ## Retention time adjustment. - ## xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 1)) - expect_true(hasChromPeaks(xod_xgr)) - expect_true(length(processHistory(xod_xgr, type = type_feat_det)) == 1) - expect_true(!hasFeatures(xod_xgr)) ## These should have been removed - expect_true(length(processHistory(xod_xgr, type = type_feat_algn)) == 1) - expect_true(hasAdjustedRtime(xod_xgr)) - expect_true(length(processHistory(xod_xgr, type = type_rt_adj)) == 1) - ## Most of the retention times are different - expect_true(sum(chromPeaks(xod_xgr)[, "rt"] != chromPeaks(xod_x)[, "rt"]) > - nrow(chromPeaks(xod_x)) / 2) - expect_true(sum(rtime(xod_xgr) == rtime(xod_xg)) < length(rtime(xod_xg) / 2)) - ## Alignment after retention time adjustment. - ## xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) - expect_true(hasChromPeaks(xod_xgrg)) - expect_equal(chromPeaks(xod_xgrg), chromPeaks(xod_xgr)) - expect_true(hasAdjustedRtime(xod_xgrg)) - expect_equal(rtime(xod_xgrg), rtime(xod_xgr)) - expect_equal(rtime(xod_xgrg, adjusted = FALSE), rtime(od_x)) - expect_true(length(processHistory(xod_xgr, type = type_feat_algn)) == 1) - expect_true(hasFeatures(xod_xgrg)) - expect_true(length(processHistory(xod_xgrg, type = type_feat_algn)) == 2) - +test_that("dropChromPeaks,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## 1) dropDetectedFeatures: delete all process history steps and all data. res <- dropChromPeaks(xod_x) expect_true(!hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 0) - expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) - expect_true(!hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 0) - expect_equal(rtime(res), rtime(od_x)) + expect_true(.has_chrom_peak_data(xod_x@msFeatureData)) + expect_false(.has_chrom_peak_data(res@msFeatureData)) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.DETECTION)) == 0) ## res <- dropChromPeaks(xod_xg) expect_true(!hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.DETECTION)) == 0) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) - expect_true(!hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 0) - expect_equal(rtime(res), rtime(od_x)) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.GROUPING)) == 0) ## res <- dropChromPeaks(xod_xgr) expect_true(!hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.DETECTION)) == 0) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.GROUPING)) == 0) expect_true(!hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.RTIME.CORRECTION)) == 0) expect_equal(rtime(res), rtime(od_x)) res <- dropChromPeaks(xod_xgr, keepAdjustedRtime = TRUE) - expect_true(hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 1) - expect_equal(rtime(res), rtime(xod_xgr)) expect_true(!hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.DETECTION)) == 0) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.GROUPING)) == 0) + expect_true(hasAdjustedRtime(res)) + expect_true(length(processHistory(res, type = .PROCSTEP.RTIME.CORRECTION)) == 1) + expect_equal(rtime(res), rtime(xod_xgr)) ## res <- dropChromPeaks(xod_xgrg) expect_true(!hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.DETECTION)) == 0) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.FEATURE.GROUPING)) == 0) expect_true(!hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.RTIME.CORRECTION)) == 0) expect_equal(rtime(res), rtime(od_x)) res <- dropChromPeaks(xod_xgrg, keepAdjustedRtime = TRUE) - expect_true(hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 1) - expect_equal(rtime(res), rtime(xod_xgr)) expect_true(!hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.DETECTION)) == 0) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) - + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.GROUPING)) == 0) + expect_true(hasAdjustedRtime(res)) + expect_true(length(processHistory(res, type = .PROCSTEP.RTIME.CORRECTION)) == 1) + expect_equal(rtime(res), rtime(xod_xgrg)) +}) + +test_that("dropFeatureDefinitions,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## 2) dropFeatureDefinitions: ## a) drop the feature groups and the latest related process history ## b) if retention time correction was performed AFTER the latest feature ## grouping, drop also the retention time correction and all related ## process histories. + res <- dropFeatureDefinitions(xod_x) + expect_equal(res, xod_x) res <- dropFeatureDefinitions(xod_xg) expect_equal(res, xod_x) - expect_true(hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 1) - expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) - expect_true(!hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 0) - expect_equal(rtime(res), rtime(od_x)) - ## No feature groups - so there is nothing that this function does here. - res <- dropFeatureDefinitions(xod_xgr) - expect_equal(res, xod_xgr) - expect_true(hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 1) - expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 1) - expect_true(hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 1) ## Remove the latest ones. res <- dropFeatureDefinitions(xod_xgrg) expect_equal(res, xod_xgr) - expect_true(hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 1) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 1) expect_true(hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 1) + expect_true(length(processHistory(res, type = .PROCSTEP.RTIME.CORRECTION)) == 1) expect_equal(rtime(res, adjusted = FALSE), rtime(od_x)) expect_equal(rtime(res, adjusted = TRUE), rtime(xod_xgr)) +}) + +test_that("dropAdjustedRtime,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") ## 3) dropAdjustedRtime: ## a) drop the retention time adjustment and related process histories @@ -415,32 +370,32 @@ test_that("XCMSnExp droppers work", { ## This drops also the process history for alignment. res <- dropAdjustedRtime(xod_xgr) expect_true(hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 1) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.GROUPING)) == 0) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.RTIME.CORRECTION)) == 0) expect_true(!hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 0) expect_equal(chromPeaks(res), chromPeaks(xod_x)) - expect_equal(res, xod_x) - expect_equal(rtime(res), rtime(xod_x)) expect_equal(rtime(res), rtime(xod_xgr, adjusted = FALSE)) + ## expect_equal(res, xod_x) ## This drops also the feature alignment performed later. res <- dropAdjustedRtime(xod_xgrg) expect_true(hasChromPeaks(res)) - expect_true(length(processHistory(res, type = type_feat_det)) == 1) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.DETECTION)) == 1) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res, type = type_feat_algn)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.PEAK.GROUPING)) == 0) expect_true(!hasAdjustedRtime(res)) - expect_true(length(processHistory(res, type = type_rt_adj)) == 0) + expect_true(length(processHistory(res, type = .PROCSTEP.RTIME.CORRECTION)) == 0) expect_equal(chromPeaks(res), chromPeaks(xod_x)) - expect_equal(res, xod_x) + ## expect_equal(res, xod_x) expect_equal(rtime(res), rtime(xod_xgrg, adjusted = FALSE)) }) test_that("XCMSnExp inherited methods work", { + skip_on_os(os = "windows", arch = "i386") + ## [ tmp_1 <- faahko_od[1:10] - expect_warning(tmp_2 <- xod_x[1:10]) + tmp_2 <- xod_x[1:10] expect_true(length(processHistory(tmp_2)) == 0) expect_true(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") @@ -456,16 +411,17 @@ test_that("XCMSnExp inherited methods work", { tmp <- xod_r[idxs, keepAdjustedRtime = TRUE] expect_true(hasAdjustedRtime(tmp)) expect_equal(rtime(xod_r)[idxs], rtime(tmp)) + expect_true(length(processHistory(tmp, type = .PROCSTEP.RTIME.CORRECTION)) == 0) ## Same with object containing also peaks and features - expect_warning(tmp <- xod_xgrg[idxs]) + tmp <- xod_xgrg[idxs] expect_true(!hasAdjustedRtime(tmp)) expect_true(!hasChromPeaks(tmp)) expect_true(!hasFeatures(tmp)) - expect_warning(tmp <- xod_xgrg[idxs, keepAdjusted = TRUE]) + tmp <- xod_xgrg[idxs, keepAdjusted = TRUE] expect_true(hasAdjustedRtime(tmp)) expect_equal(rtime(xod_xgrg)[idxs], rtime(tmp)) - expect_true(length(processHistory(tmp)) == 1) - + expect_true(length(processHistory(tmp)) == 0) + ## [[ spct <- xod_x[[13]] expect_true(is(spct, "Spectrum1")) @@ -476,10 +432,10 @@ test_that("XCMSnExp inherited methods work", { spct <- xod_r[[13]] expect_equal(rtime(spct), unname(rtime(xod_r, adjusted = TRUE)[13])) expect_true(rtime(spct) != rtime(xod_r, adjusted = FALSE)[13]) - + ## bin - tmp_1 <- bin(faahko_od) - expect_warning(tmp_2 <- bin(xod_x)) + tmp_1 <- bin(faahko_od, binSize = 20) + expect_warning(tmp_2 <- bin(xod_x, binSize = 20)) expect_true(length(processHistory(tmp_2)) == 0) expect_true(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") @@ -508,11 +464,11 @@ test_that("XCMSnExp inherited methods work", { expect_equal(tmp_2, xod_x) expect_equal(length(filterMsLevel(xod_x, msLevel = 2)), 0) ## If we've got adjusted retention times, keep them. - expect_warning(tmp_1 <- filterMsLevel(xod_xgr, msLevel = 1)) + tmp_1 <- filterMsLevel(xod_xgr, msLevel = 1) expect_true(hasAdjustedRtime(tmp_1)) expect_equal(rtime(tmp_1), rtime(xod_xgr)) # adjusted rt present - expect_warning(tmp_1 <- filterMsLevel(xod_xgrg, msLevel = 1, - keepAdjustedRtime = FALSE)) + tmp_1 <- filterMsLevel(xod_xgrg, msLevel = 1, + keepAdjustedRtime = FALSE) expect_true(!hasAdjustedRtime(tmp_1)) expect_equal(rtime(tmp_1), rtime(xod_xgr, adjusted = FALSE)) ## normalize @@ -549,17 +505,20 @@ test_that("XCMSnExp inherited methods work", { expect_equal(tmp_1, as(tmp_2, "OnDiskMSnExp")) }) -test_that("filterFile,XCMSnExp works", { +test_that("filterFile,XCMSnExp and .filter_file_XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## filterFile tmp <- filterFile(xod_x, file = 2) expect_error(tmp@msFeatureData$bla <- 3) expect_true(!hasAdjustedRtime(tmp)) expect_true(!hasFeatures(tmp)) expect_true(all(chromPeaks(tmp)[, "sample"] == 1)) - expect_equal(chromPeaks(tmp)[, -(ncol(chromPeaks(tmp)) - 1)], + expect_equal(chromPeaks(tmp)[, colnames(chromPeaks(tmp)) != "sample"], chromPeaks(xod_x)[chromPeaks(xod_x)[, "sample"] == 2, - -(ncol(chromPeaks(xod_x)) - 1)]) - expect_equal(fileIndex(processHistory(tmp)[[1]]), 1) + colnames(chromPeaks(xod_x)) != "sample"]) + expect_true(length(processHistory(tmp)) == 1) + ## expect_equal(fileIndex(processHistory(tmp)[[1]]), 1) ## check with other index. tmp <- filterFile(xod_x, file = c(1, 3)) expect_true(length(tmp[[1]]) == 1) @@ -568,9 +527,10 @@ test_that("filterFile,XCMSnExp works", { expect_true(all(chromPeaks(tmp)[, "sample"] %in% c(1, 2))) a <- chromPeaks(tmp) b <- chromPeaks(xod_x) - expect_equal(a[, -(ncol(a) - 1)], - b[b[, "sample"] %in% c(1, 3), -(ncol(b) - 1)]) - expect_equal(fileIndex(processHistory(tmp)[[1]]), c(1, 2)) + expect_equal(a[, colnames(a) != "sample"], + b[b[, "sample"] %in% c(1, 3), colnames(b) != "sample"]) + expect_true(length(processHistory(tmp)) == 1) + ## expect_equal(fileIndex(processHistory(tmp)[[1]]), c(1, 2)) ## Errors expect_error(filterFile(xod_x, file = 5)) @@ -579,23 +539,19 @@ test_that("filterFile,XCMSnExp works", { ## Little mockup to check correctness of Process history. od_2 <- xod_x od_2 <- addProcessHistory( - od_2, - ProcessHistory( - type = .PROCSTEP.RTIME.CORRECTION)) + od_2, ProcessHistory(type = .PROCSTEP.RTIME.CORRECTION)) od_2 <- addProcessHistory( - od_2, - ProcessHistory(type = .PROCSTEP.UNKNOWN, - fileIndex = 2, - info. = "I should be here")) + od_2, ProcessHistory(type = .PROCSTEP.UNKNOWN, + fileIndex = 2, + info. = "I should be here")) od_2 <- addProcessHistory( - od_2, - ProcessHistory(type = .PROCSTEP.UNKNOWN, - fileIndex = 1, info. = "EEEEEE")) + od_2, ProcessHistory(type = .PROCSTEP.UNKNOWN, + fileIndex = 1, info. = "EEEEEE")) tmp <- filterFile(od_2, file = 2) ph <- processHistory(tmp) - expect_true(length(ph) == 2) - expect_equal(processType(ph[[2]]), .PROCSTEP.UNKNOWN) + expect_true(length(ph) == 4) + expect_equal(processType(ph[[3]]), .PROCSTEP.UNKNOWN) b <- unlist(lapply(ph, function(z) { processInfo(z) == "I should be here" })) @@ -603,34 +559,44 @@ test_that("filterFile,XCMSnExp works", { b <- unlist(lapply(ph, function(z) { processInfo(z) == "EEEEEE" })) - expect_true(!any(b)) + ## expect_true(!any(b)) ## Do filterFile on xod_xg res <- filterFile(xod_xg, file = 2) expect_true(hasChromPeaks(res)) expect_true(!hasAdjustedRtime(res)) expect_true(!hasFeatures(res)) tmp <- chromPeaks(xod_xg) - expect_equal(chromPeaks(res)[, -(ncol(tmp) - 1)], - tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) + expect_equal(chromPeaks(res)[, colnames(chromPeaks(res)) != "sample"], + tmp[tmp[, "sample"] == 2, colnames(tmp) != "sample"]) expect_equal(rtime(res), rtime(xod_xg, bySample = TRUE)[[2]]) + ## with keepFeatures = TRUE + res <- .filter_file_XCMSnExp(xod_xg, file = 2, keepFeatures = TRUE) + expect_true(hasChromPeaks(res)) + expect_true(hasFeatures(res)) + fvals <- featureValues(xod_xg)[, 2, drop = FALSE] + expect_equal(featureValues(res), fvals[!is.na(fvals[,1]), , drop = FALSE]) + res <- filterFile(xod_xg, file = c(1, 3), keepFeatures = TRUE) + expect_true(hasFeatures(res)) + fvals <- featureValues(xod_xg)[, c(1, 3)] + expect_equal(featureValues(res), fvals) ## Do filterFile on xod_xgr ## Should remove adjusted rts and revert the original peak rts. - res <- filterFile(xod_xgr, file = 2) + res <- .filter_file_XCMSnExp(xod_xgr, keepAdjustedRtime = FALSE, file = 2) expect_true(hasChromPeaks(res)) tmp <- chromPeaks(xod_xg) - expect_equal(chromPeaks(res)[, -(ncol(tmp) - 1)], - tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) + expect_equal(chromPeaks(res)[, colnames(chromPeaks(res)) != "sample"], + tmp[tmp[, "sample"] == 2, colnames(tmp) != "sample"]) expect_equal(rtime(res), rtime(xod_xg, bySample = TRUE)[[2]]) expect_true(!hasAdjustedRtime(res)) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res)) == 1) + expect_true(length(processHistory(res)) == 3) expect_equal(processType(processHistory(res)[[1]]), "Peak detection") ## The same but keep the adjusted retention times. - res <- filterFile(xod_xgr, file = 2, keepAdjustedRtime = TRUE) + res <- .filter_file_XCMSnExp(xod_xgr, file = 2, keepAdjustedRtime = TRUE) expect_true(hasChromPeaks(res)) tmp <- chromPeaks(xod_xgr) - expect_equal(chromPeaks(res)[, -(ncol(tmp) - 1)], - tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) + expect_equal(chromPeaks(res)[, colnames(chromPeaks(res)) != "sample"], + tmp[tmp[, "sample"] == 2, colnames(tmp) != "sample"]) ## has to be different from the ones in xod_x tmp <- chromPeaks(xod_x) expect_true(sum(chromPeaks(res)[, "rt"] == tmp[tmp[, "sample"] == 2, "rt"]) < @@ -643,24 +609,30 @@ test_that("filterFile,XCMSnExp works", { expect_equal(processType(processHistory(res)[[1]]), "Peak detection") expect_equal(processType(processHistory(res)[[2]]), "Peak grouping") expect_equal(processType(processHistory(res)[[3]]), "Retention time correction") + res <- filterFile(xod_xgr, file = 2:3) + expect_true(hasAdjustedRtime(res)) + expect_equal(adjustedRtime(xod_xgr, bySample = TRUE)[[2]], + adjustedRtime(res, bySample = TRUE)[[1]]) + expect_equal(adjustedRtime(xod_xgr, bySample = TRUE)[[3]], + adjustedRtime(res, bySample = TRUE)[[2]]) ## Do filterFile on xod_xgrg - res <- filterFile(xod_xgrg, file = c(1, 3)) + res <- filterFile(xod_xgrg, keepAdjustedRtime = FALSE, file = c(1, 3)) expect_true(hasChromPeaks(res)) tmp <- chromPeaks(xod_x) - expect_equal(chromPeaks(res)[, -(ncol(tmp) - 1)], - tmp[tmp[, "sample"] %in% c(1, 3), -(ncol(tmp) - 1)]) + expect_equal(chromPeaks(res)[, colnames(chromPeaks(res)) != "sample"], + tmp[tmp[, "sample"] %in% c(1, 3), colnames(tmp) != "sample"]) expect_equal(unname(rtime(res, bySample = TRUE)), unname(rtime(xod_xg, bySample = TRUE)[c(1, 3)])) expect_true(!hasAdjustedRtime(res)) expect_true(!hasFeatures(res)) - expect_true(length(processHistory(res)) == 1) + expect_true(length(processHistory(res)) == 3) expect_equal(processType(processHistory(res)[[1]]), "Peak detection") ## keep adjusted rtime res <- filterFile(xod_xgrg, file = c(1, 3), keepAdjustedRtime = TRUE) expect_true(hasChromPeaks(res)) tmp <- chromPeaks(xod_xgr) - expect_equal(chromPeaks(res)[, -(ncol(tmp) - 1)], - tmp[tmp[, "sample"] %in% c(1, 3), -(ncol(tmp) - 1)]) + expect_equal(chromPeaks(res)[, colnames(chromPeaks(res)) != "sample"], + tmp[tmp[, "sample"] %in% c(1, 3), colnames(tmp) != "sample"]) ## has to be different from the ones in xod_x tmp <- chromPeaks(xod_x) expect_true(sum(chromPeaks(res)[, "rt"] == tmp[tmp[, "sample"] %in% c(1, 3), "rt"]) < @@ -675,9 +647,15 @@ test_that("filterFile,XCMSnExp works", { expect_equal(processType(processHistory(res)[[1]]), "Peak detection") expect_equal(processType(processHistory(res)[[2]]), "Peak grouping") expect_equal(processType(processHistory(res)[[3]]), "Retention time correction") + ## keep also features + res <- filterFile(xod_xgrg, file = c(1, 3), keepFeatures = TRUE) + expect_true(hasFeatures(res)) + expect_equal(featureValues(res), featureValues(xod_xgrg)[, c(1, 3)]) }) test_that("filterMz,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## subset on xod_x res <- filterMz(xod_x, mz = c(300, 400)) expect_true(length(res[[1]]) == 1) @@ -749,24 +727,26 @@ test_that("filterMz,XCMSnExp works", { expect_true(all(featureDefinitions(res)[, "mzmax"] >= 300 & featureDefinitions(res)[, "mzmax"] <= 400)) ## With groups - no groups within this range - mzr <- c(120, 130) + mzr <- c(595, 600) res <- filterMz(xod_xg, mz = mzr) expect_true(!hasFeatures(res)) expect_true(hasChromPeaks(res)) - expect_true(all(chromPeaks(res)[, "mz"] >= 120 & chromPeaks(res)[, "mz"] <= 130)) + expect_true(all(chromPeaks(res)[, "mz"] >= 595 & chromPeaks(res)[, "mz"] <= 600)) res <- filterMz(xod_xgrg, mz = mzr) expect_true(!hasFeatures(res)) expect_true(hasChromPeaks(res)) - expect_true(all(chromPeaks(res)[, "mz"] >= 120 & chromPeaks(res)[, "mz"] <= 130)) + expect_true(all(chromPeaks(res)[, "mz"] >= 595 & chromPeaks(res)[, "mz"] <= 600)) }) test_that("filterRt,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## xod_x res <- filterRt(xod_x, rt = c(2700, 2900)) ## Check if the object is OK: expect_equal(pData(res), pData(xod_x)) spct <- spectra(res) - expect_true(length(spct) > 0) + expect_true(length(spct) > 0) ## MsFeatureData has to be locked! expect_error(res@msFeatureData$bla <- 3) ## Retention time has to be within the range. @@ -922,6 +902,8 @@ test_that("filterRt,XCMSnExp works", { ## Test the coercion method. test_that("as,XCMSnExp,xcmsSet works", { + skip_on_os(os = "windows", arch = "i386") + od_x <- faahko_xod res <- .XCMSnExp2xcmsSet(od_x) res <- as(od_x, "xcmsSet") @@ -950,11 +932,12 @@ test_that("as,XCMSnExp,xcmsSet works", { span = 0.4)) ## With groups. res <- as(od_2, "xcmsSet") - ftDef <- featureDefinitions(od_2)[, -ncol(featureDefinitions(od_2))] + ftDef <- featureDefinitions(od_2)[, !names(featureDefinitions(od_2))%in%"peakidx"] ftDef <- S4Vectors::as.matrix(ftDef) rownames(ftDef) <- NULL expect_equal(res@groups, ftDef) expect_equal(res@groupidx, unname(featureDefinitions(od_2)$peakidx)) + expect_equivalent(groupval(res), featureValues(od_2, value = "index")) ## With adjusted retention time. res_2 <- retcor.peakgroups(res, missing = 0, span = 0.4) @@ -963,7 +946,7 @@ test_that("as,XCMSnExp,xcmsSet works", { expect_equal(res@rt$corrected, res_2@rt$corrected) expect_equal(chromPeaks(od_3), peaks(res)) expect_equal(peaks(res_2), peaks(res)) - + ## Test with different binning methods: ## o binlin mfp <- MatchedFilterParam(impute = "lin", binSize = 3) @@ -978,9 +961,15 @@ test_that("as,XCMSnExp,xcmsSet works", { expect_warning(res <- as(od_2, "xcmsSet")) expect_equal(profStep(res), 2) expect_equal(profMethod(res), "binlinbase") + + # Tests for issue https://github.com/sneumann/xcms/issues/464 + res <- as(xod_xgrg, "xcmsSet") + expect_type(groups(res), "double") }) test_that("chromatogram,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## Have: od_x: OnDiskMSNnExp ## xod_x: XCMSnExp, with detected chromPeaks. ## xod_xg: with feature groups. @@ -993,6 +982,10 @@ test_that("chromatogram,XCMSnExp works", { rtr <- c(2600, 2700) tmp_obj <- filterFile(xod_x, file = c(1, 2)) res <- chromatogram(tmp_obj, aggregationFun = "max", rt = rtr) + expect_true(is(res, "XChromatograms")) + chromPeaks(res) + cpks <- chromPeaks(tmp_obj, rt = rtr, type = "apex_within") + expect_equal(cpks, chromPeaks(res)[, 1:ncol(cpks)]) expect_true(all(rtime(res[1, 1]) >= rtr[1])) expect_true(all(rtime(res[1, 1]) <= rtr[2])) expect_true(all(rtime(res[1, 2]) >= rtr[1])) @@ -1006,26 +999,140 @@ test_that("chromatogram,XCMSnExp works", { ## Assure we get the same with an OnDiskMSnExp and grouped XCMSnExp res_2 <- chromatogram(filterFile(od_x, file = c(1, 2)), aggregationFun = "max", rt = rtr) - expect_equal(res, res_2) + expect_equal(pData(res), pData(res_2)) + expect_equal(lapply(res, rtime), lapply(res_2, rtime)) + expect_equal(lapply(res, intensity), lapply(res_2, intensity)) res_3 <- chromatogram(filterFile(xod_xg, file = c(1, 2)), aggregationFun = "max", rt = rtr) expect_equal(res, res_3) - + ## XCMSnExp: with mzrange and rtrange: mzr <- c(120, 130) tmp <- filterMz(xod_xg, mz = mzr) expect_warning(fts <- featureDefinitions(tmp)) - expect_true(is.null(fts)) + expect_true(nrow(fts) == 0) tmp <- filterRt(xod_xg, rt = rtr) featureDefinitions(tmp) + ## no features in mz range res_2 <- chromatogram(xod_xg, rt = rtr, mz = mzr) + expect_true(nrow(chromPeaks(res_2)) == 0) + expect_true(nrow(featureDefinitions(res_2)) == 0) ## - ## XCMSnExp with adjusted rtime - ## SEE runit.Chromatogram.R + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) + xchrs <- chromatogram(xod_chr, mz = mzr, rt = rtr) + expect_equal(nrow(xchrs), 2) + expect_equal(ncol(xchrs), 3) + pks <- chromPeaks(xchrs) + expect_true(all(pks[pks[, "column"] == 1, "sample"] == 1)) + expect_true(all(pks[pks[, "column"] == 2, "sample"] == 2)) + expect_true(all(pks[pks[, "column"] == 3, "sample"] == 3)) + expect_true(all(pks[pks[, "row"] == 1, "rt"] >= rtr[1, 1])) + expect_true(all(pks[pks[, "row"] == 1, "rt"] <= rtr[1, 2])) + expect_true(all(pks[pks[, "row"] == 2, "rt"] >= rtr[2, 1])) + expect_true(all(pks[pks[, "row"] == 2, "rt"] <= rtr[2, 2])) + expect_true(all(pks[pks[, "row"] == 1, "mz"] >= mzr[1, 1])) + expect_true(all(pks[pks[, "row"] == 1, "mz"] <= mzr[1, 2])) + expect_true(all(pks[pks[, "row"] == 2, "mz"] >= mzr[2, 1])) + expect_true(all(pks[pks[, "row"] == 2, "mz"] <= mzr[2, 2])) + + ## Full rt range + xchrs <- chromatogram(xod_chr, mz = mzr) + pks <- chromPeaks(xchrs) + expect_true(all(pks[pks[, "row"] == 1, "mz"] >= mzr[1, 1])) + expect_true(all(pks[pks[, "row"] == 1, "mz"] <= mzr[1, 2])) + expect_true(all(pks[pks[, "row"] == 2, "mz"] >= mzr[2, 1])) + expect_true(all(pks[pks[, "row"] == 2, "mz"] <= mzr[2, 2])) + + expect_equal(chromPeaks(xchrs[1, 1]), + chromPeaks(filterFile(xod_chr, 1), mz = mzr[1, ], + type = "apex_within")) + a <- chromPeaks(xchrs[1, 2]) + b <- chromPeaks(filterFile(xod_chr, 2), mz = mzr[1, ], type = "apex_within") + expect_equal(a[, -11], b[, -11]) + a <- chromPeaks(xchrs[1, 3]) + b <- chromPeaks(filterFile(xod_chr, 3), mz = mzr[1, ], type = "apex_within") + expect_equal(a[, -11], b[, -11]) + a <- chromPeaks(xchrs[2, 1]) + b <- chromPeaks(filterFile(xod_chr, 1), mz = mzr[2, ], type = "apex_within") + expect_equal(a[, -11], b[, -11]) + a <- chromPeaks(xchrs[2, 2]) + b <- chromPeaks(filterFile(xod_chr, 2), mz = mzr[2, ], type = "apex_within") + expect_equal(a[, -11], b[, -11]) + a <- chromPeaks(xchrs[2, 3]) + b <- chromPeaks(filterFile(xod_chr, 3), mz = mzr[2, ], type = "apex_within") + expect_equal(a[, -11], b[, -11]) + + rtr[1, 1] <- 2785 + xchrs <- chromatogram(xod_chr, mz = mzr, rt = rtr) + expect_true(nrow(chromPeaks(xchrs[1, 1])) == 0) + + pks <- chromPeaks(xchrs) + expect_true(!any(pks[, "row"] == 1 & pks[, "column"] == 1)) + + ## With filled-in data + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) + ## group + xod_tmp <- groupChromPeaks( + xod_xgr, param = PeakDensityParam(sampleGroups = rep(1, 3), + minFraction = 0.25)) + xod_tmpf <- fillChromPeaks( + xod_tmp, param = FillChromPeaksParam(fixedRt = 30)) + xchrs <- chromatogram(xod_tmp, mz = mzr[2:1, ], rt = rtr[2:1, ], filled = TRUE) + xchrsf <- chromatogram(xod_tmpf, mz = mzr[2:1, ], rt = rtr[2:1, ], filled = TRUE) + expect_equal(nrow(chromPeaks(xchrs)), 4) + expect_equal(unname(chromPeaks(xchrs)[, "sample"]), c(1, 2, 3, 2)) + expect_equal(unname(chromPeaks(xchrs)[, "row"]), c(1, 1, 1, 2)) + ## With filled-in peaks. + expect_equal(nrow(chromPeaks(xchrsf)), 6) + expect_equal(unname(chromPeaks(xchrsf)[, "sample"]), c(1, 2, 3, 1, 2, 3)) + expect_equal(chromPeakData(xchrsf)$is_filled, c(FALSE, FALSE, FALSE, TRUE, + FALSE, TRUE)) + expect_true(chromPeakData(xchrsf[2, 1])$is_filled) + expect_false(chromPeakData(xchrsf[2, 2])$is_filled, 0) + expect_true(chromPeakData(xchrsf[2, 3])$is_filled) + expect_false(chromPeakData(xchrsf[1, 2])$is_filled) + ## Check feature definitions. + fts <- featureDefinitions(xchrs) + ftsf <- featureDefinitions(xchrsf) + expect_equal(fts$peakidx, list(c(1, 2, 3), 4)) + expect_equal(ftsf$peakidx, list(c(1, 2, 3), c(4, 5, 6))) + xchrsf2 <- chromatogram(xod_tmpf, mz = mzr[2:1, ], rt = rtr[2:1, ]) + expect_equal(chromPeaks(xchrs), chromPeaks(xchrsf2)) + expect_equal(featureDefinitions(xchrs), featureDefinitions(xchrsf2)) + + ## Test with single range. + xchrs <- chromatogram(xod_tmp, mz = mzr[1, ], rt = rtr[1, ], filled = TRUE) + xchrsf <- chromatogram(xod_tmpf, mz = mzr[1, ], rt = rtr[1, ], filled = TRUE) + expect_equal(nrow(chromPeaks(xchrs)), 1) + expect_equal(nrow(chromPeaks(xchrsf)), 3) + expect_equal(chromPeakData(xchrsf)$is_filled, c(TRUE, FALSE, TRUE)) + expect_equal(unname(featureDefinitions(xchrsf)$peakidx[[1]]), c(1, 2, 3)) + xchrsf2 <- chromatogram(xod_tmpf, mz = mzr[1, ], rt = rtr[1, ]) + expect_equal(chromPeaks(xchrsf2), chromPeaks(xchrs)) + expect_equal(featureDefinitions(xchrsf2), featureDefinitions(xchrs)) + + ## + mzm <- rbind(305.1 + c(-0.01, 0.01), 496.2 + c(-0.01, 0.01)) + xchr <- chromatogram(xod_xgrg, mz = mzm) + expect_equal(featureDefinitions(xchr)$row, c(1L, 1L, 1L, 2L, 2L)) + + ## duplicated ranges. + mzm <- rbind(mzm, mzm[1, ]) + xchr_2 <- chromatogram(xod_xgrg, mz = mzm) + expect_equal(featureDefinitions(xchr_2)$row, c(1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L)) + cpks <- chromPeaks(xchr_2) + expect_equal(cpks[cpks[, "row"] == 1, "into"], cpks[cpks[, "row"] == 3, "into"]) + fts <- featureDefinitions(xchr_2) + expect_true(all(fts$peakidx[[1]] != fts$peakidx[[6]])) + expect_equal(cpks[fts$peakidx[[1]], "into"], cpks[fts$peakidx[[6]], "into"]) }) test_that("signal integration is correct", { + skip_on_os(os = "windows", arch = "i386") + ## Testing the signal integration of peaks. ## For centWave tmp <- xod_xgrg @@ -1036,7 +1143,7 @@ test_that("signal integration is correct", { ((rtr[2] - rtr[1]) / (length(chr[1, 1]) - 1))) expect_equal(pkInt, unname(chromPeaks(tmp)[1, "into"])) - tmp <- filterFile(xod_xgrg, file = 2) + tmp <- filterFile(xod_xgrg, file = 2, keepAdjustedRtime = FALSE) idxs <- sample(1:nrow(chromPeaks(tmp)), 5) ## Now, for i = 20, for 6 rt I got an NA. Should I remove these measurements? ## idxs <- 1:nrow(chromPeaks(tmp)) @@ -1051,7 +1158,7 @@ test_that("signal integration is correct", { } ## pkI2 <- .getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) ## expect_equal(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) - + ## Now for matchedfilter. tmp <- findChromPeaks(filterFile(od_x, 2), param = MatchedFilterParam()) rtr <- chromPeaks(tmp)[1, c("rtmin", "rtmax")] @@ -1085,17 +1192,25 @@ test_that("signal integration is correct", { }) test_that("featureValues,XCMSnExp works", { - fdp <- PeakDensityParam(sampleGroups = faahko_xs$class) - od_x <- groupChromPeaks(xod_x, param = fdp) - xs <- group(faahko_xs, method = "density") + skip_on_os(os = "windows", arch = "i386") + + fdp <- PeakDensityParam(sampleGroups = rep(1, 3)) + od_x <- groupChromPeaks(faahko_xod, param = fdp) fvs <- featureValues(od_x, value = "into") expect_equal(rownames(fvs), rownames(featureDefinitions(od_x))) - rownames(fvs) <- NULL - colnames(fvs) <- NULL - gvs <- groupval(xs, value = "into") - rownames(gvs) <- NULL - colnames(gvs) <- NULL - expect_equal(fvs, gvs) + + ## Use the internal function + res <- .feature_values(chromPeaks(od_x), featureDefinitions(od_x), + value = "into", method = "medret", + intensity = "into", + colnames = basename(fileNames(od_x))) + expect_equal(featureValues(od_x, value = "into"), res) + res <- .feature_values(chromPeaks(od_x), featureDefinitions(od_x), + value = "into", method = "sum", + intensity = "into", + colnames = basename(fileNames(od_x))) + expect_equal(featureValues(od_x, value = "into", method = "sum"), res) + fsum <- featureSummary(xod_xg) fv <- featureValues(xod_xg, method = "maxint", value = "into") @@ -1137,15 +1252,52 @@ test_that("featureValues,XCMSnExp works", { ## Check errors expect_error(featureValues(od_x, value = "into", missing = "b")) expect_error(featureValues(od_x, value = "into", missing = TRUE)) + + ## feature values with MS level > 1 + expect_error(featureValues(xod_xg, msLevel = 2), "No feature definitions") + ## Fake feature definitions for MS level 2 + cwp <- CentWaveParam(noise = 10000, snthresh = 40, + prefilter = c(3, 10000)) + tmp <- xod_xg + fd <- new("MsFeatureData") + fd@.xData <- .copy_env(tmp@msFeatureData) + chromPeakData(fd)$ms_level <- 2L + fd$featureDefinitions$ms_level <- 2L + lockEnvironment(fd, bindings = TRUE) + tmp@msFeatureData <- fd + expect_true(hasChromPeaks(tmp, msLevel = 2L)) + expect_true(hasFeatures(tmp, msLevel = 2L)) + expect_equal(featureValues(tmp, msLevel = 2L), featureValues(xod_xg)) + + tmp <- findChromPeaks(tmp, add = TRUE, param = cwp) + expect_equal(unname(chromPeaks(tmp, msLevel = 1L)[, "into"]), + unname(chromPeaks(tmp, msLevel = 2L)[, "into"])) + ## correspondence + pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) + tmp <- groupChromPeaks(tmp, param = pdp, msLevel = 1L) + tmp <- groupChromPeaks(tmp, param = pdp, msLevel = 2L, add = TRUE) + expect_true(hasFeatures(tmp, msLevel = 1L)) + expect_true(hasFeatures(tmp, msLevel = 2L)) + + all <- featureValues(tmp) + ms1 <- featureValues(tmp, msLevel = 1L) + ms2 <- featureValues(tmp, msLevel = 2L) + expect_equal(all, rbind(ms1, ms2)) + rownames(ms1) <- rownames(ms2) <- NULL + expect_equal(ms1, ms2) }) test_that("peakIndex,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + pkI <- .peakIndex(xod_xg) expect_equal(names(pkI), rownames(featureDefinitions(xod_xg))) expect_equal(unname(pkI), featureDefinitions(xod_xg)$peakidx) }) test_that("MS1 MS2 data works on XCMSnExp", { + skip_on_os(os = "windows", arch = "i386") + ## That's to test stuff for issues #208 and related (also issue #214). ## Set every other spectra in the original files to MS2. @@ -1185,7 +1337,7 @@ test_that("MS1 MS2 data works on XCMSnExp", { diffs <- tail(res_by_file[[2]]) - tail(raw_by_file[[2]]) expect_equal(unname(diff(diffs)), rep(0, 5)) diffs <- tail(res_by_file[[3]]) - tail(raw_by_file[[3]]) - expect_equal(unname(diff(diffs)), rep(0, 5)) + expect_equal(unname(diff(diffs)), rep(0, 5)) ## - adjusted rtime of the MS level 2 are in interpolated between rts of ## MS level 2. ## rtime for 3 should be interpolated between 2 and 4: @@ -1236,7 +1388,7 @@ test_that("MS1 MS2 data works on XCMSnExp", { diffs <- tail(res_by_file[[2]]) - tail(raw_by_file[[2]]) expect_equal(unname(diff(diffs)), rep(0, 5)) diffs <- tail(res_by_file[[3]]) - tail(raw_by_file[[3]]) - expect_equal(unname(diff(diffs)), rep(0, 5)) + expect_equal(unname(diff(diffs)), rep(0, 5)) ## - adjusted rtime of the MS level 2 are in interpolated between rts of ## MS level 2. ## rtime for 3 should be interpolated between 2 and 4: @@ -1252,6 +1404,8 @@ test_that("MS1 MS2 data works on XCMSnExp", { }) test_that("extractMsData,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## All the data ## all <- extractMsData(od_x) ## expect_equal(length(all), length(fileNames(od_x))) @@ -1276,7 +1430,7 @@ test_that("extractMsData,XCMSnExp works", { expect_true(all(res[[2]][, "rt"] >= rtr[1] & res[[2]][, "rt"] <= rtr[2])) expect_true(all(res[[1]][, "mz"] >= mzr[1] & res[[1]][, "mz"] <= mzr[2])) expect_true(all(res[[2]][, "mz"] >= mzr[1] & res[[2]][, "mz"] <= mzr[2])) - + ## XCMSnExp, xod_xgr ## with adjusted retention times tmp <- filterFile(xod_xgr, 1:2, keepAdjustedRtime = TRUE) @@ -1296,7 +1450,7 @@ test_that("extractMsData,XCMSnExp works", { tmp_rts <- tmp_rts[tmp_rts >= rtr[1] & tmp_rts <= rtr[2]] res_rts <- res[[2]][, 1] expect_equal(unique(res_rts), unname(tmp_rts)) - + ## without adjusted retention times res_2 <- extractMsData(filterFile(xod_xgr, 1:2), adjustedRtime = FALSE, rt = rtr, mz = mzr) @@ -1320,6 +1474,8 @@ test_that("extractMsData,XCMSnExp works", { }) test_that("spectrapply and spectra,XCMSnExp work", { + skip_on_os(os = "windows", arch = "i386") + ## With adjusted retention time tmp <- filterFile(xod_r, file = 3, keepAdjustedRtime = TRUE) expect_true(hasAdjustedRtime(tmp)) @@ -1342,6 +1498,8 @@ test_that("spectrapply and spectra,XCMSnExp work", { }) test_that("processHistory,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + type_peak_det <- .PROCSTEP.PEAK.DETECTION type_align <- .PROCSTEP.RTIME.CORRECTION type_corr <- .PROCSTEP.PEAK.GROUPING @@ -1352,18 +1510,20 @@ test_that("processHistory,XCMSnExp works", { ph <- processHistory(xod_xgrg) expect_true(length(ph) == 4) ph <- processHistory(xod_xgrg, msLevel = 1L) - expect_true(length(ph) == 2) + expect_true(length(ph) == 4) expect_equal(as.character(class(processParam(ph[[1]]))), "CentWaveParam") }) test_that("split,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + xod <- as(faahko_od, "XCMSnExp") tmp <- split(xod_xgr, f = fromFile(xod_xgr)) ## Split by file. - expect_equal(spectra(tmp[[1]]), spectra(filterFile(xod, file = 1))) - expect_equal(spectra(tmp[[3]]), spectra(filterFile(xod, file = 3))) + expect_equal(spectra(tmp[[1]][7:13]), spectra(filterFile(xod, file = 1)[7:13])) + expect_equal(spectra(tmp[[3]][7:13]), spectra(filterFile(xod, file = 3)[7:13])) ## Split by acquisitionNum. - tmp <- filterRt(xod_xgr, rt = c(2500, 2700)) + tmp <- filterRt(xod_xgr, rt = c(2600, 2700)) expect_true(hasChromPeaks(tmp)) expect_true(hasAdjustedRtime(tmp)) tmp_2 <- split(tmp, f = acquisitionNum(tmp)) @@ -1384,14 +1544,18 @@ test_that("split,XCMSnExp works", { }) test_that("groupnames,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + gn <- groupnames(xod_xgrg) expect_error(groupnames(xod_x)) }) test_that("calibrate,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + do_plot <- FALSE tmp <- filterFile(faahko_xod, file = 1) - + ## Check shift calibration. mzs <- chromPeaks(tmp)[c(3, 6, 7, 13, 17, 32, 45)] mzs_shift <- mzs + 0.0001 @@ -1421,7 +1585,7 @@ test_that("calibrate,XCMSnExp works", { ## edgeshift prm <- CalibrantMassParam(mz = mzs_lin, method = "edgeshift", - mzabs = max_dif) + mzabs = max_dif) res <- calibrate(tmp, prm) expect_true(isCalibrated(res)) diffs <- chromPeaks(res)[, "mz"] - chromPeaks(tmp)[, "mz"] @@ -1440,10 +1604,10 @@ test_that("calibrate,XCMSnExp works", { lin_mod <- lm(diffs[lin_idx] ~ X[lin_idx]) expect_equal(unname(coefficients(lin_mod)[1]), 0.00005, tolerance = 1e-5) expect_equal(unname(coefficients(lin_mod)[2]), 0.000002, tolerance = 1e-5) - + ## Test with a single mass, fall back to shift. prm <- CalibrantMassParam(mz = mzs_lin[1], method = "edgeshift", - mzabs = max_dif) + mzabs = max_dif) expect_warning(res <- calibrate(tmp, prm)) diffs <- chromPeaks(res)[, "mz"] - chromPeaks(tmp)[, "mz"] min_diff <- min(abs(chromPeaks(tmp)[, "mz"] - mzs_lin[1])) @@ -1451,27 +1615,20 @@ test_that("calibrate,XCMSnExp works", { ## Check errors. expect_error(calibrate(tmp, 4)) - expect_error(calibrate(tmp, CalibrantMassParam(mz = list(mzs, mzs)))) + expect_error(calibrate(tmp, CalibrantMassParam(mz = list(mzs, mzs)))) }) test_that("adjustRtime,peakGroups works", { + skip_on_os(os = "windows", arch = "i386") + xod <- faahko_xod - xs <- faahko_xs - ## Group these - xsg <- group(xs) xodg <- groupChromPeaks(xod, - param = PeakDensityParam(sampleGroups = xs$class)) + param = PeakDensityParam(sampleGroups = rep(1, 3))) pks <- chromPeaks(xodg) - rownames(pks) <- NULL - expect_equal(peaks(xsg), pks[, colnames(peaks(xsg))]) - expect_equal(xsg@groupidx, featureDefinitions(xodg)$peakidx) expect_true(length(processHistory(xodg, type = .PROCSTEP.PEAK.DETECTION)) == 1) expect_true(length(processHistory(xodg, type = .PROCSTEP.PEAK.GROUPING)) == 1) - ## Now do the retention time correction - xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 0.3) - ## minFr <- (length(fileNames(xod)) - 1) / length(fileNames(xod)) p <- PeakGroupsParam(minFraction = 1, span = 0.3) xodr <- adjustRtime(xodg, param = p) ## Check that we've got process histories. @@ -1490,18 +1647,6 @@ test_that("adjustRtime,peakGroups works", { expect_true(sum(chromPeaks(xod)[, "rt"] != chromPeaks(xodr)[, "rt"]) > 200) expect_true(sum(chromPeaks(xod)[, "rtmin"] != chromPeaks(xodr)[, "rtmin"]) > 200) expect_true(sum(chromPeaks(xod)[, "rtmax"] != chromPeaks(xodr)[, "rtmax"]) > 200) - ## between xcmsSet and XCMSnExp - pks <- chromPeaks(xodr) - rownames(pks) <- NULL - expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) - ## To compare the adjusted retention time we have to extract it by sample! - ## Otherwise the ordering will not be the same, as rtime is ordered by - ## retention time, but @rt$raw by sample. - expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), - unlist(xsr@rt$corrected, use.names = FALSE)) - ## Just to ensure - are the raw rt the same? - expect_equal(unlist(rtime(xod, bySample = TRUE), use.names = FALSE), - unlist(xs@rt$raw, use.names = FALSE)) ## Check that we get the same by supplying the peakGroupsMatrix. pgm <- adjustRtimePeakGroups(xodg, param = p) p_2 <- p @@ -1520,51 +1665,65 @@ test_that("adjustRtime,peakGroups works", { p_2) ## Doing an additional grouping xodrg <- groupChromPeaks(xodr, param = PeakDensityParam(sampleGroups = - xs$class)) + rep(1, 3))) expect_true(length(processHistory(xodrg, type = .PROCSTEP.PEAK.GROUPING)) == 2) expect_true(hasAdjustedRtime(xodrg)) expect_true(hasFeatures(xodrg)) - xsrg <- group(xsr) - expect_equal(xsrg@groupidx, featureDefinitions(xodrg)$peakidx) - + ## Mod settings: - xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1) xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, span = 1)) - pks <- chromPeaks(xodr) - rownames(pks) <- NULL - expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) - expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), - unlist(xsr@rt$corrected, use.names = FALSE)) - - xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1, - smooth = "linear") xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, span = 1, smooth = "linear")) - pks <- chromPeaks(xodr) - rownames(pks) <- NULL - expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) - expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), - unlist(xsr@rt$corrected, use.names = FALSE)) - - xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1, - family = "symmetric") xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, span = 1, family = "symmetric")) - pks <- chromPeaks(xodr) - rownames(pks) <- NULL - expect_equal(pks[, colnames(peaks(xsr))], peaks(xsr)) - expect_equal(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), - unlist(xsr@rt$corrected, use.names = FALSE)) ## Dropping results. tmp <- dropAdjustedRtime(xodr) - expect_equal(tmp, xod) + expect_equal(tmp, xod) + + ## With subset. + res_sub <- adjustRtime( + xodg, param = PeakGroupsParam(subset = c(1, 3), + subsetAdjust = "previous")) + expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] != + rtime(xodg, bySample = TRUE)[[1]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[2]] != + rtime(xodg, bySample = TRUE)[[2]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[3]] != + rtime(xodg, bySample = TRUE)[[3]])) + expect_equal(unname(rtime(res_sub, bySample = TRUE)[[1]]), + unname(rtime(res_sub, bySample = TRUE)[[2]])) + expect_equal(rtime(res_sub, bySample = TRUE)[[2]], + .applyRtAdjustment(rtime(xodg, bySample = TRUE)[[2]], + rtime(xodg, bySample = TRUE)[[1]], + rtime(res_sub, bySample = TRUE)[[1]])) + res_sub <- adjustRtime( + xodg, param = PeakGroupsParam(subset = c(1, 3), + subsetAdjust = "average")) + expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] != + rtime(xodg, bySample = TRUE)[[1]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[2]] != + rtime(xodg, bySample = TRUE)[[2]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[3]] != + rtime(xodg, bySample = TRUE)[[3]])) + expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] != + rtime(res_sub, bySample = TRUE)[[2]])) + tmp <- adjustRtime(xodg, param = PeakGroupsParam()) + + ## With subsetAdjust = "average" and the left-out being at the end. + res_sub <- adjustRtime( + xodg, param = PeakGroupsParam(subset = 1:2, subsetAdjust = "average")) + res_2 <- adjustRtime( + xodg, param = PeakGroupsParam(subset = 1:2, subsetAdjust = "previous")) + expect_equal(rtime(res_sub), rtime(res_2)) }) test_that("findChromPeaks,MSWParam works", { + skip_on_os(os = "windows", arch = "i386") + od <- microtofq_od ## Restrict to first spectrum od1 <- od[1] @@ -1604,141 +1763,142 @@ test_that("findChromPeaks,MSWParam works", { ## Compare old vs new: pks <- chromPeaks(fticr_xod) rownames(pks) <- NULL - expect_equal(pks[, -ncol(chromPeaks(fticr_xod))], - peaks(fticr_xs)) }) -test_that("featureValues,XCMSnExp works", { - od_x <- faahko_xod - xs <- faahko_xs - - p <- PeakDensityParam(sampleGroups = xs$class) - od_x <- groupChromPeaks(od_x, param = p) +test_that("featureValues,XCMSnExp works as with groupval", { + skip_on_os(os = "windows", arch = "i386") - xs <- group(xs, method = "density") + fval <- featureValues(xod_xg) + expect_true(nrow(fval) == nrow(featureDefinitions(xod_xg))) + expect_true(ncol(fval) == length(fileNames(xod_xg))) + expect_true(is.numeric(fval)) - expect_equal(unname(groupval(xs, value = "into")), - unname(featureValues(od_x, value = "into"))) - expect_equal(unname(groupval(xs, method = "maxint", value = "into")), - unname(featureValues(od_x, method = "maxint", value = "into"))) + fval <- featureValues(xod_xg, value = "index") ## Checking errors - expect_error(featureValues(od_x, value = "bla")) + expect_error(featureValues(od_x, value = "bla")) }) test_that("groupChromPeaks,XCMSnExp,PeakDensityParam works", { - od_x <- faahko_xod - xs <- faahko_xs + skip_on_os(os = "windows", arch = "i386") + ## Check error if no features were found. issue #273 - pdp <- PeakDensityParam(sampleGroups = xs$class, minSamples = 30) - expect_error(groupChromPeaks(od_x, param = pdp), "Unable to group any chromatographic peaks.") - - fdp <- PeakDensityParam(sampleGroups = xs$class) - od_x <- groupChromPeaks(od_x, param = fdp) - xs <- group(xs, method = "density") - expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) - fg <- featureDefinitions(od_x) - fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) - rownames(fg) <- NULL - expect_equal(xs@groups, fg) - expect_true(length(processHistory(od_x)) == 2) - ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] + pdp <- PeakDensityParam(sampleGroups = rep(1, 3), minSamples = 30) + expect_warning(groupChromPeaks(faahko_xod, param = pdp), "Unable to group any chromatographic peaks.") + + fdp <- PeakDensityParam(sampleGroups = rep(1, 3)) + res <- groupChromPeaks(faahko_xod, param = fdp) + expect_true(hasFeatures(res)) + expect_true(length(processHistory(res)) == 2) + ph <- processHistory(res, type = .PROCSTEP.PEAK.GROUPING)[[1]] expect_equal(processParam(ph), fdp) - expect_equal(rownames(featureDefinitions(od_x)), - .featureIDs(nrow(featureDefinitions(od_x)))) - - fdp2 <- PeakDensityParam(sampleGroups = xs$class, binSize = 2, + expect_equal(rownames(featureDefinitions(res)), + .featureIDs(nrow(featureDefinitions(res)))) + + fdp2 <- PeakDensityParam(sampleGroups = rep(1, 3), binSize = 2, minFraction = 0.8) - od_x <- groupChromPeaks(od_x, param = fdp2) - xs <- group(xs, method = "density", minfrac = 0.8, mzwid = 2) - expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) - fg <- featureDefinitions(od_x) - fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) - rownames(fg) <- NULL - expect_equal(xs@groups, fg) - expect_true(length(processHistory(od_x)) == 2) - ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] + res_2 <- groupChromPeaks(faahko_xod, param = fdp2) + expect_true(length(processHistory(res_2)) == 2) + ph <- processHistory(res_2, type = .PROCSTEP.PEAK.GROUPING)[[1]] expect_equal(processParam(ph), fdp2) - expect_equal(rownames(featureDefinitions(od_x)), - .featureIDs(nrow(featureDefinitions(od_x)))) + expect_equal(rownames(featureDefinitions(res_2)), + .featureIDs(nrow(featureDefinitions(res_2)))) + + pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) + res <- groupChromPeaks(faahko_xod, param = pdp) + res_2 <- groupChromPeaks(res, param = pdp) + expect_equal(featureDefinitions(res), featureDefinitions(res_2)) + res_2 <- groupChromPeaks(res, param = pdp, add = TRUE) + expect_true(nrow(featureDefinitions(res_2)) == + 2 * nrow(featureDefinitions(res))) + nr <- nrow(featureDefinitions(res)) + expect_equal(featureDefinitions(res), + featureDefinitions(res_2)[1:nr, ]) + expect_equal(featureDefinitions(res)$mzmed, + featureDefinitions(res_2)$mzmed[(nr + 1):(2 * nr)]) + expect_equal(featureDefinitions(res)$peakidx, + featureDefinitions(res_2)$peakidx[(nr + 1):(2 * nr)]) + + expect_error(groupChromPeaks(faahko_xod, param = pdp, msLevel = 2), "MS level 2") + expect_error(groupChromPeaks(faahko_xod, param = pdp, msLevel = 1:4), + "one MS level at a time") }) test_that("groupPeaks,XCMSnExp,MzClustParam works", { - p <- MzClustParam(sampleGroups = sampclass(fticr_xs)) + skip_on_os(os = "windows", arch = "i386") + + p <- MzClustParam(sampleGroups = rep(1, length(fileNames(fticr_xod)))) fticr_xod2 <- groupChromPeaks(fticr_xod, param = p) - fticr_xs2 <- group(fticr_xs, method = "mzClust") - expect_equal(fticr_xs2@groupidx, featureDefinitions(fticr_xod2)$peakidx) - fg <- featureDefinitions(fticr_xod2) - fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) - rownames(fg) <- NULL - expect_equal(fticr_xs2@groups, fg) + expect_true(hasFeatures(fticr_xod2)) expect_true(length(processHistory(fticr_xod2)) == 2) ph <- processHistory(fticr_xod2, type = .PROCSTEP.PEAK.GROUPING)[[1]] expect_equal(processParam(ph), p) expect_equal(rownames(featureDefinitions(fticr_xod2)), .featureIDs(nrow(featureDefinitions(fticr_xod2)))) - p2 <- MzClustParam(sampleGroups = fticr_xs$class, absMz = 1, - minFraction = 0.8) + p2 <- MzClustParam(sampleGroups = rep(1, length(fileNames(fticr_xod))), + absMz = 1, minFraction = 0.8) fticr_xod2 <- groupChromPeaks(fticr_xod, param = p2) - fticr_xs2 <- group(fticr_xs, method = "mzClust", minfrac = 0.8, mzabs = 1) - expect_equal(fticr_xs2@groupidx, featureDefinitions(fticr_xod2)$peakidx) - fg <- featureDefinitions(fticr_xod2) - fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) - rownames(fg) <- NULL - expect_equal(fticr_xs2@groups, fg) expect_true(length(processHistory(fticr_xod2)) == 2) ph <- processHistory(fticr_xod2, type = .PROCSTEP.PEAK.GROUPING)[[1]] - expect_equal(processParam(ph), p2) + expect_equal(processParam(ph), p2) expect_equal(rownames(featureDefinitions(fticr_xod2)), .featureIDs(nrow(featureDefinitions(fticr_xod2)))) }) test_that("groupChromPeaks,XCMSnExp,NearestPeaksParam works", { - od_x <- faahko_xod - xs <- faahko_xs - p <- NearestPeaksParam(sampleGroups = xs$class) - od_x <- groupChromPeaks(od_x, param = p) - xs <- group(xs, method = "nearest") - expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) - fg <- featureDefinitions(od_x) - fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) - rownames(fg) <- NULL - expect_equal(xs@groups, fg) - expect_true(length(processHistory(od_x)) == 2) - ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] + skip_on_os(os = "windows", arch = "i386") + + p <- NearestPeaksParam(sampleGroups = rep(1, 3)) + res <- groupChromPeaks(faahko_xod, param = p) + expect_true(hasFeatures(res)) + expect_true(length(processHistory(res)) == 2) + ph <- processHistory(res, type = .PROCSTEP.PEAK.GROUPING)[[1]] expect_equal(processParam(ph), p) - expect_equal(rownames(featureDefinitions(od_x)), - .featureIDs(nrow(featureDefinitions(od_x)))) - fdp2 <- NearestPeaksParam(sampleGroups = xs$class, kNN = 3) - od_x <- groupChromPeaks(od_x, param = fdp2) - xs <- group(xs, method = "nearest", kNN = 3) - expect_equal(xs@groupidx, featureDefinitions(od_x)$peakidx) - fg <- featureDefinitions(od_x) - fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) - rownames(fg) <- NULL - expect_equal(xs@groups, fg) - expect_true(length(processHistory(od_x)) == 2) - ph <- processHistory(od_x, type = .PROCSTEP.PEAK.GROUPING)[[1]] - expect_equal(processParam(ph), fdp2) - expect_equal(rownames(featureDefinitions(od_x)), - .featureIDs(nrow(featureDefinitions(od_x)))) + expect_equal(rownames(featureDefinitions(res)), + .featureIDs(nrow(featureDefinitions(res)))) + fdp2 <- NearestPeaksParam(sampleGroups = rep(1, 3), kNN = 3) + res <- groupChromPeaks(faahko_xod, param = fdp2) + expect_true(length(processHistory(res)) == 2) + ph <- processHistory(res, type = .PROCSTEP.PEAK.GROUPING)[[1]] + expect_equal(processParam(ph), fdp2) + expect_equal(rownames(featureDefinitions(res)), + .featureIDs(nrow(featureDefinitions(res)))) + + expect_error(groupChromPeaks(faahko_xod, param = p, msLevel = 2), "MS level 2") + expect_error(groupChromPeaks(faahko_xod, param = p, msLevel = 1:3), " at a time") + res <- groupChromPeaks(faahko_xod, param = p) + res_2 <- groupChromPeaks(res, param = p) + expect_equal(featureDefinitions(res), featureDefinitions(res_2)) + res_2 <- groupChromPeaks(res, param = p, add = TRUE) + expect_true(nrow(featureDefinitions(res_2)) == + 2 * nrow(featureDefinitions(res))) + nr <- nrow(featureDefinitions(res)) + expect_equal(featureDefinitions(res), featureDefinitions(res)[1:nr, ]) + expect_equal(featureDefinitions(res)$peakidx, + featureDefinitions(res_2)$peakidx[(nr + 1):(2 * nr)]) + expect_equal(featureDefinitions(res)$mzmed, + featureDefinitions(res_2)$mzmed[(nr + 1):(2 * nr)]) }) test_that("fillChromPeaks,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + ## No adjusted retention times expect_true(!.hasFilledPeaks(xod_xg)) + expect_false(hasFilledChromPeaks(xod_xg)) res <- fillChromPeaks(xod_xg) expect_true(.hasFilledPeaks(res)) - ph <- processHistory(res, type = .PROCSTEP.PEAK.FILLING) + expect_true(hasFilledChromPeaks(res)) + ph <- processHistory(res, type = xcms:::.PROCSTEP.PEAK.FILLING) expect_true(length(ph) == 1) expect_equal(ph[[1]]@param, FillChromPeaksParam()) ## Check parameter filled in featureValues (issue #157) expect_equal(featureValues(res, filled = FALSE), featureValues(xod_xg)) - + ## Check if the signal corresponds to what we expect for some peaks. fp <- chromPeaks(res) - fp <- fp[fp[, "is_filled"] == 1, ] + fp <- fp[chromPeakData(res)$is_filled, ] idxs <- sample(1:nrow(fp), 5) for (i in idxs) { cfp <- fp[i, , drop = FALSE] @@ -1749,25 +1909,6 @@ test_that("fillChromPeaks,XCMSnExp works", { (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1) expect_equal(unname(into), unname(cfp[1, "into"])) } - ## Plot the data for some... - if (FALSE) { - pk_idx <- featureValues(res)[1, ] - pks <- chromPeaks(res)[pk_idx, ] - rtr <- c(min(pks[, "rtmin"]), max(pks[, "rtmax"])) - rtr[1] <- rtr[1] - 10 - rtr[2] <- rtr[2] + 10 - chrs <- chromatogram(res, rt = rtr, mz = c(min(pks[, "mzmin"]), - max(pks[, "mzmax"])))[1, ] - plot(3, 3, pch = NA, xlim = range(lapply(chrs, rtime), na.rm = TRUE), - ylim = range(lapply(chrs, intensity), na.rm = TRUE), xlab = "rt", - ylab = "int") - for (i in 1:length(chrs)) { - points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", - col = ifelse(pks[i, "is_filled"], yes = "red", no = "black")) - abline(v = pks[i, c("rtmin", "rtmax")], - col = ifelse(pks[i, "is_filled"], yes = "red", no = "black")) - } - } ## Check if the results are similar that we get with findChromPeaks for (i in 1:length(fileNames(xod_xg))) { @@ -1786,12 +1927,12 @@ test_that("fillChromPeaks,XCMSnExp works", { ## into expect_true(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99) expect_equal(unname(fnd_pks[, "into"]), unname(fld_pks[, "into"])) - ## expect_equal(fnd_pks[, "into"], fld_pks[, "into"]) + ## expect_equal(fnd_pks[, "into"], fld_pks[, "into"]) ## maxo expect_equal(unname(fnd_pks[, "maxo"]), unname(fld_pks[, "maxo"])) expect_equal(unname(fnd_pks[, "maxo"]), unname(fld_pks[, "maxo"])) } - + ## Check for the NAs if there is really no signal gv <- featureValues(res) feat_i <- which(is.na(gv[, 1])) @@ -1806,24 +1947,34 @@ test_that("fillChromPeaks,XCMSnExp works", { mzs <- unlist(lapply(spctr, mz)) ## No spectra for the fiven mz: expect_equal(sum(mzs >= pkArea[3] & mzs <= pkArea[4]), 0) - + ## Check increasing the expandRt and expandMz to see whether we get rid of ## the NA. res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(expandMz = 1)) ## Check if the mzrange is now indeed broader for the integrated ones. fp <- chromPeaks(res) - fp <- fp[fp[, "is_filled"] == 1, ] + fp <- fp[chromPeakData(res)$is_filled, ] fp2 <- chromPeaks(res_2) - fp2 <- fp2[fp2[, "is_filled"] == 1, ] + fp2 <- fp2[chromPeakData(res)$is_filled, ] expect_equal(fp2[, "mzmax"] - fp2[, "mzmin"], 2 * (fp[, "mzmax"] - fp[, "mzmin"])) - + + res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(fixedRt = 1)) + ## Check if the rtrange is now indeed broader for the integrated ones. + fp <- chromPeaks(res) + fp <- fp[chromPeakData(res)$is_filled, ] + fp2 <- chromPeaks(res_2) + fp2 <- fp2[chromPeakData(res)$is_filled, ] + expect_equal(fp[, "rt"], fp2[, "rt"]) + expect_equal(fp[, "rtmin"] - 1, fp2[, "rtmin"]) + expect_equal(fp[, "rtmax"] + 1, fp2[, "rtmax"]) + res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(expandRt = 1)) - ## Check if the mzrange is now indeed broader for the integrated ones. + ## Check if the rtrange is now indeed broader for the integrated ones. fp <- chromPeaks(res) - fp <- fp[fp[, "is_filled"] == 1, ] + fp <- fp[chromPeakData(res)$is_filled, ] fp2 <- chromPeaks(res_2) - fp2 <- fp2[fp2[, "is_filled"] == 1, ] + fp2 <- fp2[chromPeakData(res)$is_filled, ] expect_equal(fp2[, "rtmax"] - fp2[, "rtmin"], 2 * (fp[, "rtmax"] - fp[, "rtmin"])) ## Check using ppm @@ -1838,17 +1989,17 @@ test_that("fillChromPeaks,XCMSnExp works", { ## Drop feature definitions from res -> also filled peaks should be dropped. res_rem <- dropFeatureDefinitions(res) expect_true(!.hasFilledPeaks(res_rem)) - expect_true(!any(chromPeaks(res_rem)[, "is_filled"] == 1)) + expect_true(!any(chromPeakData(res_rem)$is_filled)) expect_equal(res_rem, xod_x) - + ## With adjusted rtime. res_2 <- fillChromPeaks(xod_xgrg) ## Check if the signal corresponds to what we expect for some peaks. fp <- chromPeaks(res_2) - fp <- fp[fp[, "is_filled"] == 1, ] + fp <- fp[chromPeakData(res_2)$is_filled, ] ## These have to be different from before! fp_raw <- chromPeaks(res) - fp_raw <- fp_raw[fp_raw[, "is_filled"] == 1, ] + fp_raw <- fp_raw[chromPeakData(res)$is_filled, ] expect_true(all(fp_raw[, "rt"] != fp[, "rt"])) expect_true(all(fp_raw[, "rtmin"] != fp[, "rtmin"])) expect_true(all(fp_raw[, "rtmax"] != fp[, "rtmax"])) @@ -1882,32 +2033,45 @@ test_that("fillChromPeaks,XCMSnExp works", { res_rem <- dropFilledChromPeaks(res_2) expect_true(!.hasFilledPeaks(res_rem)) expect_equal(res_rem, xod_xgrg) + + expect_error(fillChromPeaks(xod_xgrg, msLevel = 1:2, + param = FillChromPeaksParam()), + "for one MS level at a time") + expect_error(fillChromPeaks(xod_xgrg, param = FillChromPeaksParam(), + msLevel = 2L), + "No feature definitions") +}) + +test_that("fillChromPeaks,XCMSnExp works with only MS2 data", { + skip_on_os(os = "windows", arch = "i386") + + tmp <- xod_xgrg + fData(tmp)[fromFile(tmp) == 2, "msLevel"] <- 2L + res <- fillChromPeaks(tmp, FillChromPeaksParam(fixedRt = 2)) + expect_true(!any(chromPeakData(res)$is_filled[chromPeaks(res)[, "sample"] == 2])) + res_2 <- fillChromPeaks(xod_xgrg, FillChromPeaksParam(fixedRt = 2)) + expect_true(nrow(chromPeaks(res_2)) > nrow(chromPeaks(res))) + expect_equal(chromPeaks(res)[chromPeaks(res)[, "sample"] == 1, ], + chromPeaks(res_2)[chromPeaks(res_2)[, "sample"] == 1, ]) +}) + +test_that("fillChomPeaks,ChromPeakAreaParam works", { + skip_on_os(os = "windows", arch = "i386") + + res <- fillChromPeaks(xod_xgrg, ChromPeakAreaParam()) + expect_true(hasFilledChromPeaks(res)) }) test_that("fillChromPeaks,XCMSnExp with MSW works", { + skip_on_os(os = "windows", arch = "i386") + p <- MzClustParam() fticr_xodg <- groupChromPeaks(fticr_xod, param = p) expect_error(res <- fillChromPeaks(fticr_xod)) res <- fillChromPeaks(fticr_xodg) - + ## Got a signal for all of em. expect_true(!any(is.na(featureValues(res)))) - ## 1) Compare with what I get for xcmsSet. - tmp_x <- fticr_xs - tmp_x <- group(tmp_x, method = "mzClust") - tmp_x <- fillPeaks(tmp_x, method = "MSW") - ## Compare - expect_equal(unname(groupval(tmp_x)), unname(featureValues(res))) - expect_equal(unname(groupval(tmp_x, value = "maxo")), - unname(featureValues(res, value = "maxo"))) - expect_equal(unname(groupval(tmp_x, value = "into")), - unname(featureValues(res, value = "into"))) - expect_equal(unname(groupval(tmp_x, value = "mz")), - unname(featureValues(res, value = "mz"))) - expect_equal(unname(groupval(tmp_x, value = "mzmin")), - unname(featureValues(res, value = "mzmin"))) - expect_equal(unname(groupval(tmp_x, value = "mzmax")), - unname(featureValues(res, value = "mzmax"))) ## OK ## 2) Check if the fillChromPeaks returns same/similar data than the ## findChromPeaks does: @@ -1941,6 +2105,8 @@ test_that("fillChromPeaks,XCMSnExp with MSW works", { }) test_that("fillChromPeaks,XCMSnExp with matchedFilter works", { + skip_on_os(os = "windows", arch = "i386") + tmp <- findChromPeaks(faahko_od, param = MatchedFilterParam()) sg <- rep(1, length(fileNames(tmp))) tmp <- groupChromPeaks(tmp, param = PeakDensityParam(sampleGroups = sg)) @@ -1973,7 +2139,7 @@ test_that("fillChromPeaks,XCMSnExp with matchedFilter works", { diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"] expect_true(max(diffs) < 1e-4) ## into - expect_equal(unname(fnd_pks[, "into"]), unname(fld_pks[, "into"])) + expect_equal(unname(fnd_pks[, "into"]), unname(fld_pks[, "into"])) ## maxo expect_equal(unname(fnd_pks[, "maxo"]), unname(fld_pks[, "maxo"])) } @@ -1992,18 +2158,269 @@ test_that("fillChromPeaks,XCMSnExp with matchedFilter works", { }) test_that("writeMSData,XCMSnExp works", { - ## Writing plain MS data to mzML - tmp_path <- tempdir() - nfls <- paste0(tmp_path, "/", - sub(".CDF$", ".mzML", basename(fileNames(xod_x)))) - writeMSData(xod_x, file = nfls) - data_in <- readMSData(nfls, mode = "onDisk") - expect_equal(rtime(data_in), rtime(xod_x)) - + skip_on_os(os = "windows", arch = "i386") + ## Write adjusted retention times + tmp_path <- tempdir() nfls <- paste0(tmp_path, "/", sub(".CDF$", "_2.mzML", basename(fileNames(xod_xgr)))) - writeMSData(xod_xgr, file = nfls) + tmp <- filterRt(xod_xgr, rt = c(2500, 2700)) + writeMSData(tmp, file = nfls) data_in <- readMSData(nfls, mode = "onDisk") - expect_equal(rtime(data_in), rtime(xod_xgr)) + expect_equal(unname(rtime(data_in)), unname(rtime(tmp))) +}) + +test_that("adjustRtime,XCMSnExp,Obiwarp works", { + skip_on_os(os = "windows", arch = "i386") + + prm <- ObiwarpParam(centerSample = 3, subset = c(1, 2), binSize = 10) + expect_error(adjustRtime(xod_x, param = prm)) + prm <- ObiwarpParam(centerSample = 2, subset = c(1, 2), binSize = 10) + res <- adjustRtime(xod_x, param = prm) + plotAdjustedRtime(res, col = c("#ff000060", "#00ff0060", "#0000ff60")) + expect_equal(rtime(xod_x, bySample = TRUE)[[2]], + rtime(xod_x, bySample = TRUE)[[2]]) + expect_equal(rtime(xod_x, bySample = TRUE)[[3]], + rtime(xod_x, bySample = TRUE)[[3]]) + + prm <- ObiwarpParam(centerSample = 1, subset = c(1, 3), binSize = 10) + res <- adjustRtime(xod_x, param = prm) + plotAdjustedRtime(res, col = c("#ff000060", "#00ff0060", "#0000ff60")) + expect_equal(rtime(xod_x, bySample = TRUE)[[1]], + rtime(xod_x, bySample = TRUE)[[1]]) +}) + +test_that("dropFilledChromPeaks,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + xod_tmp <- groupChromPeaks( + xod_xgr, param = PeakDensityParam(sampleGroups = rep(1, 3), + minFraction = 0.25)) + xod_tmpf <- fillChromPeaks( + xod_tmp, param = FillChromPeaksParam(fixedRt = 30)) + res <- dropFilledChromPeaks(xod_tmpf) + expect_equal(chromPeaks(res), chromPeaks(xod_xgr)) +}) + +test_that("updateObject,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + tmp <- faahko_xod + en <- new("MsFeatureData") + en@.xData <- .copy_env(tmp@msFeatureData@.xData) + rownames(en$chromPeaks) <- NULL + tmp@msFeatureData <- en + expect_true(is.null(rownames(chromPeaks(tmp)))) + tmp <- updateObject(tmp) + expect_true(!is.null(rownames(chromPeaks(tmp)))) +}) + +test_that("filterMsLevel works with MS>1", { + skip_on_os(os = "windows", arch = "i386") + + ms2 <- filterRt(as(pest_dda, "OnDiskMSnExp"), rt = c(200, 600)) + res <- findChromPeaks(ms2, param = CentWaveParam( + prefilter = c(3, 1000)), + msLevel = 1) + res_1 <- filterMsLevel(res, msLevel = 1) + res_2 <- filterMsLevel(res, msLevel = 2) + expect_true(all(msLevel(res_1) == 1)) + expect_true(all(msLevel(res_2) == 2)) + expect_equal(chromPeaks(res_1), chromPeaks(res)) + expect_equal(processHistory(res_1), processHistory(res)) + expect_true(length(processHistory(res_2)) == 0) + expect_true(hasChromPeaks(res_1)) + expect_false(hasChromPeaks(res_1, msLevel = 2L)) + expect_false(hasChromPeaks(res_2)) +}) + +test_that("chromPeakData,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + tmp <- xod_x + expect_error(chromPeakData(tmp) <- 5, "'chromPeakData' is supposed") + chromPeakData(tmp)$other_column <- "b" + expect_true(all(chromPeakData(tmp)$other_column == "b")) + + res <- filterRt(tmp, rt = c(2800, 3000)) + expect_true(hasChromPeaks(res)) + expect_true(validObject(res)) + expect_true(all(chromPeakData(res)$other_column == "b")) + expect_true(any(colnames(chromPeakData(res)) == "other_column")) + + res <- filterMz(tmp, mz = c(400, 500)) + expect_true(hasChromPeaks(res)) + expect_true(validObject(res)) + expect_true(all(chromPeakData(res)$other_column == "b")) + expect_true(any(colnames(chromPeakData(res)) == "other_column")) + + res <- filterMsLevel(tmp, msLevel = 2L) + expect_true(length(res) == 0) + expect_false(hasChromPeaks(res)) + res <- filterMsLevel(tmp, msLevel = 1L) + expect_equal(chromPeaks(res), chromPeaks(tmp)) + expect_equal(chromPeakData(res), chromPeakData(tmp)) + + res <- filterFile(tmp, 2) + expect_true(hasChromPeaks(res)) + expect_true(validObject(res)) + expect_true(all(chromPeakData(res)$other_column == "b")) + expect_true(any(colnames(chromPeakData(res)) == "other_column")) + + tmp <- groupChromPeaks(tmp, param = PeakDensityParam(sampleGroups = rep(1, 3))) + res <- fillChromPeaks(tmp) + expect_true(hasChromPeaks(res)) + expect_true(validObject(res)) + expect_true(is.character(chromPeakData(res)$other_column)) + expect_true(any(colnames(chromPeakData(res)) == "other_column")) +}) + +test_that("plot,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- c(301.9, 302.1) + rtr <- c(2500, 2650) + tmp <- filterMz(filterRt(xod_x, rtr), mzr) + centroided(tmp) <- TRUE + + plot(tmp[1:3], type = "spectra") + + plot(tmp, type = "XIC") +}) + +test_that("refineChromPeaks,CleanPeaksParam works", { + skip_on_os(os = "windows", arch = "i386") + + rtw <- chromPeaks(xod_x)[, "rtmax"] - chromPeaks(xod_x)[, "rtmin"] + res <- refineChromPeaks(xod_x, param = CleanPeaksParam(20)) + rtw2 <- chromPeaks(res)[, "rtmax"] - chromPeaks(res)[, "rtmin"] + expect_true(all(rtw2 < 20)) + res <- refineChromPeaks(xod_x, param = CleanPeaksParam(1)) + expect_true(length(chromPeaks(res)) == 0) + expect_true(is(processHistory(res)[[2]]@param, "CleanPeaksParam")) + + expect_warning(res <- refineChromPeaks(xod_x, param = CleanPeaksParam(20), + msLevel = 2L)) + expect_equal(chromPeaks(res), chromPeaks(xod_x)) + + res <- refineChromPeaks(xod_xgr, param = CleanPeaksParam(20)) + expect_true(hasAdjustedRtime(res)) + + res <- refineChromPeaks(xod_xgrg, param = CleanPeaksParam(20)) + expect_true(hasFeatures(xod_xgrg)) + expect_false(hasFeatures(res)) + + ## Fake MS level > 1 + tmp <- xod_x + fd <- new("MsFeatureData") + fd@.xData <- .copy_env(tmp@msFeatureData) + chromPeakData(fd)$ms_level <- 2L + lockEnvironment(fd, bindings = TRUE) + tmp@msFeatureData <- fd + expect_true(hasChromPeaks(tmp, msLevel = 2L)) + tmp <- findChromPeaks(tmp, add = TRUE, + param = CentWaveParam(prefilter = c(5, 10000), + noise = 10000, sn = 40)) + expect_true(hasChromPeaks(tmp, msLevel = 1L)) + res <- refineChromPeaks(tmp, msLevel = 1L, param = CleanPeaksParam(20)) + expect_equal(chromPeaks(tmp, msLevel = 2L), chromPeaks(res, msLevel = 2L)) + expect_true(nrow(chromPeaks(tmp, msLevel = 1L)) > + nrow(chromPeaks(res, msLevel = 1L))) + res_2 <- refineChromPeaks(res, msLevel = 2L, param = CleanPeaksParam(20)) + expect_equal(chromPeaks(res, msLevel = 1L), chromPeaks(res_2, msLevel = 1L)) + expect_true(nrow(chromPeaks(res, msLevel = 2L)) > + nrow(chromPeaks(res_2, msLevel = 2L))) +}) + +test_that("refineChromPeaks,MergeNeighboringPeaksParam works", { + skip_on_os(os = "windows", arch = "i386") + + prm <- MergeNeighboringPeaksParam(expandRt = 4) + res <- refineChromPeaks(xod_xgr, param = prm) + expect_true(hasAdjustedRtime(res)) + expect_equal(rtime(xod_xgr), rtime(res)) + pks_old <- chromPeaks(res)[!chromPeakData(res)$merged, ] + expect_equal(chromPeaks(xod_xgr)[rownames(pks_old), ], pks_old) + expect_equal(rownames(chromPeaks(res)), rownames(chromPeakData(res))) + expect_true(nrow(chromPeaks(res)) < nrow(chromPeaks(xod_xgr))) + + expect_warning(res <- refineChromPeaks(as(od_x, "XCMSnExp"), param = prm), + "Please run") + + prm <- MergeNeighboringPeaksParam(expandRt = 10, ppm = 50) + res <- refineChromPeaks(pest_swth, param = prm) + expect_equal(chromPeaks(res), chromPeaks(pest_swth)) + expect_true(all(chromPeakData(res)$merged == FALSE)) + + res <- refineChromPeaks(pest_swth, param = prm, msLevel = 2L) + expect_equal(chromPeaks(res, msLevel = 1L), + chromPeaks(pest_swth, msLevel = 1L)) + expect_true(nrow(chromPeaks(res, msLevel = 2L)) < + nrow(chromPeaks(pest_swth, msLevel = 2L))) + + ## With fake MS level 2 data. + tmp <- xod_x + fd <- new("MsFeatureData") + fd@.xData <- .copy_env(tmp@msFeatureData) + chromPeakData(fd)$ms_level <- 2L + lockEnvironment(fd, bindings = TRUE) + tmp@msFeatureData <- fd + expect_true(hasChromPeaks(tmp, msLevel = 2L)) + tmp <- findChromPeaks(tmp, add = TRUE, + param = CentWaveParam(prefilter = c(5, 10000), + noise = 10000, sn = 40)) + prm <- MergeNeighboringPeaksParam(expandRt = 4) + res <- refineChromPeaks(tmp, param = prm, msLevel = 1L) + expect_true(nrow(chromPeaks(res, msLevel = 1L)) < + nrow(chromPeaks(tmp, msLevel = 1L))) + expect_equal(chromPeaks(res, msLevel = 2L), chromPeaks(tmp, msLevel = 2L)) +}) + +test_that("refineChromPeaks,FilterIntensityParam works", { + skip_on_os(os = "windows", arch = "i386") + + pks <- chromPeaks(xod_x) + prm <- FilterIntensityParam(nValues = 1, threshold = 50000) + res <- refineChromPeaks(xod_x, param = prm, msLevel = 2L) + expect_equal(chromPeaks(res), chromPeaks(xod_x)) + res <- refineChromPeaks(xod_x, param = prm, msLevel = 1L) + + x <- filterRt(xod_x, rt = c(2500, 3500)) + prm <- FilterIntensityParam(nValues = 2, threshold = 50000) + res <- refineChromPeaks(xod_x, param = prm) + expect_true(all(chromPeaks(res)[, "maxo"] > 50000)) + res <- refineChromPeaks(xod_x, param = prm, msLevel = 3) + expect_equal(chromPeaks(res), chromPeaks(xod_x)) + + ## With a real object having MS2. + prm <- FilterIntensityParam(nValues = 2, threshold = 50000) + res <- refineChromPeaks(pest_swth, param = prm, msLevel = 1L) + expect_equal(chromPeaks(res, msLevel = 2L), + chromPeaks(pest_swth, msLevel = 2L)) + expect_true(nrow(chromPeaks(res, msLevel = 1L)) == 0) + + res <- refineChromPeaks(pest_swth, param = prm, msLevel = 2L) + expect_equal(chromPeaks(res, msLevel = 1L), + chromPeaks(pest_swth, msLevel = 1L)) + expect_true(nrow(chromPeaks(res, msLevel = 2L)) == 0) +}) + +test_that("filterChromPeaks,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + res <- filterChromPeaks(xod_x, keep = c(5, 23, 3)) + expect_true(nrow(chromPeaks(res)) == 3) + expect_equal(chromPeaks(res), chromPeaks(xod_x)[c(3, 5, 23), ]) + + expect_error(filterChromPeaks(xod_x, keep = 1235), "out of bounds") + + res <- filterChromPeaks(xod_xg, keep = c(8, 197, 14)) + expect_equal(chromPeaks(res), chromPeaks(xod_xg)[c(8, 14, 197), ]) + expect_true(nrow(featureDefinitions(res)) == 2) + ## chromPeak 14 is part of feature 6 + tmp <- featureValues(xod_xg)[6, ] + tmp[3] <- NA + expect_equal(featureValues(res)[1, ], tmp) + ## chromPeaks 8 and 197 are part of feature 46 + expect_equal(featureValues(res)[2, ], featureValues(xod_xg)[46, ]) }) diff --git a/tests/testthat/test_methods-XChromatogram.R b/tests/testthat/test_methods-XChromatogram.R new file mode 100644 index 000000000..6445d6f00 --- /dev/null +++ b/tests/testthat/test_methods-XChromatogram.R @@ -0,0 +1,226 @@ +test_that("show,XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + show(XChromatogram()) +}) + +test_that("chromPeaks and chromPeakData for XChromatogram work", { + skip_on_os(os = "windows", arch = "i386") + + chr <- Chromatogram(rtime = 1:10, + intensity = c(4, 12, 18, 24, 23, 18, 15, 3, 2, 5)) + xchr <- as(chr, "XChromatogram") + expect_true(nrow(chromPeaks(xchr)) == 0) + pks <- matrix(nrow = 4, ncol = 6) + colnames(pks) <- .CHROMPEAKS_REQ_NAMES + pks[1, ] <- c(4, 2, 8, 24, NA, NA) + pks[2, ] <- c(3, 2, 7, 24, NA, NA) + pks[3, ] <- c(9, 7, 10, 2, NA, NA) + pks[4, ] <- c(8, 5, 10, 3, NA, NA) + expect_error(chromPeaks(xchr) <- 4) + chromPeaks(xchr) <- pks + expect_equal(chromPeaks(xchr), pks) + expect_equal(nrow(chromPeakData(xchr)), nrow(pks)) + expect_equal(chromPeakData(xchr)$is_filled, rep(FALSE, nrow(pks))) + expect_equal(chromPeakData(xchr)$ms_level, rep(1L, nrow(pks))) + chromPeakData(xchr)$id <- "a" + expect_equal(chromPeakData(xchr)$id, rep("a", nrow(pks))) + expect_error(chromPeakData(xchr) <- 4) + expect_error(chromPeakData(xchr) <- DataFrame()) + expect_error(chromPeakData(xchr) <- DataFrame(id = letters[1:nrow(pks)])) + + expect_true(nrow(chromPeaks(xchr, rt = c(20, 30))) == 0) + expect_equal(chromPeaks(xchr, rt = c(2, 7)), pks) + expect_equal(chromPeaks(xchr, rt = c(2, 7), type = "apex_within"), + pks[1:2, ]) + expect_equal(chromPeaks(xchr, rt = c(2, 7), type = "within"), + pks[2, , drop = FALSE]) + + expect_equal(chromPeaks(xchr, mz = 123), pks) + + ## with m/z + pks <- cbind(pks, mz = c(123, 332, 332, 432)) + pks <- cbind(pks, mzmin = c(122.9, 331.9, 331.8, 431.9)) + pks <- cbind(pks, mzmax = c(123.1, 332.1, 332.1, 432.2)) + chromPeaks(xchr) <- pks + expect_true(nrow(chromPeaks(xchr, mz = 23)) == 0) + expect_equal(chromPeaks(xchr, mz = 123, type = "apex_within"), + pks[1, , drop = FALSE]) + expect_equal(chromPeaks(xchr, mz = 331.89, ppm = 100), pks[2:3, ]) + expect_equal(chromPeaks(xchr, mz = 331.89, ppm = 10), pks[3, , drop = FALSE]) + + ## with msLevel + res <- chromPeaks(xchr, msLevel = 2L) + expect_true(nrow(res) == 0) + chromPeakData(xchr)$ms_level <- c(1L, 2L, 3L, 2L) + res <- chromPeaks(xchr, msLevel = 2L) + expect_equal(res, pks[c(2, 4), ]) +}) + +test_that("plot,XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + chr <- Chromatogram(rtime = 1:10, + intensity = c(4, 12, 18, 24, 23, 18, 15, 3, 2, 5)) + xchr <- as(chr, "XChromatogram") + pks <- matrix(nrow = 4, ncol = 6) + colnames(pks) <- .CHROMPEAKS_REQ_NAMES + pks[1, ] <- c(4, 2, 8, 24, 24, NA) + pks[2, ] <- c(3, 2, 7, 24, 18, NA) + pks[3, ] <- c(9, 7, 10, 2, 2, NA) + pks[4, ] <- c(8, 5, 10, 3, 3, NA) + chromPeaks(xchr) <- pks + plot(xchr) + plot(xchr, peakType = "point") + plot(xchr, peakType = "rectangle", col = "red", lwd = 3) + plot(xchr, peakType = "polygon", col = "red", + peakCol = c("#00000020", "#ff000020", "#00ff0020", "#0000ff20")) + plot(xchr, peakType = "polygon", col = "red", + peakBg = c("#00000020", "#ff000020", "#00ff0020", "#0000ff20")) + ## highlight the 3rd peak with an apex at 9 + plot(xchr, peakType = "point", peakCol = c("#00000040", "#00000040", + "#ff000060", "#00000040")) + plot(xchr, peakType = "rectangle", peakCol = c("#00000040", "#00000040", + "#ff000060", "#00000040")) + plot(xchr, peakType = "polygon", peakCol = c("#00000040", "#00000040", + "#ff000060", "#00000040")) +}) + +test_that("filterMz,filterRt,XChromatogram work", { + skip_on_os(os = "windows", arch = "i386") + + chr <- Chromatogram(rtime = 1:10, + intensity = c(4, 12, 18, 24, 23, 18, 15, 3, 2, 5)) + xchr <- as(chr, "XChromatogram") + pks <- matrix(nrow = 4, ncol = 6) + colnames(pks) <- .CHROMPEAKS_REQ_NAMES + pks[1, ] <- c(4, 2, 8, 24, 24, NA) + pks[2, ] <- c(3, 2, 7, 24, 18, NA) + pks[3, ] <- c(9, 7, 10, 2, 2, NA) + pks[4, ] <- c(8, 5, 10, 3, 3, NA) + chromPeaks(xchr) <- pks + + expect_equal(filterRt(xchr), xchr) + res <- filterRt(xchr, rt = c(3, 7)) + expect_equal(rtime(res), 3:7) + expect_equal(intensity(res), intensity(xchr)[3:7]) + expect_equal(chromPeaks(res), pks[1:2, ]) + + pks <- cbind(pks, mz = c(123, 124, 232, 234)) + chromPeaks(xchr) <- pks + expect_equal(xchr, filterMz(xchr)) + res <- filterMz(xchr, mz = c(2, 3)) + expect_true(nrow(chromPeaks(res)) == 0) + res <- filterMz(xchr, mz = c(200, 233)) + expect_equal(chromPeaks(res), pks[3, , drop = FALSE]) +}) + +test_that("hasChromPeaks,XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + chr <- Chromatogram(rtime = 1:10, + intensity = c(4, 12, 18, 24, 23, 18, 15, 3, 2, 5)) + xchr <- as(chr, "XChromatogram") + pks <- matrix(nrow = 4, ncol = 6) + colnames(pks) <- .CHROMPEAKS_REQ_NAMES + pks[1, ] <- c(4, 2, 8, 24, 24, NA) + pks[2, ] <- c(3, 2, 7, 24, 18, NA) + pks[3, ] <- c(9, 7, 10, 2, 2, NA) + pks[4, ] <- c(8, 5, 10, 3, 3, NA) + chromPeaks(xchr) <- pks + expect_true(hasChromPeaks(xchr)) + + xchr <- as(chr, "XChromatogram") + expect_false(hasChromPeaks(xchr)) +}) + +test_that("removeIntensity,XChromatogram(s) works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + xchr <- as(chr1, "XChromatogram") + expect_warning(res <- removeIntensity(xchr, which = "outside_chromPeak")) + expect_equal(res, xchr) + + xchrs <- featureChromatograms(xod_xgrg, features = c("FT07", "FT13"), + expandRt = 20) + res <- removeIntensity(xchrs[[1]], which = "outside_chromPeak") + expect_true(all(is.na(intensity(res)[rtime(res) < chromPeaks(res)[, "rtmin"]]))) + expect_true(all(is.na(intensity(res)[rtime(res) > chromPeaks(res)[, "rtmax"]]))) + expect_true(!all(is.na(intensity(xchrs[1, 1])[rtime(res) < chromPeaks(res)[, "rtmin"]]))) + expect_true(!all(is.na(intensity(xchrs[1, 1])[rtime(res) > chromPeaks(res)[, "rtmax"]]))) + + res <- removeIntensity(xchrs, which = "outside_chromPeak") + expect_true(all(is.na(intensity(res[1, 1])[rtime(res[1, 1]) < chromPeaks(res[1, 1])[, "rtmin"]]))) + expect_true(all(is.na(intensity(res[1, 1])[rtime(res[1, 1]) > chromPeaks(res[1, 1])[, "rtmax"]]))) + expect_true(all(is.na(intensity(res[2, 2])[rtime(res[2, 2]) < chromPeaks(res[2, 2])[, "rtmin"]]))) + expect_true(all(is.na(intensity(res[2, 2])[rtime(res[2, 2]) > chromPeaks(res[2, 2])[, "rtmax"]]))) + expect_true(all(is.na(intensity(res[2, 3])[rtime(res[2, 3]) < chromPeaks(res[2, 3])[, "rtmin"]]))) + expect_true(all(is.na(intensity(res[2, 3])[rtime(res[2, 3]) > chromPeaks(res[2, 3])[, "rtmax"]]))) + + ## with two peaks per chromatogram. + mzr <- 462.2 + c(-0.04, 0.04) + chr <- chromatogram(od_x, mz = mzr) + chr <- findChromPeaks(chr, CentWaveParam()) + res <- removeIntensity(chr, which = "outside_chromPeak") + + ## And if we had an XChromatograms + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + chrs <- MChromatograms(list(chr1, chr2, chr3)) + chrs <- as(chrs, "XChromatograms") + + res <- removeIntensity(chrs) + expect_true(is(res, "XChromatograms")) + expect_equal(res, chrs) + + res <- removeIntensity(chrs, threshold = 20) + expect_equal(intensity(res[1, 1]), c(NA_real_, 29, 50, NA_real_, 100, + NA_real_, NA_real_, NA_real_, NA_real_, + NA_real_)) + expect_equal(intensity(res[3, 1]), c(53, 80, 130, NA_real_, NA_real_, + NA_real_, NA_real_)) +}) + +test_that("filterChromPeaks,XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- chromatogram(xod_x, mz = rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04))) + res <- filterChromPeaks(chrs[1, 1], n = 1L, by = "keepTop") + expect_true(nrow(chromPeaks(res)) == 1L) +}) + +test_that("transformIntensity,XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- chromatogram(xod_x, mz = rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04))) + + chr <- chrs[1L, 1L] + res <- transformIntensity(chr) + + expect_equal(intensity(res), intensity(chr)) + expect_equal(chromPeaks(res)[, "into"], chromPeaks(chr)[, "into"]) + expect_equal(chromPeaks(res)[, "maxo"], chromPeaks(chr)[, "maxo"]) + + res <- transformIntensity(chr, log2) + expect_equal(intensity(res), log2(intensity(chr))) + expect_equal(chromPeaks(res)[, "into"], log2(chromPeaks(chr)[, "into"])) + expect_equal(chromPeaks(res)[, "maxo"], log2(chromPeaks(chr)[, "maxo"])) + + chr <- XChromatogram() + res <- transformIntensity(chr) + expect_equal(intensity(res), intensity(chr)) + expect_equal(chromPeaks(res)[, "into"], chromPeaks(chr)[, "into"]) + expect_equal(chromPeaks(res)[, "maxo"], chromPeaks(chr)[, "maxo"]) + + res <- transformIntensity(chr, log2) + expect_equal(intensity(res), log2(intensity(chr))) + expect_equal(chromPeaks(res)[, "into"], log2(chromPeaks(chr)[, "into"])) + expect_equal(chromPeaks(res)[, "maxo"], log2(chromPeaks(chr)[, "maxo"])) +}) diff --git a/tests/testthat/test_methods-XChromatograms.R b/tests/testthat/test_methods-XChromatograms.R new file mode 100644 index 000000000..88dce138e --- /dev/null +++ b/tests/testthat/test_methods-XChromatograms.R @@ -0,0 +1,601 @@ +test_that("chromPeaks, chromPeakData for XChromatograms work", { + skip_on_os(os = "windows", arch = "i386") + + pks1 <- matrix(c(3, 2, 4, 339.2, 343, NA), nrow = 1, + dimnames = list(NULL, .CHROMPEAKS_REQ_NAMES)) + chr1 <- XChromatogram( + rtime = 1:8, intensity = c(3, 24.2, 343, 32, 3.3, 5, 2, 9), + chromPeaks = pks1) + chr2 <- XChromatogram(rtime = 1:4, intensity = c(45, 3, 34, 2)) + pks3 <- matrix(c(3, 2, 4, 145, 54, NA), nrow = 1, + dimnames = list(NULL, .CHROMPEAKS_REQ_NAMES)) + chr3 <- XChromatogram( + rtime = 1:7, intensity = c(12, 34, 54, 34, 23, 2, NA), + chromPeaks = pks3) + chr4 <- XChromatogram(rtime = 1:3, intensity = c(3, 4, 1)) + chr5 <- XChromatogram(rtime = 1:6, intensity = c(3, 4, 6, 7, 2, 4)) + pks6 <- matrix(c(2, 2, 3, 108, 65, NA, 3, 5, 7, 123, 4, NA), + nrow = 2, byrow = TRUE, + dimnames = list(NULL, .CHROMPEAKS_REQ_NAMES)) + chr6 <- XChromatogram( + rtime = 2:5, intensity = c(3, 65, 43, 12), + chromPeaks = pks6) + xchrs <- XChromatograms(list(chr1, chr2, chr3, chr4, chr5, chr6), nrow = 2) + + res <- chromPeaks(xchrs) + expect_equal(res[1, 1:6, drop = FALSE], pks1) + expect_equal(res[2, 1:6, drop = FALSE], pks3) + expect_equal(res[3:4, 1:6, drop = FALSE], pks6) + expect_equal(res[, "row"], c(1, 1, 2, 2)) + expect_equal(res[, "column"], c(1, 2, 3, 3)) + resd <- chromPeakData(xchrs) + expect_equal(resd$row, c(1, 1, 2, 2)) + expect_equal(resd$column, c(1, 2, 3, 3)) + expect_equal(colnames(resd), c("ms_level", "is_filled", "row", "column")) + + xchrs <- XChromatograms(list(chr4, chr5, chr2)) + res <- chromPeaks(xchrs) + expect_true(nrow(res) == 0) + expect_equal(colnames(res), c("rt", "rtmin", "rtmax", "into", "maxo", "sn", + "row", "column")) + + xchrs <- XChromatograms(list(chr2, chr4, chr5, chr1)) + res <- chromPeaks(xchrs) + expect_equal(res[, 1:6, drop = FALSE], pks1) + expect_equal(unname(res[, "row"]), c(4)) + expect_equal(unname(res[, "column"]), c(1)) + resd <- chromPeakData(xchrs) + expect_equal(resd$column, 1) + + xchrs <- XChromatograms(list(chr2, chr4, chr5, chr1), ncol = 2) + res <- chromPeaks(xchrs) + expect_equal(res[, 1:6, drop = FALSE], pks1) + expect_equal(unname(res[, "row"]), c(2)) + expect_equal(unname(res[, "column"]), c(2)) +}) + +test_that("filterRt, filterMz for XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + chr1 <- XChromatogram() + chr2 <- XChromatogram(rtime = 1:4, intensity = c(45, 3, 34, 2)) + pks3 <- matrix(c(3, 2, 4, 145, 54, NA), nrow = 1, + dimnames = list(NULL, .CHROMPEAKS_REQ_NAMES)) + chr3 <- XChromatogram( + rtime = 1:7, intensity = c(12, 34, 54, 34, 23, 2, NA), + chromPeaks = pks3) + chr4 <- XChromatogram(rtime = 1:3, intensity = c(3, 4, 1)) + + xchrs <- XChromatograms() + expect_equal(xchrs, filterRt(xchrs)) + expect_equal(xchrs, filterMz(xchrs)) + + xchrs <- XChromatograms(list(chr1, chr2)) + res <- filterMz(xchrs) + expect_equal(xchrs, res) + res <- filterRt(xchrs, rt = c(3, 4)) + expect_equal(res[1, 1], chr1) + expect_equal(rtime(res[2, 1]), c(3, 4)) + expect_equal(intensity(res[2, 1]), c(34, 2)) + + xchrs <- XChromatograms(list(chr3, chr4), nrow = 1) + res <- filterMz(xchrs) + expect_equal(xchrs, res) + res <- filterRt(xchrs, rt = c(6, 7)) + expect_true(length(rtime(res[1, 2])) == 0) + expect_equal(rtime(res[1, 1]), 6:7) + expect_equal(intensity(res[1, 1]), c(2, NA)) + expect_true(nrow(chromPeaks(res)) == 0) + + res <- filterRt(xchrs, rt = c(3, 6)) + expect_equal(rtime(res[1, 1]), 3:6) + expect_equal(intensity(res[1, 1]), c(54, 34, 23, 2)) + expect_true(nrow(chromPeaks(res)) == 1) + expect_equal(chromPeaks(res[1, 1]), pks3) + + chrs <- as(od_chrs, "XChromatograms") + chrs <- findChromPeaks(chrs, param = CentWaveParam()) + prm <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + chrs <- groupChromPeaks(chrs, param = prm) + ## Filter on rt on the one above + rtr <- c(2500, 3000) + res <- filterRt(chrs, rt = rtr) + pks_all <- chromPeaks(chrs) + excl <- !(pks_all[, "rtmin"] < rtr[2] & pks_all[, "rtmax"] > rtr[1]) + pks <- chromPeaks(res) + expect_true(all(pks[, "rtmin"] < rtr[2] & pks[, "rtmax"] > rtr[1])) + expect_equal(pks, chromPeaks(chrs)[!excl, ]) + expect_equal(rownames(featureDefinitions(res)), c("FT1", "FT3", "FT4")) + expect_equal(featureDefinitions(res)$peakidx, + list(c(1, 4, 5), c(6, 8, 10), c(7, 9, 11))) + rtr <- c(2500, 2600) + res <- filterRt(chrs, rtr) + expect_equal(featureDefinitions(res)$peakidx, list(1:3)) + expect_equal(rownames(featureDefinitions(res)), "FT3") + + ## Filter on mz for a chrs extracted from a real object. + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + chrs <- chromatogram(xod_xgrg, mz = mzr) + expect_equal(chromPeakData(chrs)$ms_level, rep(1L, 4)) + expect_equal(chromPeakData(chrs)$is_filled, rep(FALSE, 4)) + expect_equal(chromPeakData(chrs)$row, c(1, 2, 2, 2)) + expect_equal(chromPeakData(chrs)$column, c(2, 1, 2, 3)) + + res <- filterMz(chrs, mz = 335) + expect_equal(nrow(featureDefinitions(res)), 0) + expect_equal(nrow(chromPeaks(res)), 1) + expect_equal(nrow(chromPeakData(res)), 1) + + res <- filterMz(chrs, mz = 344) + expect_equal(nrow(featureDefinitions(res)), 1) + expect_equal(nrow(chromPeaks(res)), 3) + expect_equal(nrow(chromPeakData(res)), 3) + + res <- filterMz(chrs, mz = 444) + expect_equal(nrow(featureDefinitions(res)), 0) + expect_equal(nrow(chromPeaks(res)), 0) + expect_equal(nrow(chromPeakData(res)), 0) +}) + +test_that("plot,XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) + + ## Full rt range. + xchr_rt <- chromatogram(xod_chr, mz = mzr) + + cols_smple <- c("#ff000060", "#00ff0060", "#0000ff60") + plot(as(xchr_rt[1, ], "MChromatograms"), col = cols_smple) + ## Plotting the data is fine, just as above. + ## Then we have to loop over each chromatogram... + x <- xchr_rt[1, ] + pks <- chromPeaks(x) + .add_chromatogram_peaks(x, pks, type = "rectangle", bg = rep(NA, nrow(pks)), + col = cols_smple[pks[, "sample"]]) + .add_chromatogram_peaks(x, pks, type = "point", bg = rep(NA, nrow(pks)), + col = cols_smple[pks[, "sample"]], pch = 15) + .add_chromatogram_peaks(x, pks, type = "polygon", + bg = cols_smple[pks[, "sample"]], + col = cols_smple[pks[, "sample"]]) + x <- xchr_rt[2, ] + pks <- chromPeaks(x) + plot(as(x, "MChromatograms"), col = cols_smple, lwd = 2) + .add_chromatogram_peaks(x, pks, type = "rectangle", bg = rep(NA, nrow(pks)), + col = cols_smple[pks[, "sample"]]) + .add_chromatogram_peaks(x, pks, type = "point", bg = rep(NA, nrow(pks)), + col = cols_smple[pks[, "sample"]], pch = 15) + .add_chromatogram_peaks(x, pks, type = "polygon", + bg = cols_smple[pks[, "sample"]], + col = cols_smple[pks[, "sample"]]) + + plot(xchr_rt, peakCol = cols_smple[chromPeaks(xchr_rt)[, "sample"]], + peakBg = cols_smple[chromPeaks(xchr_rt)[, "sample"]], xlab = "RT") + xsub <- xchr_rt[2, ] + ## Use one color per peak + library(RColorBrewer) + cls <- paste0(brewer.pal(nrow(chromPeaks(xsub)), "Dark2"), 40) + plot(xsub, peakBg = cls) + + ## Narrow on rt. + xchr <- chromatogram(xod_chr, mz = mzr, rt = rtr) + plot(xchr) +}) + +test_that("processHistory,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + xchrs <- chromatogram(xod_xgrg, mz = mzr, rt = c(2600, 3600)) + res <- processHistory(xchrs) + expect_equal(length(res), 4) + res <- processHistory(xchrs, type = .PROCSTEP.PEAK.DETECTION) + expect_equal(length(res), 1) + + xchrs2 <- findChromPeaks(xchrs, param = CentWaveParam()) + expect_equal(length(processHistory(xchrs2)), 5) + expect_equal(length(processHistory(xchrs2, + type = .PROCSTEP.PEAK.DETECTION)), 2) +}) + +test_that("groupChromPeaks,XChromatograms,PeakDensityParam works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + xchrs <- chromatogram(xod_xgrg, mz = mzr, rt = c(2600, 3600)) + param <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + res <- groupChromPeaks(xchrs, param = param) + expect_true(hasFeatures(res)) + expect_true(nrow(res@featureDefinitions) == 1) + expect_equal(processHistory(xchrs)[1:3], processHistory(res)[1:3]) + expect_true(processHistory(xchrs)[[4]]@date != + processHistory(res)[[4]]@date) + + param <- PeakDensityParam(sampleGroups = c(1, 2, 3)) + res <- groupChromPeaks(xchrs, param = param) + expect_true(nrow(res@featureDefinitions) == 2) + expect_true(length(processHistory(res)) == 4) + expect_equal(processHistory(xchrs)[1:3], processHistory(res)[1:3]) + expect_true(processHistory(xchrs)[[4]]@date != + processHistory(res)[[4]]@date) + + res <- groupChromPeaks(res, + param = PeakDensityParam(sampleGroups = c(1, 1, 1))) + expect_true(length(processHistory(res)) == 4) + expect_true(nrow(res@featureDefinitions) == 1) + + ## The same on artificial data. + chrs <- as(xchrs, "MChromatograms") + res <- findChromPeaks(chrs, param = CentWaveParam(snthresh = 1)) + expect_equal(nrow(chromPeaks(res)), 31) + res <- groupChromPeaks(res, param = param) + expect_equal(nrow(featureDefinitions(res)), 8) +}) + +test_that("dropFeatureDefinitions,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + xchrs <- chromatogram(xod_xgrg, mz = mzr, rt = c(2600, 3600)) + param <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + res <- groupChromPeaks(xchrs, param = param) + expect_true(hasFeatures(res)) + expect_true(nrow(res@featureDefinitions) == 1) + expect_true(length(res@.processHistory) == 4) + res <- dropFeatureDefinitions(res) + expect_false(hasFeatures(res)) + expect_true(length(res@.processHistory) == 3) + expect_equal(chromPeaks(res), chromPeaks(xchrs)) + expect_equal(processHistory(res)[1:3], processHistory(xchrs)[1:3]) +}) + +test_that("featureDefinitions,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + xchrs <- chromatogram(xod_xgrg, mz = mzr, rt = c(2600, 3600)) + param <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + res <- groupChromPeaks(xchrs, param = param) + expect_true(hasFeatures(res)) + expect_true(nrow(featureDefinitions(res)) == 1) + + xchrs <- findChromPeaks(xchrs, param = CentWaveParam()) + res <- groupChromPeaks(xchrs, param = param) + expect_true(nrow(featureDefinitions(res)) == 3) + fts <- featureDefinitions(res, rt = c(2500, 2800)) + expect_equal(nrow(fts), 2) + + res <- dropFeatureDefinitions(res) + expect_equal(nrow(featureDefinitions(res)), 0) +}) + +test_that("[,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- findChromPeaks(od_chrs, param = CentWaveParam()) + prm <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + chrs <- groupChromPeaks(chrs, param = prm) + + pks <- chromPeaks(chrs) + fts <- featureDefinitions(chrs) + + res <- chrs[2, 2] + expect_true(is(res, "XChromatogram")) + expect_equal(chromPeaks(res), pks[pks[, "row"] == 2 & + pks[, "column"] == 2, + colnames(chromPeaks(res))]) + res <- chrs[2, 2, drop = FALSE] + expect_true(is(res, "XChromatograms")) + expect_equal(chromPeaks(res)[, 1:7], + pks[pks[, "row"] == 2 & pks[, "column"] == 2, 1:7]) + + res <- chrs[2, 2:3] + expect_true(is(res, "XChromatograms")) + expect_true(ncol(res) == 2) + res_2 <- chrs[2, 2:3, drop = TRUE] + expect_equal(res, res_2) + expect_equal(res[1, 1], chrs[2, 2]) + expect_equal(res[1, 2], chrs[2, 3]) + pks_tmp <- pks[pks[, "row"] == 2 & pks[, "column"] %in% 2:3, ] + pks_tmp <- pks_tmp[order(pks_tmp[, "column"], pks_tmp[, "row"]), ] + expect_equal(chromPeaks(res)[, 1:6], pks_tmp[, 1:6]) + expect_equal(rownames(featureDefinitions(res)), c("FT3", "FT4")) + res_fts <- featureDefinitions(res) + expect_equal(res_fts$peakidx, list(c(1, 3), c(2, 4))) + + res <- chrs[c(2, 1), 1] + expect_true(is(res, "XChromatograms")) + expect_true(ncol(res) == 1) + expect_true(nrow(res) == 2) + expect_equal(res[1, 1], chrs[2, 1]) + expect_equal(res[2, 1], chrs[1, 1]) + expect_equal(as.character(res$sampleNames), "ko15.CDF") + expect_equal(fData(res), fData(chrs)[c(2, 1), ]) + pks_tmp <- pks[pks[, "row"] %in% c(1, 2) & pks[, "column"] == 1, ] + pks_tmp <- pks_tmp[c(5, 6, 1, 2, 3, 4), ] + expect_equal(chromPeaks(res)[, 1:6], pks_tmp[, 1:6]) + res_fts <- featureDefinitions(res) + expect_equal(rownames(res_fts), c("FT3", "FT4", "FT1", "FT2")) + expect_equal(res_fts$peakidx, list(c(1), c(2), c(3), c(6))) + + res <- chrs[c(2, 1), c(1, 3)] + expect_true(is(res, "XChromatograms")) + expect_true(ncol(res) == 2) + expect_true(nrow(res) == 2) + expect_equal(res[1, 1], chrs[2, 1]) + expect_equal(res[2, 1], chrs[1, 1]) + expect_equal(res[1, 2], chrs[2, 3]) + expect_equal(res[2, 2], chrs[1, 3]) + expect_equal(as.character(res$sampleNames), c("ko15.CDF", "ko18.CDF")) + expect_equal(fData(res), fData(chrs)[c(2, 1), ]) + pks_tmp <- pks[c(8, 9, 12, 13, 1, 2, 3, 4, 6, 7), ] + expect_equal(chromPeaks(res)[, 1:6], pks_tmp[, 1:6]) + res_fts <- featureDefinitions(res) + expect_equal(rownames(res_fts), c("FT3", "FT4", "FT1", "FT2")) + expect_equal(res_fts$peakidx, list(c(1, 3), c(2, 4), c(5, 9), c(8, 10))) + + ## Data from an XCMSnExp. + mzm <- rbind(305.1 + c(-0.01, 0.01), 496.2 + c(-0.01, 0.01)) + xchr <- chromatogram(xod_xgrg, mz = mzm) + pks <- chromPeaks(xchr) + fts <- featureDefinitions(xchr) + res <- xchr[2:1, ] + pks_sub <- chromPeaks(res) + fts_sub <- featureDefinitions(res) + expect_equal(pks_sub[pks_sub[, "row"] == 1, "into"], + pks[pks[, "row"] == 2, "into"]) + expect_equal(pks_sub[fts_sub$peakidx[[1]], "into"], + pks[fts$peakidx[[4]], "into"]) + + mzm <- rbind(mzm, mzm[1, ]) + xchr <- chromatogram(xod_xgrg, mz = mzm) + pks_2 <- chromPeaks(xchr) + expect_equal(pks_2[pks_2[, "row"] == 3, "into"], + pks_2[pks_2[, "row"] == 1, "into"]) + expect_error(xchr[c(2, 1, 1, 2), ], "rownames of object") + + res <- xchr[3:2, ] + pks_2 <- chromPeaks(res) + fts_2 <- featureDefinitions(res) + expect_equal(pks_2, pks) + expect_equal(fts_2, fts) + + res <- xchr[3:2, 2] + expect_equal(chromPeaks(res)[, "into"], + pks[pks[, "row"] == 2 & pks[, "column"] == 2, "into"]) + expect_equal(featureDefinitions(res)[, "rtmed"], + fts[fts$row == 2, "rtmed"]) + + expect_equal(unname(featureValues(res)[, 1]), + unname(chromPeaks(res)[, "into"])) +}) + +test_that("featureValues,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- as(od_chrs, "XChromatograms") + expect_error(featureValues(chrs)) + chrs <- findChromPeaks(chrs, param = CentWaveParam()) + expect_error(featureValues(chrs)) + prm <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + chrs <- groupChromPeaks(chrs, param = prm) + + vls <- featureValues(chrs, value = "index") + expect_equal(colnames(vls), colnames(chrs)) + expect_equal(rownames(vls), rownames(featureDefinitions(chrs))) + exp_mat <- matrix(c(1, 5, 6, + 4, NA, 7, + 8, 10, 12, + 9, 11, 13), byrow = TRUE, ncol = 3, + dimnames = list(rownames(featureDefinitions(chrs)), + colnames(chrs))) + expect_equal(exp_mat, vls) + ## into. + vls <- featureValues(chrs, value = "into") + vls_exp <- matrix(chromPeaks(chrs)[exp_mat, "into"], ncol = 3, byrow = FALSE, + dimnames = dimnames(exp_mat)) + expect_equal(vls, vls_exp) + + vls <- featureValues(chrs, value = "into", missing = 13) + vls_exp[is.na(vls_exp)] <- 13 + expect_equal(vls, vls_exp) + + ## After subsetting/re-ordering. + chrs_sub <- chrs[2, c(3, 1, 2)] + vls_sub <- featureValues(chrs_sub, value = "into") + vls <- featureValues(chrs, value = "into") + expect_equal(colnames(vls_sub), colnames(chrs_sub)) + expect_equal(rownames(vls_sub), rownames(featureDefinitions(chrs_sub))) + expect_equal(vls_sub, vls[3:4, c(3, 1, 2)]) + + chrs_sub <- chrs[c(2, 1), 2] + vls_sub <- featureValues(chrs_sub, value = "into") + expect_equal(colnames(vls_sub), colnames(chrs_sub)) + expect_equal(rownames(vls_sub), rownames(featureDefinitions(chrs_sub))) + expect_equal(vls_sub, vls[c(3, 4, 1), 2, drop = FALSE]) +}) + +test_that("plotChromPeakDensity,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- as(od_chrs, "XChromatograms") + chrs <- findChromPeaks(chrs, param = CentWaveParam()) + prm <- PeakDensityParam(sampleGroups = c(1, 1, 1)) + chrs <- groupChromPeaks(chrs, param = prm) + expect_error(plotChromPeakDensity(chrs)) + frst <- chrs[1, ] + library(RColorBrewer) + plotChromPeakDensity(frst, peakBg = paste0(brewer.pal(7, "Set1"), 60), + peakPch = 16, peakCol = paste0(brewer.pal(7, "Set1"))) + plotChromPeakDensity(frst, peakBg = paste0(brewer.pal(7, "Set1"), 60), + peakPch = 16, peakCol = paste0(brewer.pal(7, "Set1")), + simulate = FALSE) + frst <- dropFeatureDefinitions(frst) + expect_error(plotChromPeakDensity(frst)) + plotChromPeakDensity(frst, param = prm) + scnd <- chrs[2, ] + plotChromPeakDensity(scnd) + plotChromPeakDensity(scnd[, c(1, 3)]) +}) + +test_that("dropFilledChromPeaks,XChromatogram and XChromatograms work", { + skip_on_os(os = "windows", arch = "i386") + + ## With filled-in data + mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) + rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) + ## group + xod_tmp <- groupChromPeaks( + xod_xgr, param = PeakDensityParam(sampleGroups = rep(1, 3), + minFraction = 0.25)) + xod_tmpf <- fillChromPeaks( + xod_tmp, param = FillChromPeaksParam(fixedRt = 30)) + expect_true(.hasFilledPeaks(xod_tmpf)) + xchr <- chromatogram(xod_tmpf, rt = rtr, mz = mzr) + expect_false(any(.hasFilledPeaks(xchr))) + ch <- dropFilledChromPeaks(xchr[1, 1]) + expect_equal(ch, xchr[1, 1]) + ch <- dropFilledChromPeaks(xchr[1, 2]) + expect_equal(ch, xchr[1, 2]) + res <- dropFilledChromPeaks(xchr) + expect_true(length(res@.processHistory) < length(xchr@.processHistory)) + res@.processHistory <- list() + xchr@.processHistory <- list() + expect_equal(res, xchr) + + xchrf <- chromatogram(xod_tmpf, rt = rtr, mz = mzr, filled = TRUE) + res <- hasFilledChromPeaks(xchrf) + expect_true(is.matrix(res)) + expect_true(nrow(res) == 2) + expect_true(ncol(res) == 3) + expect_true(all(res[1, ] == c(TRUE, FALSE, TRUE))) + expect_true(all(res[2, ] == FALSE)) + expect_equal(nrow(chromPeaks(xchrf)), 6) + expect_equal(chromPeakData(xchrf)$is_filled, c(TRUE, FALSE, TRUE, + FALSE, FALSE, FALSE)) + ch <- dropFilledChromPeaks(xchr[1, 1]) + expect_equal(ch, xchr[1, 1]) + ch <- dropFilledChromPeaks(xchr[1, 2]) + expect_equal(ch, xchrf[1, 2]) + res <- dropFilledChromPeaks(xchrf) + expect_true(all(hasFilledChromPeaks(res) == FALSE)) + expect_equal(nrow(chromPeaks(res)), 4) + expect_equal(chromPeakData(res)$is_filled, c(FALSE, FALSE, FALSE, FALSE)) + expect_true(length(res@.processHistory) < length(xchrf@.processHistory)) + expect_equal(chromPeaks(res), chromPeaks(xchr)) + expect_equal(featureDefinitions(res), featureDefinitions(xchr)) +}) + +test_that("refineChromPeaks,XChromatograms,MergeNeighboringPeaksParam works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- 305.1 + c(-0.01, 0.01) + chr <- chromatogram(filterFile(xod_x, 1), mz = mzr) + res <- refineChromPeaks(chr, MergeNeighboringPeaksParam()) + expect_equal(chromPeaks(res), chromPeaks(chr)) + expect_true(length(processHistory(res)) > length(processHistory(chr))) + + res <- refineChromPeaks(chr, MergeNeighboringPeaksParam(expandRt = 3)) + expect_true(nrow(chromPeaks(res)) < nrow(chromPeaks(chr))) + expect_true(sum(chromPeakData(res)$merged) == 1) + + ## With multiple files: + chr <- chromatogram(xod_x, mz = mzr) + res <- refineChromPeaks(chr, MergeNeighboringPeaksParam(expandRt = 5, + minProp = 0)) + expect_true(sum(chromPeakData(res)$merged) == 2) + expect_true(validObject(res)) + res <- refineChromPeaks(chr, MergeNeighboringPeaksParam(expandRt = 5)) + expect_true(sum(chromPeakData(res)$merged) == 1) + expect_true(validObject(res)) + + ## Doing peak detection from scratch + mzr <- 462.2 + c(-0.04, 0.04) + chr <- chromatogram(od_x, mz = mzr) + chr <- findChromPeaks(chr, CentWaveParam()) + res <- refineChromPeaks(chr, MergeNeighboringPeaksParam(minProp = 0.5, + expandRt = 20)) + expect_true(nrow(chromPeaks(res)) < nrow(chromPeaks(chr))) + expect_true(sum(chromPeakData(res)$merged) == 3) +}) + +test_that("filterColumnsIntensityAbove,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04)) + chrs <- chromatogram(xod_x, mz = mzr) + + res <- filterColumnsIntensityAbove(chrs) + expect_true(is(res, "XChromatograms")) + expect_equal(res, chrs) + + res <- filterColumnsIntensityAbove(chrs, threshold = 20000, value = "maxo") + expect_true(is(res, "XChromatograms")) + expect_equal(res[, 1], chrs[, 1]) + expect_equal(res[, 2], chrs[, 3]) + + res <- filterColumnsIntensityAbove(chrs, threshold = 20000, value = "maxo", + which = "all") + expect_equal(res[, 1], chrs[, 1]) + expect_true(ncol(res) == 1) +}) + +test_that("filterColumnsKeepTop,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + mzr <- rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04)) + chrs <- chromatogram(xod_x, mz = mzr) + + res <- filterColumnsKeepTop(chrs, n = 3) + expect_true(is(res, "XChromatograms")) + expect_equal(res, chrs) + + res <- filterColumnsKeepTop(chrs, n = 1) + expect_equal(res, chrs[, 3]) + + res <- filterColumnsKeepTop(chrs, n = 1, sortBy = "maxo") + expect_equal(res, chrs[, 3]) +}) + +test_that("filterChromPeaks,XChromatograms works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- chromatogram(xod_x, mz = rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04))) + res <- filterChromPeaks(chrs, n = 2L) + expect_equal(nrow(chromPeaks(res[1, 1])), 2L) + expect_equal(nrow(chromPeaks(res[1, 2])), 0L) + expect_equal(nrow(chromPeaks(res[1, 3])), 2L) + + chrs <- chromatogram(xod_xg, mz = rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04))) + res <- filterChromPeaks(chrs, n = 2L) + a <- featureValues(res) + b <- featureValues(chrs) + expect_equal(a, b[2:3, ]) +}) + +test_that("transformIntensity,XChromatogram works", { + skip_on_os(os = "windows", arch = "i386") + + chrs <- chromatogram(xod_x, mz = rbind(305.1 + c(-0.01, 0.01), + 462.2 + c(-0.04, 0.04))) + + res <- transformIntensity(chrs) + + expect_equal(intensity(res[1, 2]), intensity(chrs[1, 2])) + expect_equal(chromPeaks(res[1, 2])[, "into"], + chromPeaks(chrs[1, 2])[, "into"]) + expect_equal(chromPeaks(res[1, 2])[, "maxo"], + chromPeaks(chrs[1, 2])[, "maxo"]) + + res <- transformIntensity(chrs, log2) + expect_equal(intensity(res[1, 2]), log2(intensity(chrs[1, 2]))) + expect_equal(chromPeaks(res[1, 2])[, "into"], + log2(chromPeaks(chrs[1, 2])[, "into"])) + expect_equal(chromPeaks(res[1, 2])[, "maxo"], + log2(chromPeaks(chrs[1, 2])[, "maxo"])) +}) diff --git a/tests/testthat/test_methods-group-features.R b/tests/testthat/test_methods-group-features.R new file mode 100644 index 000000000..f8fd80a79 --- /dev/null +++ b/tests/testthat/test_methods-group-features.R @@ -0,0 +1,312 @@ +xodg <- xod_xgrg +library(MsFeatures) +xodgg <- groupFeatures(xodg, param = SimilarRtimeParam(4)) +xodgg <- groupFeatures(xodgg, param = AbundanceSimilarityParam(threshold = 0.3)) + + +test_that("featureGroups,featureGroups<-,XCMSnExp works", { + skip_on_os(os = "windows", arch = "i386") + + expect_error(featureGroups(xod_x), "Please run") + res <- featureGroups(xodg) + expect_true(all(is.na(res))) + tmp <- xodg + featureGroups(tmp) <- "a" + expect_true(all(featureGroups(tmp) == "a")) + + expect_error(featureGroups(xod_x) <- "a", "Please run") + expect_error(featureGroups(xodg) <- 1:2, "length") +}) + +test_that("SimilarRtimeParam works", { + skip_on_os(os = "windows", arch = "i386") + + prm <- SimilarRtimeParam(3) + + expect_error(groupFeatures(xod_x, prm), "No feature definitions") + expect_error(groupFeatures(xodg, prm, msLevel = 1:2), "single MS") + res <- groupFeatures(xodg, prm) + expect_true(any(colnames(featureDefinitions(res)) == "feature_group")) + expect_false(any(is.na(featureGroups(res)))) + expect_true(is.character(featureGroups(res))) + + res2 <- groupFeatures(xodg, + SimilarRtimeParam(3, groupFun = MsCoreUtils::group)) + expect_true(length(table(featureGroups(res2))) < + length(table(featureGroups(res)))) + + ## Different MS levels + tmp <- xodg + idx <- c(1:3, 5, 45, 47) + featureDefinitions(tmp)$ms_level[idx] <- 2 + res <- groupFeatures(tmp, prm) + expect_true(all(is.na(featureGroups(res))[idx])) + expect_false(any(is.na(featureGroups(res))[-idx])) + res <- groupFeatures(tmp, prm, msLevel = 2L) + expect_false(any(is.na(featureGroups(res))[idx])) + expect_true(all(is.na(featureGroups(res))[-idx])) + + ## Pre-defined groups + fgs <- rep("AB", nrow(featureDefinitions(xodg))) + fgs[idx] <- NA + tmp <- xodg + featureGroups(tmp) <- fgs + res <- groupFeatures(tmp, prm) + expect_true(all(is.na(featureGroups(res))[idx])) + expect_false(any(is.na(featureGroups(res))[-idx])) +}) + +test_that("AbundanceSimilarityParam works", { + skip_on_os(os = "windows", arch = "i386") + + prm <- AbundanceSimilarityParam(threshold = 0.5, value = "maxo") + expect_equal(prm@threshold, 0.5) + expect_equal(prm@dots, list(value = "maxo")) + + expect_error(AbundanceSimilarityParam(subset = "4"), "integer") + + expect_error(groupFeatures(xod_x, AbundanceSimilarityParam()), "feature") + expect_error( + groupFeatures(xodg, AbundanceSimilarityParam(subset = c(1, 4, 5))), + "should be between") + + res <- groupFeatures(xodg, AbundanceSimilarityParam()) + expect_true(any(colnames(featureDefinitions(res)) == "feature_group")) + expect_true(length(unique(featureDefinitions(res)$feature_group)) < + nrow(featureDefinitions(res))) + res_2 <- groupFeatures(xodg, AbundanceSimilarityParam(subset = c(2, 3))) + + plotFeatureGroups(res_2) + expect_error(plotFeatureGroups(res_2, featureGroups = "a"), "None of the") + expect_error(plotFeatureGroups(xodg), "None of the") + + ## With pre-defined grps. + tmp <- xodg + featureDefinitions(tmp)$feature_group <- "FG.2" + idx <- c(4, 12, 23, 46) + featureDefinitions(tmp)$ms_level[idx] <- 2 + + res <- groupFeatures(tmp, AbundanceSimilarityParam(), msLevel = 1) + expect_true(all(featureGroups(res)[idx] == "FG.2")) + expect_true(all(featureGroups(res)[-idx] != "FG.2")) + res_2 <- groupFeatures(tmp, AbundanceSimilarityParam(), msLevel = 2) + expect_true(all(featureGroups(res_2)[-idx] == "FG.2")) + expect_true(all(featureGroups(res_2)[idx] != "FG.2")) + + tmp <- quantify(xodg, filled = TRUE, method = "sum", value = "maxo") + res <- groupFeatures(xodg, AbundanceSimilarityParam(), filled = TRUE, + method = "sum", value = "maxo") + res_2 <- groupFeatures(tmp, AbundanceSimilarityParam()) + expect_equal(featureGroups(res), featureGroups(res_2)) +}) + +## test_that("featureGroupPseudoSpectrum works", { + ## skip_on_os(os = "windows", arch = "i386") + +## fvals <- featureValues(xodgg, method = "maxint", value = "maxo") +## ## 3 feature +## ft_idx <- which(featureGroups(xodgg) == "FG.010.001") +## res <- featureGroupPseudoSpectrum("FG.010.001", xodgg, fvals = fvals, +## intensityFun = median) +## expect_true(is(res, "Spectrum1")) +## expect_true(peaksCount(res) == 3) +## expect_true(validObject(res)) +## expect_equal(intensity(res), apply(fvals[ft_idx, ], MARGIN = 1, +## median, na.rm = TRUE)) +## expect_equal(mz(res), featureDefinitions(xodgg)$mzmed[ft_idx]) +## expect_equal(rtime(res), median(featureDefinitions(xodgg)$rtmed[ft_idx])) + +## ## 1 feature +## res <- featureGroupPseudoSpectrum("FG.010.002", xodgg, fvals = fvals, +## intensityFun = median) +## ft_idx <- which(featureGroups(xodgg) == "FG.010.002") +## expect_true(is(res, "Spectrum1")) +## expect_true(peaksCount(res) == 1) +## expect_true(validObject(res)) +## expect_equal(unname(intensity(res)), +## unname(median(fvals[ft_idx, ], na.rm = TRUE))) +## expect_equal(mz(res), featureDefinitions(xodgg)$mzmed[ft_idx]) +## expect_equal(rtime(res), median(featureDefinitions(xodgg)$rtmed[ft_idx])) + +## expect_error( +## featureGroupPseudoSpectrum("FG.009.1", xodgg, fvals = fvals, n = 12), +## "has to be an integer") +## }) + +## test_that("featureGroupFullScan works", { + ## skip_on_os(os = "windows", arch = "i386") + +## fvals <- featureValues(xodgg, method = "maxint", value = "maxo") +## ## 3 feature +## res <- featureGroupFullScan("FG.010.001", xodgg, fvals = fvals) +## ft_idx <- which(featureGroups(xodgg) == "FG.010.001") +## expect_true(is(res, "Spectrum1")) +## expect_true( +## abs(rtime(res) - +## median(featureDefinitions(xodgg)[ft_idx, "rtmed"])) < 0.1) +## expect_true(all(featureDefinitions(xodgg)[ft_idx, "mzmed"] %in% mz(res))) + +## ## 1 feature +## res <- featureGroupFullScan("FG.010.002", xodgg, fvals = fvals) +## ft_idx <- which(featureGroups(xodgg) == "FG.010.002") +## expect_true(is(res, "Spectrum1")) +## expect_true( +## abs(rtime(res) - +## median(featureDefinitions(xodgg)[ft_idx, "rtmed"])) < 0.8) +## expect_true(all(featureDefinitions(xodgg)[ft_idx, "mzmed"] %in% mz(res))) +## }) + +## test_that("featureGroupSpectra works", { + ## skip_on_os(os = "windows", arch = "i386") + +## ## Errors +## expect_error(featureGroupSpectra(xodgg, subset = 1:5), "an integer") +## expect_error(featureGroupSpectra(xod), "feature definitions") +## expect_error(featureGroupSpectra(xodgg, featureGroup = c("a")), "all feature") + +## ## Get all of them +## res_all <- featureGroupSpectra(xodgg) +## expect_true(is(res_all, "MSpectra")) +## expect_equal(mcols(res_all)$feature_group, unique(featureGroups(xodgg))) +## expect_equal(unname(peaksCount(res_all)), +## unname(lengths(mcols(res_all)$feature_id))) + +## ## Get them in a subset +## res_sub <- featureGroupSpectra(xodgg, subset = c(1, 3)) +## expect_true(sum(is.na(rtime(res_sub))) == 59) + +## ## Get only selected ones +## res <- featureGroupSpectra( +## xodgg, featureGroup = c("FG.010.001", "FG.010.002")) +## expect_true(length(res) == 2) +## expect_equal(mcols(res)$feature_group, c("FG.010.001", "FG.010.002")) +## idx <- which(mcols(res_all)$feature_group %in% c("FG.010.001", "FG.010.002")) +## expect_equal(res[[1]], res_all[[idx[1]]]) +## expect_equal(res[[2]], res_all[[idx[2]]]) +## }) + +test_that(".group_eic_similarity works", { + skip_on_os(os = "windows", arch = "i386") + + set.seed(123) + chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) + chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), + intensity = c(80, 50, 20, 10, 9, 4, 3, 4, 1, 3)) + chr3 <- Chromatogram(rtime = 3:9 + rnorm(7, sd = 0.3), + intensity = c(53, 80, 130, 15, 5, 3, 2)) + chrs <- MChromatograms(list(chr1, chr2, chr3)) + + res <- .group_eic_similarity(chrs, ALIGNFUNARGS = list(method = "closest")) + expect_true(is.factor(res)) + expect_equal(res, factor(c(1L, 2L, 1L))) + res <- .group_eic_similarity( + chrs, ALIGNFUNARGS = list(method = "closest", tolerance = 0)) + expect_equal(res, factor(c(1L, 2L, 3L))) + + chrs <- MChromatograms(list(chr1, chr2, chr3, chr1, chr2, chr3), ncol = 2) + res <- .group_eic_similarity(chrs, aggregationFun = mean, + ALIGNFUNARGS = list(method = "closest")) + expect_equal(res, factor(c(1L, 2L, 1L))) + res <- .group_eic_similarity(chrs, aggregationFun = max, + ALIGNFUNARGS = list(method = "closest")) + expect_equal(res, factor(c(1L, 2L, 1L))) + res <- .group_eic_similarity(chrs, aggregationFun = min, + ALIGNFUNARGS = list(method = "closest")) + expect_equal(res, factor(c(1L, 2L, 1L))) + + chrs <- MChromatograms(list(chr1, chr2, chr3, chr1, chr2, chr3, + chr2, chr3, chr1), ncol = 3) + res <- .group_eic_similarity(chrs, ALIGNFUNARGS = list(method = "closest")) + expect_true(is.factor(res)) + expect_equal(res, factor(c(1L, 2L, 3L))) + + res <- .group_eic_similarity(chrs, aggregationFun = max, + threshold = 0.1, + ALIGNFUNARGS = list(method = "closest")) + expect_true(is.factor(res)) + expect_equal(res, factor(c(1L, 1L, 1L))) + + res <- .group_eic_similarity(chrs, aggregationFun = median, + ALIGNFUNARGS = list(method = "closest")) + expect_true(is.factor(res)) + expect_equal(res, factor(c(1L, 2L, 1L))) +}) + +test_that("EicSimilarityParam works", { + skip_on_os(os = "windows", arch = "i386") + + res <- EicSimilarityParam() + expect_equal(res@threshold, 0.9) + expect_equal(res@ALIGNFUNARGS, list(tolerance = 0, method = "closest")) + expect_equal(res@ALIGNFUN, alignRt) + expect_equal(res@FUN, stats::cor) + expect_equal(res@FUNARGS, list(use = "pairwise.complete.obs")) + expect_equal(res@n, 1L) + expect_equal(res@onlyPeak, TRUE) + expect_equal(res@dots, list()) + + res <- EicSimilarityParam(FUN = dist) + expect_equal(res@FUN, dist) + res <- EicSimilarityParam(ALIGNFUN = sum) + expect_equal(res@ALIGNFUN, sum) + res <- EicSimilarityParam(groupFun = max) + expect_equal(res@groupFun, max) + res <- EicSimilarityParam(threshold = 0, n = 10, onlyPeak = FALSE) + expect_equal(res@threshold, 0) + expect_equal(res@n, 10) + expect_equal(res@onlyPeak, FALSE) + res <- EicSimilarityParam(ALIGNFUNARGS = list(a = 4)) + expect_equal(res@ALIGNFUNARGS, list(a = 4)) + res <- EicSimilarityParam(FUNARGS = list(b = 5)) + expect_equal(res@FUNARGS, list(b = 5)) + res <- EicSimilarityParam(someother = 5) + expect_equal(res@dots, list(someother = 5)) + + expect_error(EicSimilarityParam(threshold = c(4, 3)), "positive numeric") + expect_error(EicSimilarityParam(n = 1:2), "positive numeric") + expect_error(EicSimilarityParam(onlyPeak = c(TRUE, FALSE)), "length 1") + expect_error(EicSimilarityParam(value = "other")) + +}) + +test_that("groupFeatures,EicSimilarityParam works", { + skip_on_os(os = "windows", arch = "i386") + + ## n bigger than 3 + expect_error(groupFeatures(xodg, param = EicSimilarityParam(n = 5)), + "smaller than or") + ## no feature definitions + expect_error(groupFeatures(xod_x, param = EicSimilarityParam()), "No") + ## MS level length > 1 + expect_error( + groupFeatures(xodg, param = EicSimilarityParam(), msLevel = 1:2), + "single MS level") + + tmp <- xodg + res_all <- groupFeatures(tmp, param = EicSimilarityParam()) + expect_true(is.character(featureGroups(res_all))) + + idx <- c(3, 12, 13, 34, 39, 40) + tmp <- xodg + featureDefinitions(tmp)$feature_group <- NA + featureDefinitions(tmp)$feature_group[idx] <- "FG" + res <- groupFeatures(tmp, param = EicSimilarityParam()) + expect_true(all(is.na(featureGroups(res)[-idx]))) + expect_true(length(unique(featureGroups(res))) < length(idx)) + a <- featureGroups(res)[idx] + b <- featureGroups(res_all)[idx] + expect_equal(as.integer(factor(a, levels = unique(a))), + as.integer(factor(b, levels = unique(b)))) + + featureDefinitions(tmp)$feature_group <- NULL + featureDefinitions(tmp)$ms_level[idx] <- 2 + + res_2 <- groupFeatures(tmp, param = EicSimilarityParam(), msLevel = 2) + expect_equal(featureDefinitions(res)$feature_group, + featureDefinitions(res_2)$feature_group) + + res <- groupFeatures(xodgg, param = EicSimilarityParam(threshold = 0.7)) + expect_true(length(table(featureGroups(xodgg))) < + length(table(featureGroups(res)))) +}) diff --git a/tests/testthat/test_old_methods-IO.R b/tests/testthat/test_old_methods-IO.R deleted file mode 100644 index 3bda5de36..000000000 --- a/tests/testthat/test_old_methods-IO.R +++ /dev/null @@ -1,57 +0,0 @@ -test_that("write.cdf,xcmsRaw works", { - file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") - xraw <- xcmsRaw(file, profstep = 0) - cdffile <- paste(tempdir(), "ko15.cdf", sep="/") - write.cdf(xraw, cdffile) - xrawCopy <- xcmsRaw(cdffile) - expect_true(all(xraw@env$mz == xrawCopy@env$mz)) - expect_true(all(xraw@env$intensity == xrawCopy@env$intensity)) -}) - -test_that("write.mzdata,xcmsRaw works", { - file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") - xraw <- xcmsRaw(file, profstep = 0) - mzdataFile <- paste(tempdir(), "ko15.mzData", sep="/") - write.mzdata(xraw, mzdataFile) - xrawCopy <- xcmsRaw(mzdataFile) - expect_true(all(xraw@env$mz == xrawCopy@env$mz)) - expect_true(all(xraw@env$intensity == xrawCopy@env$intensity)) -}) - -test_that("write.mzdata,xcmsRaw works with MS2 data", { - file <- system.file('microtofq/MSMSpos20_6.mzML', package = "msdata") - xraw <- xcmsRaw(file, includeMSn=TRUE, profstep = 0) - mzdataFile <- paste(tempdir(), "MSMSpos20_6.mzData", sep="/") - write.mzdata(xraw, mzdataFile) - xrawCopy <- xcmsRaw(mzdataFile) - expect_true(all(xraw@env$intensity == xrawCopy@env$intensity)) - expect_true(all(xraw@env$msnIntensity == xrawCopy@env$msnIntensity)) -}) - -test_that("write.mzdata,xcmsRaw works with MSn data", { - file <- system.file('threonine/threonine_i2_e35_pH_tree.mzXML', package = "msdata") - xraw <- xcmsRaw(file, includeMSn=TRUE, profstep = 0) - mzdataFile <- paste(tempdir(), "threonine_i2_e35_pH_tree.mzData", sep="/") - write.mzdata(xraw, mzdataFile) - xrawCopy <- xcmsRaw(mzdataFile) - expect_true(all(xraw@env$intensity == xrawCopy@env$intensity)) - expect_true(all(xraw@env$msnIntensity == xrawCopy@env$msnIntensity)) -}) - -test_that("write.mzdata,xcmsRaw writes polarity", { - file <- system.file('microtofq/MM14.mzdata', package = "msdata") - xraw <- xcmsRaw(file, profstep = 0) - oldpolarity <- xraw@polarity - mzdataFile <- paste(tempdir(), "MM14.mzdata", sep="/") - write.mzdata(xraw, mzdataFile) - xrawCopy <- xcmsRaw(mzdataFile) - expect_true(all(xraw@polarity == xrawCopy@polarity)) -}) - -test_that("write.mzQuantML,xcmsSet works", { - xsg <- group(faahko) - mzqFile <- paste(tempdir(), "faahKO.mzq.xml", sep="/") - expect_warning(write.mzQuantML(xsg, mzqFile)) - v <- verify.mzQuantML(filename=mzqFile) - expect_true(v$status == "0") -}) diff --git a/tests/testthat/test_old_xcmsSource.R b/tests/testthat/test_old_xcmsSource.R deleted file mode 100644 index 8cce23e10..000000000 --- a/tests/testthat/test_old_xcmsSource.R +++ /dev/null @@ -1,46 +0,0 @@ -test_that("xcmsSource works", { - mz_file <- system.file("microtofq/MM8.mzML", package = "msdata") - src <- xcms:::xcmsSource(mz_file) - expect_true(is(src, "xcmsFileSource")) - tmp <- loadRaw(src) - expect_equal(names(tmp), c("rt", "acquisitionNum", "tic", "scanindex", - "mz", "intensity", "polarity")) - - cdf_file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") - src <- xcms:::xcmsSource(cdf_file) - expect_true(is(src, "xcmsFileSource")) - tmp <- loadRaw(src) - expect_equal(names(tmp), c("rt", "acquisitionNum", "tic", "scanindex", - "mz", "intensity", "polarity")) - - ## MSn: - mzdatapath <- system.file("iontrap", package = "msdata") - mzdatafiles <- list.files(mzdatapath, pattern="extracted.mzData", - recursive = TRUE, full.names = TRUE) - src <- xcms:::xcmsSource(mzdatafiles[1]) - tmp <- loadRaw(src, includeMSn = TRUE) - - ## OLD code: - rid <- mzR:::rampOpen(mzdatafiles[1]) - rawdata <- mzR:::rampRawData(rid) - rawdata$MSn <- mzR:::rampRawDataMSn(rid) - mzR:::rampClose(rid) - rm(rid) - ## Ramp does not read polarity! - tmp$polarity <- rawdata$polarity - expect_equal(rawdata, tmp) - - ## Next example: - msnfile <- system.file("microtofq/MSMSpos20_6.mzML", package = "msdata") - src <- xcms:::xcmsSource(msnfile) - tmp <- loadRaw(src, includeMSn = TRUE) - ## expect_true(all(tmp$polarity == 1)) - ## OLD code: - rid <- mzR:::rampOpen(msnfile) - rawdata <- mzR:::rampRawData(rid) - rawdata$MSn <- mzR:::rampRawDataMSn(rid) - mzR:::rampClose(rid) - rm(rid) - rawdata$polarity <- tmp$polarity - expect_equal(rawdata, tmp) -}) diff --git a/vignettes/LC-MS-feature-grouping.Rmd b/vignettes/LC-MS-feature-grouping.Rmd new file mode 100644 index 000000000..6a32d9ddd --- /dev/null +++ b/vignettes/LC-MS-feature-grouping.Rmd @@ -0,0 +1,436 @@ +--- +title: "Compounding (grouping) of LC-MS features" +package: xcms +output: + BiocStyle::html_document: + toc_float: true +vignette: > + %\VignetteIndexEntry{LC-MS feature grouping} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} + %\VignetteDepends{xcms,msdata,BiocStyle,faahKO,pheatmap,MsFeatures} + %\VignettePackage{xcms} + %\VignetteKeywords{mass spectrometry, metabolomics} +--- + +```{r biocstyle, echo = FALSE, results = "asis"} +BiocStyle::markdown() +knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) +``` + +**Package**: `r Biocpkg("xcms")`
+**Authors**: Johannes Rainer
+**Modified**: `r file.info("LC-MS-feature-grouping.Rmd")$mtime`
+**Compiled**: `r date()` + +```{r init, results = "hide", echo = FALSE} +## Silently loading all packages +library(BiocStyle) +library(xcms) +library(MsFeatures) +register(SerialParam()) + +``` + +# Introduction + +In a typical LC-MS-based metabolomics experiment compounds eluting from the +chromatography are first ionized before being measured by mass spectrometry +(MS). During the ionization different (multiple) ions can be generated from the +same compound which all will be measured by MS. In general, the resulting data +is then pre-processed to identify chromatographic peaks in the data and to group +these across samples in the correspondence analysis. The result are distinct +LC-MS features, characterized by their specific m/z and retention time +range. Different ions generated during ionization will be detected as different +features. *Compounding* aims now at grouping such features presumably +representing signal from the same originating compound to reduce data set +complexity (and to aid in subsequent annotation steps). General MS feature +grouping functionality if defined by the `r Biocpkg("MsFeatures")` package with +additional functionality being implemented in the `xcms` package to enable the +compounding of LC-MS data. + +This document provides a simple compounding workflow using `xcms`. Note that the +present functionality does not (yet) *aggregate* or combine the actual features +per values, but does only define the feature groups (one per compound). + + +# Compounding of LC-MS data + +We demonstrate the compounding (feature grouping) functionality on the simple +toy data set used also in the `r Biocpkg("xcms")` package and provided through +the `faahKO` package. This data set consists of samples from 4 mice with +knock-out of the fatty acid amide hydrolase (FAAH) and 4 wild type +mice. Pre-processing of this data set is described in detail in the *xcms* +vignette of the `xcms` package. Below we load all required packages and the +result from this pre-processing updating also the location of the respective raw +data files on the current machine. + +```{r load-data} +library(xcms) +library(faahKO) +library(MsFeatures) + +data("xdata") +## Update the path to the files for the local system +dirname(xdata) <- c(rep(system.file("cdf", "KO", package = "faahKO"), 4), + rep(system.file("cdf", "WT", package = "faahKO"), 4)) +``` + +Before performing the feature grouping we inspect the result object. With +`featureDefinitions` we can extract the results from the correspondence +analysis. + +```{r fdev} +featureDefinitions(xdata) +``` + +Each row in this data frame represents the definition of one feature, with its +average and range of m/z and retention time. Column `"peakidx"` provides the +index of each chromatographic peak which is assigned to the feature in the +`chromPeaks` matrix of the result object. The `featureValues` function allows to +extract *feature values*, i.e. a matrix with feature abundances, one row per +feature and columns representing the samples of the present data set. + +Below we extract the feature values with and without *filled-in* peak +data. Without the gap-filled data only abundances from **detected** +chromatographic peaks are reported. In the gap-filled data, for samples in which +no chromatographic peak for a feature was detected, all signal from the m/z - +retention time range defined based on the detected chromatographic peaks was +integrated. + +```{r filled-not-filled} +head(featureValues(xdata, filled = FALSE)) +head(featureValues(xdata, filled = TRUE)) +``` + +In total `r nrow(featureDefinitions(xdata))` features have been defined in the +present data set, many of which most likely represent signal from different ions +(adducts or isotopes) of the same compound. The aim of the grouping functions of +are now to define which features most likely come from the same original +compound. The feature grouping functions base on the following +assumptions/properties of LC-MS data: + +- Features (ions) of the same compound should have similar retention time. +- The abundance of features (ions) of the same compound should have a similar + pattern across samples, i.e. if a compound is highly concentrated in one + sample and low in another, all ions from it should follow the same pattern. +- The peak shape of extracted ion chromatograms (EIC) of features of the same + compound should be similar as it should follow the elution pattern of the + original compound from the LC. + +The main method to perform the feature grouping is called `groupFeatures` which +takes an `XCMSnExp` object (result object from the `xcms` pre-processing) as +input as well as a parameter object to chose the grouping algorithm and specify +its settings. `xcms` provides and supports the following grouping approaches: + +- `SimilarRtimeParam`: perform an initial grouping based on similar retention + time. +- `AbundanceSimilarityParam`: perform a feature grouping based on correlation + of feature abundances (values) across samples. +- `EicSimilarityParam`: perform a feature grouping based on correlation of + EICs. + +Calling `groupFeatures` on an `xcms` result object will perform a feature +grouping assigning each feature in the data set to a *feature group*. These +feature groups are stored as an additional column called `"feature_group"` in +the `featureDefinition` data frame of the result object and can be accessed with +the `featureGroups` function. Any subsequent `groupFeature` call will +*sub-group* (refine) the identified feature groups further. It is thus possible +to use a single grouping approach, or to combine multiple of them to generate +the desired feature grouping. While the individual feature grouping algorithms +can be called in any order, it is advisable to use the `EicSimilarityParam` as +last refinement step, because it is the computationally most expensive one, +especially if applied to a result object without any pre-defined feature groups +or if the feature groups are very large. In the subsequent sections we will +apply the various feature grouping approaches subsequently. + +Note also that we perform here a grouping of all defined features, but it would +also be possible to *just* group a subset of interesting features (e.g. features +found significant by a statistical analysis of the data set). This is described +in the last section of this vignette. + + +## Grouping of features by similar retention time + +The most intuitive and simple way to group features is based on their retention +time. Before we perform this initial grouping we evaluate retention times and +m/z of all features in the present data set. + +```{r feature-rt-mz-plot, fig.width = 8, fig.height = 6, fig.cap = "Plot of retention times and m/z for all features in the data set."} +plot(featureDefinitions(xdata)$rtmed, featureDefinitions(xdata)$mzmed, + xlab = "retention time", ylab = "m/z", main = "features", + col = "#00000080") +grid() +``` + +Several features with about the same retention time (but different m/z) can be +seen, especially at the beginning of the LC. We thus below group features within +a retention time window of 10 seconds into *feature groups*. + +```{r} +xdata <- groupFeatures(xdata, param = SimilarRtimeParam(10)) +``` + +The results from the feature grouping can be accessed with the `featureGroups` +function. Below we determine the size of each of these feature groups (i.e. how +many features are grouped together). + +```{r} +table(featureGroups(xdata)) +``` + +In addition we visualize these feature groups with the `plotFeatureGroups` +function which shows all features in the m/z - retention time space with grouped +features being connected with a line. + +```{r feature-groups-rtime-plot, fig.width = 8, fig.height = 6, fig.cap = "Feature groups defined with a rt window of 10 seconds"} +plotFeatureGroups(xdata) +grid() +``` + +Let's assume we don't agree with this feature grouping, also knowing that there +were quite large shifts in retention times between runs. We thus re-perform the +feature grouping based on similar retention time with a larger rt window. Prior +to the `groupFeatures` call we have however to drop the previously defined +feature groups as otherwise these would be simply *refined* (i.e. further +subgrouped). + +```{r repeat} +## Remove previous feature grouping results to repeat the rtime-based +## feature grouping with different setting +featureDefinitions(xdata)$feature_group <- NULL + +## Repeat the grouping +xdata <- groupFeatures(xdata, SimilarRtimeParam(20)) +table(featureGroups(xdata)) +``` + +```{r feature-groups-rtime-plot2, fig.width = 8, fig.height = 6, fig.cap = "Feature groups defined with a rt window of 20 seconds"} +plotFeatureGroups(xdata) +grid() +``` + +Grouping by similar retention time grouped the in total +`r nrow(featureDefinitions(xdata))` features into +`r length(unique(featureGroups(xdata)))` feature groups. + + +## Grouping of features by abundance correlation across samples + +Assuming we are OK with the *crude* initial feature grouping from the previous +section, we can next *refine* the feature groups considering also the feature +abundances across samples. We can use the `groupFeatures` method with an +`AbundanceSimilarityParam` object. This approach performs a pairwise +correlation between the feature values (abundances; across samples) between all +features of a predefined feature group (such as defined in the previous +section). Features that have a correlation `>= threshold` are grouped +together. Feature grouping based on this approach works best for features with a +higher variability in their concentration across samples. Parameter `subset` +allows to restrict the analysis to a subset of samples and allows thus to +e.g. exclude QC sample pools from this correlation as these could artificially +increase the correlation. Other parameters are passed directly to the internal +`featureValues` call that extracts the feature values on which the correlation +should be performed. + +Before performing the grouping we could also evaluate the correlation of +features based on their (log2 transformed) abundances across samples with a +heatmap. + +```{r abundance-correlation-heatmap, fig.cap = "Correlation of features based on feature abundances.", fig.width = 6, fig.height = 16} +library(pheatmap) +fvals <- log2(featureValues(xdata, filled = TRUE)) + +cormat <- cor(t(fvals), use = "pairwise.complete.obs") +ann <- data.frame(fgroup = featureGroups(xdata)) +rownames(ann) <- rownames(cormat) + +res <- pheatmap(cormat, annotation_row = ann, cluster_rows = TRUE, + cluster_cols = TRUE) +``` + +Some large correlations can be observed for several groups of features, but many +of them are not within the same *feature group* that were defined in the +previous section (i.e. are not eluting at the same time). + +Below we use the `groupFeatures` with the `AbundanceSimilarityParam` to group +features with a correlation higher than 0.7 including both detected and +filled-in signal. Whether filled-in or only detected signal should be used in +the correlation analysis should be evaluated from data set to data set. By +specifying `transform = log2` we tell the function to log2 transform the +abundance prior to the correlation analysis. See the help page for +`groupFeatures` with `AbundanceSimilarityParam` in the `xcms` package for +details and options. + +```{r abundance-correlation} +xdata <- groupFeatures(xdata, AbundanceSimilarityParam(threshold = 0.7, + transform = log2), + filled = TRUE) +table(featureGroups(xdata)) +``` + +Many of the larger retention time-based feature groups have been splitted into +two or more sub-groups based on the correlation of their feature abundances. We +evaluate this for one specific feature group `"FG.040"` by plotting their +pairwise correlation. + +```{r abundance-correlation-fg040, fig.width = 8, fig.height = 8, fig.cap = "Pairwise correlation plot for all features initially grouped into the feature group FG.040."} +fts <- grep("FG.040", featureGroups(xdata)) +pairs(t(fvals[fts, ]), gap = 0.1, main = "FG.040") +``` + +Indeed, correlations can be seen only between some of the features in this +retention time feature group, e.g. between *FT117* and *FT120* and between +*FT195* and *FT200*. Note however that this abundance correlation suffers from +relatively few samples (8 in total), and a relatively small variance in +abundances across these samples. + +After feature grouping by abundance correlation, the +`r nrow(featureDefinitions(xdata))` features have been grouped into +`r length(unique(featureGroups(xdata)))` feature groups. + + +## Grouping of features by similarity of their EICs + +The chromatographic peak shape of an ion of a compound should be highly similar +to the elution pattern of this compound. Thus, features from the same compound +are assumed to have similar peak shapes of their EICs **within the same +sample**. A grouping of features based on similarity of their EICs can be +performed with the `groupFeatures` and the `EicSimilarityParam` object. It is +advisable to perform the peak shape correlation only on a subset of samples +(because peak shape correlation is computationally intense and because +chromatographic peaks of low intensity features are notoriously noisy). The +`EicSimilarityParam` approach has thus the parameter `n` which allows to select +the number of top samples (ordered by total intensity of feature abundances per +feature group) on which the correlation should be performed. With an value of `n += 3`, the 3 samples with the highest signal for all features in that group will +be first identified for each feature group and then a pairwise similarity +calculation will be performed within each of these samples. The resulting +similarity score from these 3 samples will then be aggregated into a single +score by taking the 75% quantile across the 3 samples. This value is then +subsequently compared with the cut-off for similarity (parameter `threshold`) +and features with a score `>= threshold` are grouped into the same feature +group. + +Below we group the features based on similarity of their EICs in the two samples +with the highest total signal for the respective feature groups. By default, a +Pearson correlation coefficient is used as similarity score but any +similarity/distance metric function could be used instead (parameter `FUN` of +the `EicSimilarityParam` - see the respective help page `?EicSimilarityParam` +for details and options). We define as a threshold a correlation coefficient of +0.7. + +```{r correlate-eic, message = FALSE} +xdata <- groupFeatures(xdata, EicSimilarityParam(threshold = 0.7, n = 2)) +``` + +This is the most computationally intense approach since it involves also loading +the raw MS data to extract the ion chromatograms for each feature. The results +of the grouping are shown below. + +```{r correlate-eic-result} +table(featureGroups(xdata)) +``` + +In most cases, pre-defined feature groups (by the abundance correlation) were +not further subdivided. Below we evaluate some of the feature groups, starting +with *FG.008.001* which was split into two different feature groups based on EIC +correlation. We first extract the EICs for all features from this initial +feature group. With `n = 1` we specify to extract the EIC only from the sample +with the highest intensity. + +```{r} +fts <- grep("FG.008.001", featureGroups(xdata)) +eics <- featureChromatograms(xdata, features = fts, + filled = TRUE, n = 1) +``` + +Next we plot the EICs using a different color for each of the subgroups. With +`peakType = "none"` we disable the highlighting of the detected chromatographic +peaks. + +```{r example-1-eic, fig.width = 8, fig.height = 6, fig.cap = "EICs of features from feature group FG.008.001 in the same sample. Shown are the actual intensities (left) and intensities normalized to 1 (right). Features being part of the same feature group after grouping by EIC similarity are shown in the same color."} +cols <- c("#ff000080", "#00ff0080") +names(cols) <- unique(featureGroups(xdata)[fts]) + +par(mfrow = c(1, 2)) +plotChromatogramsOverlay(eics, col = cols[featureGroups(xdata)[fts]], + lwd = 2, peakType = "none") +plotChromatogramsOverlay(normalize(eics), + col = cols[featureGroups(xdata)[fts]], + lwd = 2, peakType = "none") +``` + +One of the features within the original feature group was separated from the +other two because of a low similarity of their EICs. In fact, the feature's EIC +is shifted in retention time dimension and can thus not represent the signal +from the same compound. + +We evaluate next the sub-grouping in another feature group. + +```{r} +fts <- grep("FG.068.001", featureGroups(xdata)) +eics <- featureChromatograms(xdata, features = fts, + filled = TRUE, n = 1) +``` + +Next we plot the EICs using a different color for each of the subgroups. + +```{r example-2-eic, fig.width = 8, fig.height = 6, fig.cap = "EICs for features from feature group FG.068.001 in the same sample. Shown are the actual intensities (left) and intensities normalized to 1 (right). Features being part of the same feature group after grouping by EIC similarity are shown in the same color."} +cols <- c("#ff000080", "#00ff0080") +names(cols) <- unique(featureGroups(xdata)[fts]) + +par(mfrow = c(1, 2)) +plotChromatogramsOverlay(eics, col = cols[featureGroups(xdata)[fts]], + lwd = 2, peakType = "none") +plotChromatogramsOverlay(normalize(eics), + col = cols[featureGroups(xdata)[fts]], + lwd = 2, peakType = "none") + +``` + +Based on the EIC correlation, the initial feature group *FG.068.001* was grouped +into two separate sub-groups. + +The grouping based on EIC correlation on the pre-defined feature groups from the +previous sections grouped the `r nrow(featureDefinitions(xdata))` features into +`r length(unique(featureGroups(xdata)))` feature groups. + +## Grouping of subsets of features + +In the previous sections we were always considering all features from the data +set, but sometimes it could be desirable to just group a pre-defined set of +features, for example features found to be of particular interest in a certain +experiment (e.g. significant features). This can be easily achieved by assigning +the features of interest to a initial feature group, using `NA` as group ID +for all other features. + +To illustrate this we *reset* all feature groups by setting them to `NA` and +assign our features of interest (in this example just 30 randomly selected +features) to an initial feature group `"FG"`. + +```{r reset-feature-groups} +featureDefinitions(xdata)$feature_group <- NA_character_ + +set.seed(123) +fts_idx <- sample(1:nrow(featureDefinitions(xdata)), 30) +featureDefinitions(xdata)$feature_group[fts_idx] <- "FG" +``` + +Any call to `groupFeatures` would now simply sub-group this set of 30 +features. Any feature which has an `NA` in the `"feature_group"` column will be +ignored. + +```{r rtime-grouping} +xdata <- groupFeatures(xdata, SimilarRtimeParam(diffRt = 20)) +xdata <- groupFeatures(xdata, AbundanceSimilarityParam(threshold = 0.7)) +table(featureGroups(xdata)) +``` + + +# Session information + +```{r sessionInfo} +sessionInfo() +``` + +# References diff --git a/vignettes/new_functionality.Rmd b/vignettes/new_functionality.Rmd-notrun similarity index 97% rename from vignettes/new_functionality.Rmd rename to vignettes/new_functionality.Rmd-notrun index a989cfdc5..e5bb1138b 100644 --- a/vignettes/new_functionality.Rmd +++ b/vignettes/new_functionality.Rmd-notrun @@ -213,7 +213,7 @@ and the linear imputation approach that was defined in the `profBinLinBase` method. The latter performs linear interpolation only considering a certain neighborhood of missing values otherwise replacing the `NA` with a base value. -```{r binning-imputation-example, message = FALSE, fig.width = 10, fig.height = 7, fig.cap = 'Binning and missing value imputation results. Black points represent the input values, red the results from the binning and blue and green the results from the imputation (with method lin and linbase, respectively).' } +```{r binning-imputation-example, fig = TRUE, message = FALSE, fig.width = 10, fig.height = 7, fig.cap = 'Binning and missing value imputation results. Black points represent the input values, red the results from the binning and blue and green the results from the imputation (with method lin and linbase, respectively).' } ## Plot the actual data values. plot(X, Y, pch = 16, ylim = c(0, max(Y))) ## Visualizing the bins @@ -339,7 +339,7 @@ from the `profBinLin` method. The bin values for the first and 4th bin are clear wrong. The green colored points and lines represent the results from the `binYonX` and `imputeLinInterpol` functions (showing the correct binning and interpolation). -```{r profBinLin-problems, message = FALSE, fig.align = 'center', fig.width=10, fig.height = 7, fig.cap = "Illustration of the two bugs in profBinLin. The input values are represented by black points, grey vertical lines indicate the bins. The results from binning and interpolation with profBinLin are shown in blue and those from binYonX in combination with imputeLinInterpol in green." } +```{r profBinLin-problems, fig = TRUE, message = FALSE, fig.align = 'center', fig.width=10, fig.height = 7, fig.cap = "Illustration of the two bugs in profBinLin. The input values are represented by black points, grey vertical lines indicate the bins. The results from binning and interpolation with profBinLin are shown in blue and those from binYonX in combination with imputeLinInterpol in green."} plot(x = X, y = Y, pch = 16, ylim = c(0, max(Y, na.rm = TRUE)), xlim = c(0, 12)) ## Plot the breaks diff --git a/vignettes/new_functionality.org b/vignettes/new_functionality.org deleted file mode 100644 index 057cb3fa3..000000000 --- a/vignettes/new_functionality.org +++ /dev/null @@ -1,1221 +0,0 @@ -#+TITLE: New and modified functionality in xcms -#+AUTHOR: Johannes Rainer -#+EMAIL: johannes.rainer@eurac.edu -#+DESCRIPTION: -#+KEYWORDS: -#+LANGUAGE: en -#+OPTIONS: ^:{} toc:nil -#+PROPERTY: header-args :exports code -#+PROPERTY: header-args :session *R* - -#+BEGIN_EXPORT html ---- -title: "New and modified functionality in xcms" -author: -- name: Johannes Rainer -package: xcms -output: - BiocStyle::html_document: - toc_float: true -vignette: > - %\VignetteIndexEntry{New and modified functionality in xcms} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} - %\VignetteDepends{xcms,RColorBrewer} -bibliography: references.bib -csl: biomed-central.csl -references: -- id: dummy - title: no title - author: - - family: noname - given: noname ---- - -#+END_EXPORT - -#+NAME: biocstyle -#+BEGIN_SRC R :ravel echo = FALSE, results = "asis" - BiocStyle::markdown() -#+END_SRC - -* New functionality in =xcms= - -This document describes new functionality and changes to existing functionality -in the =xcms= package introduced during the update to version /3/. - -#+BEGIN_SRC R :ravel message = FALSE, warning = FALSE - library(xcms) - library(RColorBrewer) - register(SerialParam()) -#+END_SRC - -** Modernized user interface - -The modernization of the user interface comprises new classes for data -representation and new data analysis methods. In addition, the core logic for -the data processing has been extracted from the old methods and put into a set -of R functions, the so called core API functions (or =do_= functions). These -functions take standard R data structures as input and return standard R data -types as result and can hence be easily included in other R packages. - -The new user interface aims at simplifying and streamlining the =xcms= workflow -while guaranteeing data integrity and performance also for large scale -metabolomics experiments. Importantly, a simplified access to the original raw -data should be provided throughout the whole metabolomics data analysis workflow. - -# All objects in the new user interface ensuring -# data integrity /via/ validation methods and class versioning, all methods are -# tested internally in extensive unit tests to guarantee proper functionality. - -The new interface re-uses objects from the =MSnbase= Bioconductor package, such as -the =OnDiskMSnExp= object. This object is specifically designed for large scale MS -experiments as it initially reads just the scan header information from the mzML -while the mz-intensity value pairs from all or from selected spectra of a file -are read on demand hence minimizing the memory demand. Also, in contrast to -the old =xcmsRaw= object, the =OnDiskMSnExp= contains information from all files of -an experiment. In addition, all data normalization and adjustment methods -implemented in the =MSnbase= package can be directly applied to the MS data -without the need to re-implement such methods in =xcms=. Results from =xcms= -preprocessings, such as chromatographic peak detection or correspondence are -stored into the new =XCMSnExp= object. This object extends the =OnDiskMSnExp= object -and inherits thus all of its methods including raw data access. - -Class and method/function names follow also a new naming convention trying tp -avoid the partially confusing nomenclature of the original =xcms= methods (such as -the =group= method to perform the correspondence of peaks across samples). To -distinguish them from mass peaks, the peaks identified by the peak detection in -an LS/GC-MS experiment are referred to as /chromatographic peaks/. The respective -method to identify such peaks is hence called =findChromPeaks= and the identified -peaks can be accessed using the =XCMSnExp= =chromPeaks= method. The results from an -correspondence analysis which aims to match and group chromatographic peaks -within and between samples are called /features/. A feature corresponds to -individual ions with a unique mass-to-charge ratio (mz) and a unique retention -time (rt). The definition of such mz-rt features (i.e. the result from the -=groupChromPeaks= method) can be accessed /via/ the =featureDefinitions= method of -the =XCMSnExp= class. Finally, alignment (retention time correction) can be -performed using the =adjustRtime= method. - -The settings for any of the new analysis methods are bundled in /parameter/ -classes, one class for each method. This encapsulation of the parameters to a -function into a parameter class (such as =CentWaveParam=) avoids busy function -calls (with many single parameters) and enables saving, reloading and reusing -the settings. In addition, the parameter classes are added, along with other -information to the process history of an =XCMSnExp= object thus providing a -detailed documentation of each processing step of an analysis, with the -possibility to recall all settings of the performed analyses at any stage. In -addition, validation of the parameters can be performed within the parameter -object and hence is no longer required in the analysis function. - -** New naming convention - -Peaks identified in LC/GC-MS metabolomics are referred to as /chromatographic -peaks/ where possible to avoid any misconceptions with /mass peaks/ identified in -mz dimension. - -Methods for data analysis from the original =xcms= code have been renamed to avoid -potential confusions: - -+ *Chromatographic peak detection*: =findChromPeaks= instead of =findPeaks=: for new - functions and methods the term /peak/ is avoided as much as possible, as it is - usually used to describe a mass peak in mz dimension. To clearly distinguish - between these peaks and peaks in retention time space, the latter are referred - to as /chromatographic peak/, or =chromPeak=. - -+ *Correspondence*: =groupChromPeaks= instead of =group= to clearly indicate what is - being grouped. Group might be a sample group or a peak group, the latter being - referred to also by (mz-rt) /feature/. - -+ *Alignment*: =adjustRtime= instead of =retcor= for retention time correction. The - word /cor/ in /retcor/ might be easily misinterpreted as /correlation/ instead of - correction. - -** New data classes - -*** =OnDiskMSnExp= - -This object is defined and documented in the =MSnbase= package. In brief, it is a -container for the full raw data from an MS-based experiment. To keep the memory -footprint low the mz and intensity values are only loaded from the raw data -files when required. The =OnDiskMSnExp= object replaces the =xcmsRaw= object. - -*** =XCMSnExp= - -The =XCMSnExp= class extends the =OnDiskMSnExp= object from the =MSnbase= package and -represents a container for the xcms-based preprocessing results while (since it -inherits all functionality from its parent class) keeping a direct relation to -the (raw) data on which the processing was performed. An additional slot -=.processHistory= in the object allows to keep track of all performed processing -steps. Each analysis method, such as =findChromPeaks= adds an =XProcessHistory= -object which includes also the parameter class passed to the analysis -method. Hence not only the time and type of the analysis, but its exact settings -are reported within the =XCMSnExp= object. The =XCMSnExp= is thus equivalent to the -=xcmsSet= from the original =xcms= implementation, but keeps in addition a link to -the raw data on which the preprocessing was performed. - -*** =Chromatogram= - -The =Chromatogram= class (available in the =MSnbase= package since version 2.3.8) -allows a data representation that is orthogonal to the =Spectrum= class (also -defined in =MSnbase=). The =Chromatogram= class stores retention time and intensity -duplets and is designed to accommodate most use cases, from total ion -chromatogram, base peak chromatogram to extracted ion chromatogram and SRM/MRM -ion traces. - -=Chromatogram= objects can be extracted from =XCMSnExp= (and =MSnExp= and -=OnDiskMSnExp=) objects using the =chromatogram= method. - -Note that this class is still considered developmental and might thus undergo -some changes in the future. - -** Binning and missing value imputation functions - -The binning/profile matrix generation functions have been completely -rewritten. The new =binYonX= function replaces the binning of intensity values -into bins defined by their m/z values implemented in the =profBin=, =profBinLin= and -=profBinLinBase= methods. The =binYonX= function provides also additional functionality: - -+ Breaks for the bins can be defined based on either the number of desired bins - (=nBins=) or the size of a bin (=binSize=). In addition it is possible to provide - a vector with pre-defined breaks. This allows to bin data from multiple files - or scans on the same bin-definition. - -+ The function returns a list with element =y= containing the binned values and - element =x= the bin mid-points. - -+ Values in input vector =y= can be aggregated within each bin with different - methods: =max=, =min=, =sum= and =mean=. - -+ The index of the largest (or smallest for =method= being "min") within each bin - can be returned by setting argument =returnIndex= to =TRUE=. - -+ Binning can be performed on single or multiple sub-sets of the input vectors - using the =fromIdx= and =toIdx= arguments. This replaces the /M/ methods (such as - =profBinM=). These sub-sets can be overlapping. - -The missing value imputation logic inherently build into the =profBinLin= and -=profBinLinBase= methods has been implemented in the =imputeLinInterpol= function. - -The example below illustrates the binning and imputation with the =binYtoX= and -=imputeLinInterpol= functions. After binning of the test vectors below some of the -bins have missing values, for which we impute a value using -=imputeLinInterpol=. By default, =binYonX= selects the largest value within each -bin, but other aggregation methods are also available (i.e. min, max, mean, -sum). - -#+BEGIN_SRC R :ravel message = FALSE - ## Defining the variables: - set.seed(123) - X <- sort(abs(rnorm(30, mean = 20, sd = 25))) ## 10 - Y <- abs(rnorm(30, mean = 50, sd = 30)) - - ## Bin the values in Y into 20 bins defined on X - res <- binYonX(X, Y, nBins = 22) - - res -#+END_SRC - -As a result we get a =list= with the bin mid-points (=$x=) and the binned =y= values -(=$y=). - -Next we use two different imputation approaches, a simple linear interpolation -and the linear imputation approach that was defined in the =profBinLinBase= -method. The latter performs linear interpolation only considering a certain -neighborhood of missing values otherwise replacing the =NA= with a base value. - -#+BEGIN_SRC R :ravel binning-imputation-example, message = FALSE, fig.width = 10, fig.height = 7, fig.cap = 'Binning and missing value imputation results. Black points represent the input values, red the results from the binning and blue and green the results from the imputation (with method lin and linbase, respectively).' - ## Plot the actual data values. - plot(X, Y, pch = 16, ylim = c(0, max(Y))) - ## Visualizing the bins - abline(v = breaks_on_nBins(min(X), max(X), nBins = 22), col = "grey") - - ## Define colors: - point_colors <- paste0(brewer.pal(4, "Set1"), 80) - ## Plot the binned values. - points(x = res$x, y = res$y, col = point_colors[1], pch = 15) - - ## Perform the linear imputation. - res_lin <- imputeLinInterpol(res$y) - - points(x = res$x, y = res_lin, col = point_colors[2], type = "b") - - ## Perform the linear imputation "linbase" - res_linbase <- imputeLinInterpol(res$y, method = "linbase") - points(x = res$x, y = res_linbase, col = point_colors[3], type = "b", lty = 2) -#+END_SRC - -The difference between the linear interpolation method =lin= and =linbase= is that -the latter only performs the linear interpolation in a pre-defined neighborhood -of the bin with the missing value (=1= by default). The other missing values are -set to a base value corresponding to half of the smallest bin value. Both -methods thus yield same results, except for bins 15-17 (see Figure above). - -** Core functionality exposed /via/ simple functions - -The core logic from the chromatographic peak detection methods -=findPeaks.centWave=, =findPeaks.massifquant=, =findPeaks.matchedFilter= and -=findPeaks.MSW= and from all alignment (=group.*=) and correspondence (=retcor.*=) -methods has been extracted and put into functions with the common prefix -=do_findChromPeaks=, =do_adjustRtime= and =do_groupChromPeaks=, respectively, with the -aim, as detailed in issue [[https://github.com/sneumann/xcms/issues/30][#30]], to separate the core logic from the analysis -methods invoked by the users to enable also the use these methods using base R -parameters (i.e. without specific classes containing the data such as the -=xcmsRaw= class). This simplifies also the re-use of these functions in other -packages and simplifies the future implementation of the peak detection -algorithms for e.g. the =MSnExp= or =OnDiskMSnExp= objects from the =MSnbase= -Bioconductor package. The implemented functions are: - -+ *peak detection methods*: - + =do_findChromPeaks_centWave=: peak density and wavelet based peak detection - for high resolution LC/MS data in centroid mode \cite{Tautenhahn:2008fx}. - + =do_findChromPeaks_matchedFilter=: identification of peak in the - chromatographic domain based on matched filtration \cite{Smith:2006ic}. - + =do_findChromPeaks_massifquant=: identification of peaks using Kalman - filters. - + =do_findChromPeaks_MSW=: single spectrum, non-chromatographic peak detection. - -+ *alignment methods*: - + =do_adjustRtime_peakGroups=: perform sample alignment (retention time - correction) using alignment of /well behaved/ chromatographic peaks that are - present in most samples (and are expected to have the same retention time). - -+ *correspondence methods*: - + =do_groupChromPeaks_density=: perform chromatographic peak grouping (within - and across samples) based on the density distribution of peaks along the - retention time axis. - + =do_groupChromPeaks_nearest=: groups peaks across samples similar to the - method implemented in mzMine. - + =do_groupChromPeaks_mzClust=: performs high resolution correspondence on - single spectra samples. - -One possible drawback from the introduction of this new layer is, that more -objects get copied by R which /could/ eventually result in a larger memory demand -or performance decrease (while no such was decrease was observed up to now). - -** Usability improvements in the /old/ user interface - -+ =[= subsetting method for =xcmsRaw= objects that enables to subset an =xcmsRaw= - object to specific scans/spectra. -+ =profMat= method to extract the /profile/ matrix from the =xcmsRaw= object. This - method should be used instead of directly accessing the =@env$profile= slot, as - it will create the profile matrix on the fly if it was not pre-calculated (or - if profile matrix generation settings have been changed). - -* Changes due to bug fixes and modified functionality - -** Differences in linear interpolation of missing values (=profBinLin=). - -From =xcms= version 1.51.1 on the new binning functions are used, thus, the bug -described here are fixed. - -Two bugs are present in the =profBinLin= method (reported as issues [[https://github.com/sneumann/xcms/issues/46][#46]] and [[https://github.com/sneumann/xcms/issues/49][#49]] on -github) which are fixed in the new =binYonX= and =imputeLinInterpol= functions: - -+ The first bin value calculated by =profBinLin= can be wrong (i.e. not being the - max value within that bin, but the first). -+ If the last bin contains also missing values, the method fails to determine - a correct value for that bin. - -The =profBinLin= method is used in =findPeaks.matchedFilter= if the profile -method is set to "binlin". - -The example below illustrates both differences. - -#+BEGIN_SRC R - ## Define a vector with empty values at the end. - X <- 1:11 - set.seed(123) - Y <- sort(rnorm(11, mean = 20, sd = 10)) - Y[9:11] <- NA - nas <- is.na(Y) - ## Do interpolation with profBinLin: - resX <- xcms:::profBinLin(X[!nas], Y[!nas], 5, xstart = min(X), - xend = max(X)) - resX - res <- binYonX(X, Y, nBins = 5L, shiftByHalfBinSize = TRUE) - resM <- imputeLinInterpol(res$y, method = "lin", - noInterpolAtEnds = TRUE) - resM -#+END_SRC - -Plotting the results helps to better compare the differences. The black points -in the figure below represent the actual values of =Y= and the grey vertical lines -the breaks defining the bins. The blue lines and points represent the result -from the =profBinLin= method. The bin values for the first and 4th bin are clearly -wrong. The green colored points and lines represent the results from the =binYonX= -and =imputeLinInterpol= functions (showing the correct binning and interpolation). - -#+BEGIN_SRC R :ravel profBinLin-problems, message = FALSE, fig.align = 'center', fig.width=10, fig.height = 7, fig.cap = "Illustration of the two bugs in profBinLin. The input values are represented by black points, grey vertical lines indicate the bins. The results from binning and interpolation with profBinLin are shown in blue and those from binYonX in combination with imputeLinInterpol in green." - plot(x = X, y = Y, pch = 16, ylim = c(0, max(Y, na.rm = TRUE)), - xlim = c(0, 12)) - ## Plot the breaks - abline(v = breaks_on_nBins(min(X), max(X), 5L, TRUE), col = "grey") - ## Result from profBinLin: - points(x = res$x, y = resX, col = "blue", type = "b") - ## Results from imputeLinInterpol - points(x = res$x, y = resM, col = "green", type = "b", - pch = 4, lty = 2) - -#+END_SRC - -Note that by default =imputeLinInterpol= would also interpolate missing values at -the beginning and the end of the provided numeric vector. This can be disabled -(to be compliant with =profBinLin=) by setting parameter =noInterpolAtEnds= to -=TRUE= (like in the example above). - -** Differences due to updates in =do_findChromPeaks_matchedFilter=, respectively =findPeaks.matchedFilter=. - -The original =findPeaks.matchedFilter= (up to version 1.49.7) had several -shortcomings and bugs that have been fixed in the new -=do_findChromPeaks_matchedFilter= method: - -+ The internal iterative processing of smaller chunks of the full data (also - referred to as /iterative buffering/) could result, for some bin (step) sizes to - unstable binning results (discussed in issue [[https://github.com/sneumann/xcms/issues/47][#47]] on github): calculation of - the breaks, or to be precise, the actually used bin size was performed in each - iteration and could lead to slightly different sizes between iterations (due - to rounding errors caused by floating point number representations in C). - -+ The iterative buffering raises also a conceptual issue when linear - interpolation is performed to impute missing values: the linear imputation - will only consider values within the actually processed buffer and can thus - lead to wrong or inaccurate imputations. - -+ The =profBinLin= implementation contains two bugs, one that can result in - failing to identify the maximal value in the first and last bin (see issue - [[https://github.com/sneumann/xcms/issues/46][#46]]) and one that fails to assign a value to a bin (issue [[https://github.com/sneumann/xcms/issues/49][#49]]). Both are fixed - in the =do_findChromPeaks_matchedFilter= implementation. - -A detailed description of tests comparing all implementations is available in -issue [[https://github.com/sneumann/xcms/issues/52][#52]] on github. Note also that in course of these changes also the =getEIC= -method has been updated to use the new binning and missing value imputation -function. - -While it is strongly discouraged, it is still possible to use to /old/ code (from -1.49.7) by calling =useOriginalCode(TRUE)=. - -** Differences in =findPeaks.massifquant= - -+ Argument =scanrange= was ignored in the /original/ old code (issue [[https://github.com/sneumann/xcms/issues/61][#61]]). -+ The method returned a =matrix= if =withWave= was =0= and a =xcmsPeaks= object - otherwise. The updated version returns *always* an =xcmsPeaks= object (issue #60). - -** Differences in /obiwarp/ retention time correction - -Retention time correction using the obiwarp method uses the /profile/ matrix -(i.e. intensities binned in discrete bins along the mz axis). Profile matrix -generation uses now the =binYonX= method which fixed some problems in the original -binning and linear interpolation methods. Thus results might be slightly -different. - -Also, the =retcor.obiwarp= method reports (un-rounded) adjusted retention times, -but adjusts the retention time of eventually already identified peaks using -rounded adjusted retention times. The new =adjustRtime= method(s) does adjust -identified peaks using the reported adjusted retention times (not rounded). This -guarantees that e.g. removing retention time adjustment/alignment results from -an object restores the object to its initial state (i.e. the adjusted retention -times of the identified peaks are reverted to the retention times before -alignment). -See issue [[https://github.com/sneumann/xcms/issues/122][#122]] for more details. - -** =retcor.peaksgroups=: change in the way how /well behaved/ peak groups are ordered - -The =retcor.peakgroups= defines first the chromatographic peak groups that are -used for the alignment of all spectra. Once these are identified, the retention -time of the peak with the highest intensity in a sample for a given peak group -is returned and the peak groups are ordered increasingly by retention time -(which is required for the later fitting of either a polynomial or a linear -model to the data). The selection of the retention time of the peak with the -highest intensity within a feature (peak group) and samples, denoted as -/representative/ peak for a given feature in a sample, ensures that only the -retention time of a single peak per sample and feature is selected (note that -multiple chromatographic peaks within the same sample can be assigned to a -feature). In the original code the ordering of the peak groups was however -performed using the median retention time of the complete peak group (which -includes also potential additional peaks per sample). This has been changed and -the features are ordered now by the median retention time across samples of the -representative chromatographic peaks. - -** =scanrange= parameter in all =findPeaks= methods - -The =scanrange= in the =findPeaks= methods is supposed to enable the peak detection -only within a user-defined range of scans. This was however not performed in -each method. Due to a bug in =findPeaks.matchedFilter='s original code the -argument was ignored, except if the upper scan number of the user defined range -was larger than the total number of available scans (see issue [[https://github.com/sneumann/xcms/issues/63][#63]]). In -=findPeaks.massifquant= the argument was completely ignored (see issue [[https://github.com/sneumann/xcms/issues/61][#61]]) and, -while the argument was considered in =findPeaks.centWave= and feature detection -was performed within the specified scan range, but the original =@scantime= slot -was used throughout the code instead of just the scan times for the specified -scan indices (see issue [[https://github.com/sneumann/xcms/issues/64][#64]]). - -These problems have been fixed in version 1.51.1 by first sub-setting the -=xcmsRaw= object (using the =[= method) before actually performing the feature -detection. - -** =fillPeaks= (=fillChromPeaks=) differences - -In the original =fillPeaks.MSW=, the mz range from which the signal is to be -integrated was defined using - -#+BEGIN_SRC R :eval = "never", :ravel eval = FALSE - mzarea <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), - which.min(abs(mzs - peakArea[i, "mzmax"]))) - -#+END_SRC - -Depending on the data this could lead to the inclusion of signal in the -integration that are just outside of the mz range. In the new =fillChromPeaks= -method signal is integrated only for mz values >= mzmin and <= mzmax thus -ensuring that only signal is used that is truly within the peak area defined by -columns ="mzmin"=, ="mzmax"=, ="rtmin"= and ="rtmax"=. - -Also, the =fillPeaks.chrom= method did return ="into"= and ="maxo"= values of =0= if no -signal was found in the peak area. The new method does not integrate any signal -in such cases and does not fill in that peak. - -See also issue [[https://github.com/sneumann/xcms/issues/130][#130]] for more -information. - -** Problems with iterative binning of small data sub-sets in =findPeaks.matchedFilter= :noexport: - -The problem described here has been fixed in =xcms= >= 1.51.1. - -The iterative binning of only small sub-sets of data causes problems with -=profBinLinBase=, in which data imputation might be skipped in some iterations -while it is performed in others (also discussed in issue [[https://github.com/sneumann/xcms/issues/47][#47]] on github). - -Iterative buffering has both conceptual and computational issues. -+ Conceptual: =profBinLin= and =profBinLinBase= do a linear interpolation to impute - missing values. This is obviously affected by the input data, i.e. if only a - small subset of input data is considered, the imputation can change. - -+ Computational: the iterative buffering is slower than binning of the full - data. - -An additional problem comes with the implementation of the =profBin= method in -=xcms= that was used in the =findPeaks.matchedFilter= method for method being =lin=: -the bin size is calculated anew in each call, thus, due to rounding errors -(imprecision of floating point numbers), the bin size will be slightly different -in each call, which can lead to wrong binning results (see issue [[https://github.com/sneumann/xcms/issues/47][#47]] on github). - -Example with =profBinLinBase= resulting in an error: if =step= and =basespace= are -both =0.1= it seems that not in all buffer-generation iterations a interpolation -is initiated, i.e. the variable =ibase= in the C-function is sometimes set to =1= -(interpolation with neighboring bins) and sometimes to =0=. - -This is also extensively documented in issue [[https://github.com/sneumann/xcms/issues/52][#52]]. - -** Different binning results due to /internal/ and /external/ breaks definition :noexport: - -*FIXED*: the bin calculation in C uses now also a multiplication instead of a -addition thus resulting in identical breaks! - -Breaks calculated by the =breaks_on_nBins= function are equal as breaks calculated -using the =seq= function, but they are not identical. - -#+BEGIN_SRC R - library(xcms) - - ## Define breaks from 200 to 600 - brks <- seq(200, 600, length.out = 2002) - brks2 <- xcms:::breaks_on_nBins(200, 600, nBins = 2001) - all.equal(brks, brks2) - identical(brks, brks2) - - ## The difference is very small, but could still, in the binning - ## yield slightly different results depending on which breaks are - ## used. - range(brks - brks2) -#+END_SRC - -** Implementation and comparison for =matchedFilter= :noexport: - -These results base on the test =dontrun_test_do_findChromPeaks_matchedFilter_impl= -defined in /test_do_findChromPeaks_matchedFilter.R/ - -We have 4 different functions to test and compare to the original one: -+ *A*: =.matchedFilter_orig=: it's the original code. -+ *B*: =.matchedFilter_binYonX_iter=: uses the same sequential - buffering than the original code, but uses =binYonX= for binning and - =imputeLinInterpol= for interpolation. -+ *C*: =.matchedFilter_no_iter=: contains the original code, but - avoids sequential buffering, i.e. creates the whole matrix in one go. -+ *D*: =.matchedFilter_binYonX_no_iter=: my favorite: uses =binYonX= and - =imputeLinInterpol= and avoids the sequential buffering by creating the full - matrix in one go. - -Notes: for plain =bin= we expect that results with and without iterative buffering -are identical. - -*Comparisons*: -+ [X] *A* /vs/ original: - - =bin=: always OK. - - =binlin=: always OK. - - =binlinbase=: always OK. -+ [X] *B* /vs/ original: - - =bin=: OK unless =step= is =0.2=: most likely rounding problem. - - =binlin=: only once OK. Results are not equal, but comparable. - - =binlinbase=: similar but not equal. -+ [X] *C* /vs/ original: - - =bin=: OK unless =step= is =0.2=: - - =binlin=: never OK: due to interpolation on full, or subset data. - - =binlinbase=: similar but not equal. -+ [X] *D* /vs/ original: - - =bin=: OK unless =step= is =0.2=: most likely rounding problem. - - =binlin=: never OK: due to interpolation on full, or subset data AND due to - fix of the bug in =profBinLin=. - - =binlinbase=: similar but not equal. -+ [X] *B* /vs/ *C*: - - =bin=: always OK. - - =binlin=: results similar but not equal; higher =snthresh= results in higher - similarity. - - =binlinbase=: highly similar. -+ [X] *B* /vs/ *D*: - - =bin=: always OK. - - =binlin=: results similar but not equal; higher =snthresh= results in higher - similarity. - - =binlinbase=: highly similar. -+ [X] *C* /vs/ *D*: - - =bin=: always OK. - - =binlin=: results almost identical; higher =snthresh= results in higher - similarity. - - =binlinbase=: always OK. - - -*Conclusions*: -+ =none= (only binning, but no linear interpolation; corresponds to method =bin= in - =findPeaks.matchedFilter=): The results are identical between all methods for - all except one setting: with =step= being =0.2= (or =0.4= etc) on one test file the - results differ between methods with and without iterative buffering. The - reason for this is most likely rounding errors in floating point number - representation: =profBin= calculates the size of the bin in each call, thus, - when called repeatedly based on different input values, the size is slightly - different, which then can lead to binning differences (see also [[https://github.com/sneumann/xcms/issues/47][issue #47]] on - github). - -+ =lin= (binning followed by linear interpolation to impute missing values; method - =binlin= in =findPeaks.matchedFilter=): There are two reasons for differences - observed here: 1) the first bin value (and eventually the last bin value) are - sometimes wrong (issue [[https://github.com/sneumann/xcms/issues/46][#46]]). This results in differences between =binYonX= and - =imputeKinInterpol= based approach and =profBinLin= (with the former being - presumably correct). Also, this has a bigger influence when the - binning/missing value imputation is performed iteratively. Thus, the - difference between the =binYonX= - =imputeLinInterpol= and =profBinLin= approach - without iterative buffering are only very small. 2) Linear interpolation on - the full data set compared to subsequent sub-sets will undoubtedly lead to - differences. Because based on the full data set, the non-iterative approach - results in the expected and more accurate results. - -+ =linbase=: results are identical if =basespace= (respectively =distance=) is such - that no interpolation takes place. With interpolation (e.g. =distance= being =1=) - differences (albeit small) are present between approaches with and without iterative - buffering. The results for the approaches without iterative buffering (using - =profBinBase= respectively =binYonX= with =imputeLinIterpol=) are identical, again - arguing in favor of these approaches. - -Thus, summarizing, the approaches without the iterative buffering yield more -reliable (and presumably correct) results. Given also that the =binYonX= in -combination with =imputeLinInterpol= identify similar peaks than the non-iterative -approaches using the original code, we can change the code to use these former -methods as default. - -* Under the hood changes - -These changes and updates will not have any large impact on the day-to-day use of -=xcms= and are listed here for completeness. - -+ From =xcms= version 1.51.1 on the default methods from the =mzR= package are used - for data import. Besides ensuring easier maintenance, this enables also data - import from /gzipped/ mzML files. - - -* Introducing =DRanges=. :noexport: - -*Note*: the code for this is in the =dranges= branch. The last status/problem is -that it is not quite clear how to determine the /correct/ number of decimal -places: =as.character= uses =options()$scipen= to determine how many decimal places -are represented, =sprintf= allows much more decimal places, e.g. with =%.30f=, but -these become unstable and random. The /best/ solution for now would be to limit to -a certain number of /secure/ decimal places (16?) and specify this as global -option that might be changed later. Check also =.Machine= for details on -precision, max integer etc. Note also that we are pretty much limited by the -largest =integer= that can be represented. - -The =multiplier= thus has definitely be smaller than: -#+BEGIN_SRC R - maxPos <- nchar(as.character(.Machine$integer.max)) - maxMult <- 10^maxPos - -#+END_SRC - -Note that we would actually just have to check that the to-be-transformed -integers don't get too large; thus we could allow more decimal places. - -The idea is to use all of the =IRanges= functionality, but for any =numeric= -ranges. Examples for such ranges could be the m/z range of a feature, or the -retention time range defining a feature. - -The idea is pretty simple, the =DRanges= (/D/ standing for /double/, alternatively /N/ -for /numeric/) extends the =IRanges=, the =start= and =end= of the =IRanges= are -calculated by multiplying the start and end defining the numeric range by =10^d= -with =d= being the number of decimal places. - -First thing is to get the number of decimal places: using code from a pretty old -post on stackoverflow -(http://stackoverflow.com/questions/5173692/how-to-return-number-of-decimal-places-in-r): - - -#+BEGIN_SRC R - decimalplaces <- function(x) { - if ((x %% 1) != 0) { - nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed=TRUE)[[1]][[2]]) - } else { - return(0) - } - } - - num.decimals <- function(x) { - stopifnot(class(x)=="numeric") - x <- sub("0+$","",x) - x <- sub("^.+[.]","",x) - nchar(x) - } - - -#+END_SRC - -The former is actually faster. - -Eventually even =C=? -http://stackoverflow.com/questions/1083304/c-c-counting-the-number-of-decimals - -#+BEGIN_EXAMPLE - string number = "543.014"; - size_t dotFound; - stoi(number, &dotFound)); - string(number).substr(dotFound).size() -#+END_EXAMPLE - -Be aware that =number= MUST be a float/double! - -alternatively: -http://stackoverflow.com/questions/9843999/calculate-number-of-decimal-places-for-a-float-value-without-libraries. - -* Currently internal functionality :noexport: - -** =ProcessHistory=: track processing steps - -This functionality comprises the =ProcessHistory= class and the =.processHistory= -slot of the =xcmsSet= objects. The =xcmsSet= function already adds a feature -detection processing step for each file to this slot. Subsetting of =xcmsSet= -objects with =[= or =split= correctly process also this slot as does concatenation -using =c=. For processing steps other than /feature detection/ a new element should -be added to the variable =.PROCSTEPS= (defined in /DataClasses.R/. -At some point we could implement methods =getProcessErrors= and =getProcessHistory= -(essentially just calling the =.getProcessErrors= and =.getProcessHistory= -functions in /functions-xcmsSet.R/. - -Some additional functionality that could be implemented: -+ Sort the processing history by the =date= slot. -+ Save also analysis properties into an object extending the =ProcessHistory=: - this would enable to get the exact settings for each processing step. - -* Internal changes :noexport: - -** Changing the way how data is imported - -Random errors happen when processing a large number of files with =xcms=. This -might indicate some memory problems, eventually related to the =mzR= package -(similar to the ones spotted in =MSnbase=). - -What I want to test: -+ [X] Does =mzR::openMSFile= work also for /netCDF/? No. we would have to check for - the file type and specify the =backend= based on that. -+ [X] What about writing a new importer that does not need all the objects and - the presumably old code in =mzR=? -> =readRawData=. - -That has been fixed (see above). The /default/ methods for data import form =mzR= -are now used by default. - -** Functions and methods to be deprecated and removed. - -+ [ ] =xcmsSource= method: not needed anymore, reading is done by =readRawData=. -+ [ ] =loadRaw=, =initialize= for =netCdfSource= and =rampSource=: replaced by - =readRawData=. -+ [ ] =netCdfSource= and =rampSource= S4 classes: not needed anymore, reading is - done by =readRawData=. - -** Unneeded /R/ files - -+ [ ] /netCDF.R/. -+ [ ] /ramp.R/. - -*** Unit tests to be removed - -+ [ ] /runit.ramp.R/. - -* Deprecated functions and files - -Here we list all of the functions and related files that are deprecated. - -+ =xcmsParallelSetup=, =xcmsPapply=, =xcmsClusterApply=: use =BiocParallel= package - instead to setup and perform parallel processing, either /via/ the =BPPARAM= - parameter to function and methods, or by calling =register= to globally set - parallel processing. - -+ =profBin=, =profBinM=, =profBinLin=, =profBinLinM=, =profBinLinBase=, =profBinLinBaseM=: - replaced by the =binYonX= and =imputeLinInterpol= functions. Also, to create or - extract the profile matrix from an =xcmsRaw= object, the =profMat= method. - - -** Deprecated - -*** xcms 1.49: - -+ =xcmsParallelSetup= (Deprecated.R) -+ =xcmsPapply= (Deprecated.R) -+ =xcmsClusterApply= (Deprecated.R) - -*** xcms 1.51: - -+ =profBin= (c.R) -+ =profBinM= (c.R) -+ =profBinLin= (c.R) -+ =profBinLinM= (c.R) -+ =profBinLinBase= (c.R) -+ =profBinLinBaseM= (c.R) - -** Defunct - -* TODOs :noexport: - -** DONE Deprecate binning functions. - CLOSED: [2017-02-23 Thu 07:47] - - - State "DONE" from "TODO" [2017-02-23 Thu 07:47] -All done except for the retention time correction!!! - -** DONE Continue implementing the =do_= functions. - CLOSED: [2017-02-23 Thu 07:47] - - State "DONE" from "TODO" [2017-02-23 Thu 07:47] -** DONE Define a new object to contain the preprocessing results - CLOSED: [2017-02-23 Thu 07:47] - - - State "DONE" from "TODO" [2017-02-23 Thu 07:47] -This object should replace in the long run the =xcmsSet= object providing the same -functionality while in addition add a better integration of the original raw -data files. The object should contain: - -+ Peak/feature data (similar to the =xcmsSet@peaks= slot). -+ Alignment across samples information (similar to the =xcmsSet@groups= slot). -+ Corrected retention time (similar to the =xcmsSet@rt$adjusted= slot). -+ All experimental and phenotypical information. -+ A /link/ to the raw data. -+ History on data manipulation and processing. - -Based on these prerequisites, an object extending Biobase's =MSnExp= or -=OnDiskMSnExp= would be ideal. The =MSnExp= would however be /too mighty/ (as it -contains all of the raw data) and the more light weight =OnDiskMSnExp= should -hence be used. While being somewhat similar to the =xcmsSet= =xcmsRaw= object setup, -the new implementation would ensure a better and less error prone import of the -raw (or even processed) data. Some data (TIC etc) are even cached within the -=OnDiskMSnExp= enabling faster data access. - -Note that the lack of easy access to raw data disqualifies the =MSnSet= object -from the =MSnbase= package. - -The feature data should be placed into the =assayData= environment of the object -to avoid copying etc of the data. Check also =assayDataElement()= in =MSnbase=. - -*** Some notes on data usage: -+ Subset by sample: have to extract the corresponding features from the - features matrix in =assayData= and remove all grouping/alignment - information. This actually bypasses also the problem to check that feature - indexes have to be updated. - -+ Rename =peaks= to =features=. - -+ Better alternative for =groups=: =alignedFeatures=. -+ =groupval=? =featureValues=. - -*** Design and implementation: -+ =features= should be still implemented as =matrix= (for performance issues). -+ Alignment information could be implemented as =DataFrame= with the indices added - to a column =idx=. - -** DONE Rename objects, functions and methods - CLOSED: [2017-02-23 Thu 07:47] - - - State "DONE" from "TODO" [2017-02-23 Thu 07:47] -+ [X] =features=: =chromPeaks=. -+ [X] =hasDetectedFeatures=: =hasChromPeaks=. -+ [ ] feature: chromatographic peak. -+ [X] =detectFeatures=: =findChromPeaks=. -+ [X] =dropFeatures=: =dropChromPeaks=. -+ [X] featureDetection-centWave: findChromPeaks-centWave -+ [X] =validFeatureMatrix=: =validChromPeaksMatrix=. - -Correspondence. -+ [ ] feature groups: features (aligned and grouped chromatographic peaks). -+ [X] =groupFeatures=: =groupChromPeaks=. -+ [X] =hasAlignedFeatures=: =hasFeatures=. -+ [X] =featureGroups=: =featureDefinitions=, =featureValue= (=groupval=). -+ [X] =FeatureDensityParam=: =PeakDensityParam=. -+ [X] =NearestFeaturesParam=: =NearestPeaksParam= -+ [ ] feature alignment methods: peak alignment methods -+ [X] =$features=: =$chromPeaks=. -+ [X] =featureidx=: =peakidx=. -+ [X] =featureIndex=: =peakIndex=. -+ [X] =dropFeatureGroups=: =dropFeatureDefinitions=. -+ [ ] Peak alignment: Peak grouping -+ [X] =.PROCSTEP.PEAK.ALIGNMENT=: =.PROCSTEP.PEAK.GROUPING=. - -Param classes: -+ [X] =extraFeatures=: =extraPeaks=. - -RT correction. -+ [X] =featureGroups= retention time correction: =peakGroups=. -+ [X] =FeatureGroupsParam=: =PeakGroupsParam=. -+ [X] =features=: =peaks= -+ [X] =featureIndex=: =peakIndex= -+ [X] =getFeatureGroupsRtMatrix=: =getPeakGroupsRtMatrix= -+ [X] =applyRtAdjToFeatures=: =applyRtAdjToPeaks=. -+ [X] =do_groupFeatures_mzClust=: =do_groupPeaks_mzClust=. - -+ [X] Check =maxFeatures= parameter for =do_groupChromPeaks_density=. Is it really - the maximum number of features, or of peaks? - -+ [X] Alignment: retention time correction between samples - \cite{Sugimoto:2012jt}. -+ [X] Correspondence: (grouping) registration of recurring signals from the same - analyte over replicate samples \cite{Smith:2014di}. - - -** DONE Implement the =Chromatogram= class - CLOSED: [2017-07-10 Mon 15:12] - - - State "DONE" from "TODO" [2017-07-10 Mon 15:12] -Now, to accommodate all possibilities: -https://en.wikipedia.org/wiki/Triple_quadrupole_mass_spectrometer -Triple Q-TOF measurements: -+ Product Ion Scan - - Q1 fixed - - Q3 scan -+ Precursor Ion Scan - - Q1 scan - - Q3 fixed -+ Neutral Loss Scan - - Q1 scan at mz = m_{product} - - Q3 scan at mz = m_{product} - m_{neutral molecule} -+ Selected Reaction monitoring (SRM, MRM): Q1 is used to select the precursor - ion, Q3 cycles through the product ions. Precursor/product pair is referred to - as a /transition/. - - Q1 fixed at mz = m_{precursor} - - Q3 scan at mz = m_{product} - - -Other resources: -https://en.wikipedia.org/wiki/Mass_chromatogram#Selected-ion_monitoring_chromatogram_.28SIM.29 -http://proteowizard.sourceforge.net/dox/structpwiz_1_1msdata_1_1_chromatogram.html -https://sourceforge.net/p/proteowizard/mailman/message/27571266/ - -*** Move =Chromatogram= to MSnbase - -+ [X] Add =Chromatogram= to MSnbase. -+ [ ] Remove =Chromatogram= from xcms. -+ [ ] Move functions and methods to MSnbase. -+ [ ] Fix xcms to import all required stuff from MSnbase. - - -** TODO Implement a =findBackgroundIons= method - -Check on one of our own files. - -#+BEGIN_SRC R - library(xcms) - - rd <- readMSData("/Volumes/Ext64/data/2016/2016-11/NoSN/250516_QC_NORM_3_POS_3.mzML", - mode = "onDisk") - - ## Evaluate the mz-rt matrix - can we spot already something there? - sps <- spectra(rd) - dfs <- lapply(sps, as.data.frame) - ## cut the intensities at 5000 - dfs <- lapply(dfs, function(z) { - z[z[, "i"] > 5000, "i"] <- 5000 - return(z) - }) - - library(RColorBrewer) - library(lattice) - colR <- colorRampPalette(brewer.pal(9, "YlOrRd"))(255) - brks <- do.breaks(c(0, 5000), length(colR)) - - mzR <- range(mz(rd)) - rtR <- range(rtime(rd)) - - plot(3, 3, pch = NA, xlim = rtR, ylim = mzR) - for(i in 1:length(dfs)) { - intC <- level.colors(dfs[[i]]$i, at = brks, col.regions = colR) - xs <- rep(rtime(rd)[i], length(intC)) - points(x = xs, y = dfs[[i]]$mz, col = intC, cex = 0.1, pch = 16) - } - ## level.colors(x, at = brks, col.regions = colR) -#+END_SRC - -A simple approach would be to walk along the mz and evaluate whether, for a -certain mz (bin?) the signal is higher than a threshold in 70% of the spectra, -i.e. that the % of values is larger than a percentage. - - -** DONE Reduce R CMD check time: - CLOSED: [2017-07-10 Mon 15:12] - - - State "DONE" from "TODO" [2017-07-10 Mon 15:12] -- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: 18m34.630s -- xcms 2.99.3, MSnbase 2.3.4, mzR 2.9.12: 20m41.440s - -After tuning xcms: -- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: 14m30.454s - -After enabling parallel processing for the unit tests: -- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: user 21m46.385s - -After enabling parallel processing (registering multicoreparam) for the unit -tests: -- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: user 15m53.039s. - -tests with long runtime: -+ [ ] testPresentAbsentSumAfterFillPeaks: 13.241 -+ [X] test_extractChromatograms (runit.Chromatogram.R): 23.800: Can not reduce - this. -+ [X] test_obiwarp (runit.do_adjustRtime.R): 17.594: Can not reduce this. -+ [ ] test_findChromPeaks_centWaveWithPredIsoROIs - (runit.do_findChromPeaks_centWave_isotopes.R): 13.623 -+ [X] test_do_groupChromPeaks_nearest (runit.do_groupChromPeaks.R): 25.193: OK. -+ [X] test_fillChromPeaks_matchedFilter (runit.fillChromPeaks.R): 16.843: Can - not reduce. -+ [X] test.fillPeaks_old_vs_new (runit.fillPeaks.R): 37.924: dontrun -+ [X] test.fillPeaksColumns (runit.fillPeaks.R): 33.552: OK. -+ [X] testFillPeaksPar (runit.fillPeaks.R): 24.752: dontrun -+ [X] test_getEICxset (runit.getEIC.R): 27.144: might be faster. -+ [X] test.getEICretcor (runit.getEIC.R): 17.018: nope. -+ [X] test.issue7 (runit.getEIC.R): 66.020: dontrun -+ [X] test.getXcmsRaw (runit.getXcmsRaw.R): 26.558: might be faster. -+ [X] testMultiFactorDiffreport (runit.phenoData.R): 13.067: nothing to do. - - - - -** DONE mzR/MSnbase timings - CLOSED: [2017-06-14 Wed 11:02] - - - State "DONE" from "TODO" [2017-06-14 Wed 11:02] -#+BEGIN_SRC R - library(MSnbase) - library(msdata) - fl <- proteomics(full.names = TRUE)[3] - - - ## MSnbase: 2.3.4 - ## mzR: 2.11.2 - of <- mzR::openMSfile(fl, backend = "pwiz") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.953 0.036 0.986 - mzR::close(of) - - of <- mzR::openMSfile(fl, backend = "Ramp") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.449 0.011 0.460 - mzR::close(of) - - system.time(tmp <- readMSData(fl, mode = "onDisk")) - ## user system elapsed - ## 1.515 0.089 1.596 - - ########################################### - ## MSnbase: 2.3.4 - ## mzR: 2.11.3 - of <- mzR::openMSfile(fl, backend = "pwiz") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.974 0.039 1.009 - mzR::close(of) - - of <- mzR::openMSfile(fl, backend = "Ramp") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.422 0.010 0.433 - mzR::close(of) - - system.time(tmp <- readMSData(fl, mode = "onDisk")) - ## user system elapsed - ## 1.509 0.093 1.594 - - fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_14.mzML" - of <- mzR::openMSfile(fl, backend = "pwiz") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.138 0.042 0.180 - mzR::close(of) - - of <- mzR::openMSfile(fl, backend = "Ramp") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.067 0.023 0.089 - mzR::close(of) - - system.time(tmp <- readMSData(fl, mode = "onDisk")) - ## user system elapsed - ## 0.708 0.105 0.814 - - ## tmp: 1720 spectra. - - ############################################ - ## MSnbase: 2.3.4 - ## mzR: 2.11.3, without reading the ion injection time - of <- mzR::openMSfile(fl, backend = "pwiz") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.969 0.040 1.007 - mzR::close(of) - - of <- mzR::openMSfile(fl, backend = "Ramp") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.449 0.011 0.460 - mzR::close(of) - - system.time(tmp <- readMSData(fl, mode = "onDisk")) - ## user system elapsed - ## 1.556 0.089 1.638 - - fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_14.mzML" - of <- mzR::openMSfile(fl, backend = "pwiz") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.138 0.064 0.214 - mzR::close(of) - - of <- mzR::openMSfile(fl, backend = "Ramp") - system.time(hdr <- header(of)) - ## user system elapsed - ## 0.065 0.022 0.088 - mzR::close(of) - - system.time(tmp <- readMSData(fl, mode = "onDisk")) - ## user system elapsed - ## 0.709 0.110 0.833 - - ## tmp: 1720 spectra. -#+END_SRC - - -** TODO Re-add plotting functions - -There was some request to re-add the plotting functionality to back to =xcms=. -I would however like to create such plots not *during*, but *after* the -analysis. One first example would be the results from the grouping, i.e. the -=group.density= method. - -+ =groupDensity=: loop through the features and create a plot for each one. The - function could be called something like =plotGroupingResult=. - - loop through each feature. - - plot all peaks in the mz range of all peaks in the group (+/- something) and - highlight the peaks belonging to the group. - #+BEGIN_SRC R - pks <- chromPeaks(xod) - pks <- pks[pks[, "sample"] == 1, ] - ## plot the rectangular data. - xod_file <- filterFile(xod, file = 1, keepAdjustedRtime = TRUE) - mzr <- range(mz(xod_file)) - rtr <- range(rtime(xod_file)) - plot(3, 3, pch = NA, xlim = rtr, ylim = mzr, xlab = "rentention time", - ylab = "mz", main = basename(fileNames(xod_file))) - rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], ybottom = pks[, "mzmin"], - ytop = pks[, "mzmax"], border = "#00000060") - - ## peak density along retention time axis. - dens <- density(pks[, "rt"]) - plot(dens) - hst <- hist(pks[, "rt"], breaks = 64) - plot(hst$mids, hst$counts, type = "S") - plot(hst) - addi <- diff(hst$mids)[1] / 2 - points(hst$mids + addi, hst$counts, type = "S", col = "red") - - ## Plot of all peaks along retention time axis. - hst <- lapply(split(chromPeaks(xod)[, "rt"], - f = chromPeaks(xod)[, "sample"]), - hist, breaks = 64) - max_count <- max(unlist(lapply(hst, function(z) max(z$counts)))) - ## Initialize plot: - plot(3, 3, pch = NA, xlab = "retention time", ylab = "peak count", - xlim = range(rtime(xod)), ylim = c(0, max_count)) - addi <- diff(hst[[1]]$mids)[1] / 2 - lapply(hst, function(z) points(z$mids + addi, z$counts, col = "#00000060", - type = "S")) - #+END_SRC - -+ Plot identified chromatographic peaks. Identified and failed peaks could be - simply plotted manually. One needs to know however where to look. - - =plot,Chromatogram=. - - =highlightChromPeaks=. - - Eventually it might be nice to create a plot from above, plotting the mz vs - rt of one file and highlighting the identified peaks: =plotChromPeaks=. -+ Plot retention time adjustment results. - - =plotAdjustedRtime= should do the trick. -+ Plot grouping results: - - =plot,Chromatogram=. - - =highlightChromPeaks=. - - =plotChromPeakDensity=. - -** TODO Implement the =calibrate= method in the new user interface - -First thing is to understand what the method does. -See /methods-xcmsSet.R/ for the =calibrate= method. See /matchpeaks.R/ for the -=matchpeaks= and =estimate= functions. -Input: =xcmsSet= object and list of numeric vectors representing the m/z values of -the calibrants. Apparently, the calibrants have to be close to real peaks, -otherwise they will not be adjusted/matched correctly. -For each sample: -- get the peaks of that sample, i.e. the =@peaks= matrix. -- call the =matchpeaks= function on the peaks matrix and the calibrants (which is - supposed to be a numeric vector of mz values. - -Global concept: calibration is done on the peaks. Questions: -+ Is there a global calibration value for a file we could store into the - =XCMSnExp= object? If yes we could even apply the calibration to the individual - mz values of a file. Actually, yes, the calibration results could be stored on - a per-file basis in the =XCMSnExp=. Problem is we can not apply one global - calibration to all files. So adding this to the processing queue seems to be a - no-go. - -+ We can add a function to the =processingQueue= that applies different - adjustments depending on the =fileIdx=. Be aware! All subsetting/filtering - approaches do have to update the file index in the =processingQueue=. - -*Idea*: don't need the result class below - should be enough to add the -calibration function (inclusive parameters) to the =processingQueue= of the =MSnExp= object! - -*NOTE*: to enable calibration of =mz= values of a =Spectrum=: -+ Implement a =CalibrationResult= object with slots: - - method - - minMz - - maxMz - - fileIdx - - slope - - intercept -+ Enable adding a =list= of these objects into =MsFeatureData=. -+ Add methods to drop/delete such objects from =MsFeatureData=. -+ =dropChromPeaks= should also drop the =list=. -+ Add function to subset the =list= in the =MsFeatureData=. -+ On subsetting: do also subset the =list=. -+ Implement a =dropCalibration= method that does restore the original mz values. - - - -* References diff --git a/vignettes/references.bib b/vignettes/references.bib index 698e1ef2b..67db858cb 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -69,3 +69,24 @@ @article{Smith:2013gr month = nov } +@article{Ludwig:2018hv, +author = {Ludwig, Christina and Gillet, Ludovic and Rosenberger, George and Amon, Sabine and Collins, Ben C and Aebersold, Ruedi}, +title = {{Data-independent acquisition-based SWATH-MS for quantitative proteomics: a tutorial.}}, +journal = {Molecular systems biology}, +year = {2018}, +volume = {14}, +number = {8}, +pages = {e8126}, +month = aug, +affiliation = {Bavarian Center for Biomolecular Mass Spectrometry (BayBioMS), Technical University of Munich (TUM), Freising, Germany tina.ludwig@tum.de.}, +doi = {10.15252/msb.20178126}, +pmid = {30104418}, +pmcid = {PMC6088389}, +language = {English}, +read = {Yes}, +rating = {0}, +date-added = {2019-10-03T11:48:47GMT}, +date-modified = {2019-10-03T13:31:59GMT}, +abstract = {Many research questions in fields such as personalized medicine, drug screens or systems biology depend on obtaining consistent and quantitatively accurate proteomics data from many samples. SWATH-MS is a specific variant of data-independent acquisition (DIA) methods and is emerging as a technology that combines deep proteome coverage capabilities with quantitative consistency and accuracy. In a SWATH-MS measurement, all ionized peptides of a given sample that fall within a specified mass range are fragmented in a systematic and unbiased fashion using rather large precursor isolation windows. To analyse SWATH-MS data, a strategy based on peptide-centric scoring has been established, which typically requires prior knowledge about the chromatographic and mass spectrometric behaviour of peptides of interest in the form of spectral libraries and peptide query parameters. This tutorial provides guidelines on how to set up and plan a SWATH-MS experiment, how to perform the mass spectrometric measurement and how to analyse SWATH-MS data using peptide-centric scoring. Furthermore, concepts on how to improve SWATH-MS data acquisition, potential trade-offs of parameter settings and alternative data analysis strategies are discussed.}, +url = {https://onlinelibrary.wiley.com/doi/abs/10.15252/msb.20178126} +} diff --git a/vignettes/swath_tomato.Rmd_notrun b/vignettes/swath_tomato.Rmd_notrun new file mode 100644 index 000000000..480b40428 --- /dev/null +++ b/vignettes/swath_tomato.Rmd_notrun @@ -0,0 +1,67 @@ +--- +title: "SWATH data analysis with xcms (Tomato)" +package: xcms +output: + BiocStyle::html_document: + toc_float: true +vignette: > + %\VignetteIndexEntry{SWATH data analysis with xcms (Tomato)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} + %\VignetteDepends{xcms,RColorBrewer,faahKO,pander,magrittr,BiocStyle,pheatmap} + %\VignettePackage{xcms} + %\VignetteKeywords{mass spectrometry, metabolomics} +bibliography: references.bib +csl: biomed-central.csl +references: +- id: dummy + title: no title + author: + - family: noname + given: noname +--- + +```{r biocstyle, echo = FALSE, results = "asis" } +BiocStyle::markdown() +``` + +**Package**: `r Biocpkg("xcms")`
+**Authors**: Johannes Rainer, Michael Witting
+**Modified**: `r file.info("swath_tomato.Rmd")$mtime`
+**Compiled**: `r date()` + +```{r init, message = FALSE, echo = FALSE, results = "hide" } +## Silently loading all packages +library(BiocStyle) +library(xcms) +library(pander) +register(SerialParam()) +``` + + +```{r metabolites} +library(Risa) +ISAmtbls297 <- readISAtab(find.package("mtbls297")) +assay <- ISAmtbls297@assay.tabs[[1]] +msfiles <- paste(find.package("mtbls297"), "mzML", + assay@assay.file$"Derived Spectral Data File", + sep="/") + +tomato <- readMSData(msfiles, mode = "onDisk") + +table(msLevel(tomato)) +head(isolationWindowLowerMz(tomato)) +head(isolationWindowUpperMz(tomato)) + +head(fData(tomato)[, c("isolationWindowTargetMZ", "isolationWindowLowerOffset", + "isolationWindowUpperOffset", "msLevel", "retentionTime")]) + +cwp <- CentWaveParam(ppm = 25, peakwidth = c(10, 20), snthresh = 10, + prefilter = c(3, 100)) +tomato <- findChromPeaks(tomato, cwp) +tomato <- findChromPeaksIsolationWindow(tomato, cwp) + +chromPeakData(tomato) +table(chromPeakData(tomato)$isolationWindow) + +``` diff --git a/vignettes/xcms-direct-injection.Rmd b/vignettes/xcms-direct-injection.Rmd index c9ba92e90..d21d0c391 100644 --- a/vignettes/xcms-direct-injection.Rmd +++ b/vignettes/xcms-direct-injection.Rmd @@ -1,6 +1,6 @@ --- title: "Grouping FTICR-MS data with xcms" -author: +author: - name: Joachim Bargsten - name: Johannes Rainer package: xcms @@ -12,7 +12,7 @@ vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteKeywords{Mass Spectrometry, MS, Metabolomics, Bioinformatics} %\VignetteEncoding{UTF-8} - %\VignetteDepends{xcms,msdata,MassSpecWavelet,BiocStyle} + %\VignetteDepends{xcms,msdata,MassSpecWavelet,BiocStyle,signal} --- ```{r style, echo = FALSE, results = 'asis'} @@ -37,8 +37,8 @@ identified in the chromatographic (time) dimension, in direct injection mass spec data sets peaks are identified in the m/z dimension. `r Biocpkg("xcms")` uses functionality from the `MassSpecWavelet` package to identify such peaks. -Below we load the required packages. We disable parallel processing. To enable -and customize parallel processing please see the `BiocParallel` vignette. +Below we load the required packages. For information on the parallel processing +setup please see the `BiocParallel` vignette. ```{r load-libs, message = FALSE, results = "hide"} library(xcms) @@ -46,7 +46,6 @@ library(MassSpecWavelet) register(SerialParam()) - ``` In this documentation we use an example data set from the `r Biocpkg("msdata")` @@ -55,16 +54,19 @@ the package and load the data set. We create also a `data.frame` describing the experimental setup based on the file names. ```{r load-data, message = FALSE, results = "hide"} -mzdata_path <- system.file("fticr", package = "msdata") -mzdata_files <- list.files(mzdata_path, recursive = TRUE, full.names = TRUE) +mzML_path <- system.file("fticr-mzML", package = "msdata") +mzML_files <- list.files(mzML_path, recursive = TRUE, full.names = TRUE) + +## We're subsetting to 2 samples per condition +mzML_files <- mzML_files[c(1, 2, 6, 7)] ## Create a data.frame assigning samples to sample groups, i.e. ham4 and ham5. -grp <- rep("ham4", length(mzdata_files)) -grp[grep(basename(mzdata_files), pattern = "^HAM005")] <- "ham5" -pd <- data.frame(filename = basename(mzdata_files), sample_group = grp) +grp <- rep("ham4", length(mzML_files)) +grp[grep(basename(mzML_files), pattern = "^HAM005")] <- "ham5" +pd <- data.frame(filename = basename(mzML_files), sample_group = grp) ## Load the data. -ham_raw <- readMSData(files = mzdata_files, +ham_raw <- readMSData(files = mzML_files, pdata = new("NAnnotatedDataFrame", pd), mode = "onDisk") ``` @@ -136,7 +138,7 @@ first_file_calibrated <- calibrate(first_file, param = prm) To evaluate the calibration we plot below the difference between the adjusted and raw m/z values (y-axis) against the raw m/z values. -```{r calibration-result, fig = TRUE, fig.align = "center"} +```{r calibrationresult, fig = TRUE, fig.width = 6, fig.height = 5, fig.align = "center"} diffs <- chromPeaks(first_file_calibrated)[, "mz"] - chromPeaks(first_file)[, "mz"] @@ -183,7 +185,7 @@ subsequently use the `filterMz` function to sub-set the full data set to the signal associated with the feature's peaks. On that object we can then call the `mz` and `intensity` functions to extract the data. -```{r feature-FT01, fig = TRUE, fig.width = 6, fig.height = 4, fig.align = "center"} +```{r feature1, fig = TRUE, fig.width = 6, fig.height = 4, fig.align = "center"} ## Get the peaks belonging to the first feature pks <- chromPeaks(ham_prep)[featureDefinitions(ham_prep)$peakidx[[1]], ] @@ -239,4 +241,3 @@ head(featureValues(ham_prep, value = "into")) Further analysis, i.e. detection of features/metabolites with significantly different abundances, or PCA analyses can be performed on the feature matrix using functionality from other R packages, such as `r Biocpkg("limma")`. - diff --git a/vignettes/xcms-lcms-ms.Rmd b/vignettes/xcms-lcms-ms.Rmd new file mode 100644 index 000000000..270000cdf --- /dev/null +++ b/vignettes/xcms-lcms-ms.Rmd @@ -0,0 +1,757 @@ +--- +title: "LC-MS/MS data analysis with xcms" +package: xcms +output: + BiocStyle::html_document: + toc_float: true + includes: + in_header: xcms-lcms-ms.bioschemas.html +vignette: > + %\VignetteIndexEntry{LC-MS/MS data analysis with xcms} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} + %\VignetteDepends{xcms,msdata,BiocStyle,magrittr,pander,Spectra,MsBackendMgf} + %\VignettePackage{xcms} + %\VignetteKeywords{mass spectrometry, metabolomics} +bibliography: references.bib +csl: biomed-central.csl +--- + +```{r biocstyle, echo = FALSE, results = "asis"} +BiocStyle::markdown() +``` + +**Package**: `r Biocpkg("xcms")`
+**Authors**: Johannes Rainer, Michael Witting
+**Modified**: `r file.info("xcms-lcms-ms.Rmd")$mtime`
+**Compiled**: `r date()` + +```{r init, message = FALSE, echo = FALSE, results = "hide"} +## Silently loading all packages +library(BiocStyle) +library(xcms) +library(Spectra) +library(pander) +register(SerialParam()) + +``` + +# Introduction + +Metabolite identification is an important step in non-targeted metabolomics and +requires different steps. One involves the use of tandem mass spectrometry to +generate fragmentation spectra of detected metabolites (LC-MS/MS), which are +then compared to fragmentation spectra of known metabolites. Different +approaches exist for the generation of these fragmentation spectra, whereas the +most used is data dependent acquisition (DDA) also known as the top-n method. In +this method the top N most intense m/z values from a MS1 scan are selected for +fragmentation in the next N scans before the cycle starts again. This method +allows to generate clean MS2 fragmentation spectra on the fly during acquisition +without the need for further experiments, but suffers from poor coverage of the +detected metabolites (since only a limited number of ions are fragmented). + +Data independent approaches (DIA) like Bruker bbCID, Agilent AllIons or Waters +MSe don't use such a preselection, but rather fragment all detected molecules at +once. They are using alternating schemes with scan of low and high collision +energy to collect MS1 and MS2 data. Using this approach, there is no problem in +coverage, but the relation between the precursor and fragment masses is lost +leading to chimeric spectra. Sequential Window Acquisition of all Theoretical +Mass Spectra (or SWATH [@Ludwig:2018hv]) combines both approaches through a +middle-way approach. There is no precursor selection and acquisition is +independent of acquired data, but rather than isolating all precusors at once, +defined windows (i.e. ranges of m/z values) are used and scanned. This reduces +the overlap of fragment spectra while still keeping a high coverage. + +This document showcases the analysis of two small LC-MS/MS data sets using +`r Biocpkg("xcms")`. The data files used are reversed-phase LC-MS/MS runs from the +Agilent Pesticide mix obtained from a Sciex 6600 Triple ToF operated in SWATH +acquisition mode. For comparison a DDA file from the same sample is included. + +# Analysis of DDA data + +Below we load the example DDA data set using the `readMSData` function from the +`r Biocpkg("MSnbase")` package. + +```{r load-dda-data, message = FALSE} +library(xcms) + +dda_file <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", + package = "msdata") +dda_data <- readMSData(dda_file, mode = "onDisk") +``` + +```{r subset-dda, echo = FALSE, message = FALSE, eval = TRUE} +#' Silently sub-setting the object to speed-up analysis +dda_data <- filterRt(dda_data, rt = c(200, 600)) +``` + +The variable `dda_data` contains now all MS1 and MS2 spectra from the specified +mzML file. The number of spectra for each MS level is listed below. + +```{r dda-table-mslevel} +table(msLevel(dda_data)) +``` + +For the MS2 spectra we can get the m/z of the precursor ion with the +`precursorMz` function. Below we first filter the data set by MS level, extract +the precursor m/z and call `head` to just show the first 6 elements. For easier +readability we use the forward pipe operator `%>%` from the `magrittr` package. + +```{r precursor} +library(magrittr) + +dda_data %>% + filterMsLevel(2L) %>% + precursorMz() %>% + head() +``` + +With the `precursorIntensity` function it is also possible to extract the +intensity of the precursor ion. + +```{r precursor-intensity} +dda_data %>% + filterMsLevel(2L) %>% + precursorIntensity() %>% + head() +``` + +Some manufacturers (like Sciex for the present test data) don't define/export +the precursor intensity and thus either `NA` or `0` is reported. We can however +use the `estimatePrecursorIntensity` function from the `xcms` package to +determine the precursor intensity for a MS 2 spectrum based on the intensity of +the respective ion in the previous MS1 scan (note that with +`method = "interpolation"` the precursor intensity would be defined based on +interpolation between the intensity in the previous and subsequent MS1 scan). +Below we estimate the precursor intensities, on the full data (for MS1 spectra +a `NA` value is reported). Note also that we use `xcms::` to call the function +from the `xcms` package, because a function with the same name is also +implemented in the `Spectra` package, which would however not support +`OnDiskMSnExp` objects as input. + +```{r estimate-precursor} +prec_int <- xcms::estimatePrecursorIntensity(dda_data) +``` + +We next set the precursor intensity in the spectrum metadata of `dda_data`. So +that it can be extracted later with the `precursorIntensity` function. + +```{r set-precursor-intensity} +fData(dda_data)$precursorIntensity <- prec_int + +dda_data %>% + filterMsLevel(2L) %>% + precursorIntensity() %>% + head() +``` + +Next we perform the chromatographic peak detection on the MS level 1 data with +the `findChromPeaks` method. Below we define the settings for a *centWave*-based +peak detection and perform the analysis. + +```{r dda-find-chrom-peaks-ms1, message = FALSE} +cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, + peakwidth = c(3, 30)) +dda_data <- findChromPeaks(dda_data, param = cwp) +``` + +In total `r nrow(chromPeaks(dda_data))` peaks were identified in the present +data set. + +The advantage of LC-MS/MS data is that (MS1) ions are fragmented and the +corresponding MS2 spectra measured. Thus, for some of the ions (identified as +MS1 chromatographic peaks) MS2 spectra are available. These can facilitate the +annotation of the respective MS1 chromatographic peaks (or MS1 features after a +correspondence analysis). Spectra for identified chromatographic peaks can be +extracted with the `chromPeakSpectra` method. MS2 spectra with their precursor +m/z and retention time within the rt and m/z range of the chromatographic peak +are returned. Parameter `return.type` allows to define in which format these are +returned. With `return.type = "List"` or `return.type = "Spectra"` the data is +represented by a `Spectra` object from the `r BiocStyle::Biocpkg("Spectra")`. + +```{r dda-spectra, message = FALSE, eval = TRUE} +library(Spectra) +dda_spectra <- chromPeakSpectra( + dda_data, msLevel = 2L, return.type = "Spectra") +dda_spectra +``` + +By default `chromPeakSpectra` returns all spectra associated with a MS1 +chromatographic peak, but parameter `method` allows to choose and return only +one spectrum per peak (have a look at the `?chromPeakSpectra` help page for more +details). Also, it would be possible to extract MS1 spectra for each peak by +specifying `msLevel = 1L` in the call above (e.g. to evaluate the full MS1 +signal at the peak's apex position). + +In the example above we selected to return the data as a `Spectra` +object. Spectra variables `"peak_id"` and `"peak_index"` contain the identifiers +and the index (in the `chromPeaks` matrix) of the chromatographic peaks the MS2 +spectrum is associated with. + +```{r peak_id, eval = TRUE} +dda_spectra$peak_id +``` + +Note also that with `return.type = "List"` a list parallel to the `chromPeaks` +matrix would be returned, i.e. each element in that list would contain the +spectra for the chromatographic peak with the same index. This data +representation might eventually simplify further processing. + +We next use the MS2 information to aid in the annotation of a chromatographic +peak. As an example we use a chromatographic peak of an ion with an m/z of +304.1131 which we extract in the code block below. + +```{r dda-ms2-example, message = FALSE, eval = TRUE} +ex_mz <- 304.1131 +chromPeaks(dda_data, mz = ex_mz, ppm = 20) +``` + +A search of potential ions with a similar m/z in a reference database +(e.g. [Metlin](https://metlin.scripps.edu)) returned a large list of potential +hits, most with a very small ppm. For two of the hits, +[Flumazenil](https://en.wikipedia.org/wiki/Flumazenil) (Metlin ID 2724) and +[Fenamiphos](https://en.wikipedia.org/wiki/Fenamiphos) (Metlin ID 72445) +experimental MS2 spectra are available. Thus, we could match the MS2 spectrum +for the identified chromatographic peak against these to annotate our ion. Below +we extract all MS2 spectra that were associated with the candidate +chromatographic peak using the ID of the peak in the present data set. + +```{r dda-ms2-get-ms2, message = FALSE, eval = TRUE} +ex_id <- rownames(chromPeaks(dda_data, mz = ex_mz, ppm = 20)) +ex_spectra <- dda_spectra[dda_spectra$peak_id == ex_id] +ex_spectra +``` + +There are 5 MS2 spectra representing fragmentation of the ion(s) measured +in our candidate chromatographic peak. We next reduce this to a single MS2 +spectrum using the `combineSpectra` method employing the `combinePeaks` +function to determine which peaks to keep in the resulting spectrum (have a look +at the `?combinePeaks` help page for details). Parameter `f` allows to specify +which spectra in the input object should be combined into one. + +```{r dda-ms2-consensus, message = FALSE, eval = TRUE} +ex_spectrum <- combineSpectra(ex_spectra, FUN = combinePeaks, ppm = 20, + peaks = "intersect", minProp = 0.8, + intensityFun = median, mzFun = median, + f = ex_spectra$peak_id) +ex_spectrum +``` + +Mass peaks from all input spectra with a difference in m/z smaller 20 ppm +(parameter `ppm`) were combined into one peak and the median m/z and intensity +is reported for these. Due to parameter `minProp = 0.8`, the resulting MS2 +spectrum contains only peaks that were present in 80% of the input spectra. + +A plot of this *consensus* spectrum is shown below. + +```{r dda-ms2-consensus-plot, message = FALSE, fig.cap = "Consensus MS2 spectrum created from all measured MS2 spectra for ions of chromatographic peak CP53.", fig.width = 8, fig.height = 8, eval = TRUE} +plotSpectra(ex_spectrum) +``` + +We could now match the consensus spectrum against a database of MS2 spectra. In +our example we simply load MS2 spectra for the two compounds with matching m/z +exported from Metlin. For each of the compounds MS2 spectra created with +collision energies of 0V, 10V, 20V and 40V are available. Below we import the +respective data and plot our candidate spectrum against the MS2 spectra of +Flumanezil and Fenamiphos (from a collision energy of 20V). To import files in +MGF format we have to load the `MsBackendMgf` R package which adds MGF file +support to the `Spectra` package. This package can be installed with +`BiocManager::install("RforMassSpectrometry/MsBackendMgf")`. + +Prior plotting we *normalize* our experimental spectra. + +```{r normalize, eval = TRUE} +norm_fun <- function(z, ...) { + z[, "intensity"] <- z[, "intensity"] / + max(z[, "intensity"], na.rm = TRUE) * 100 + z +} +ex_spectrum <- addProcessing(ex_spectrum, FUN = norm_fun) +``` + +```{r dda-ms2-metlin-match, fig.cap = "Mirror plots for the candidate MS2 spectrum against Flumanezil (left) and Fenamiphos (right). The upper panel represents the candidate MS2 spectrum, the lower the target MS2 spectrum. Matching peaks are indicated with a dot.", fig.width = 12, fig.height = 6, eval = TRUE} +library(MsBackendMgf) +flumanezil <- Spectra( + system.file("mgf", "metlin-2724.mgf", package = "xcms"), + source = MsBackendMgf()) +fenamiphos <- Spectra( + system.file("mgf", "metlin-72445.mgf", package = "xcms"), + source = MsBackendMgf()) + +par(mfrow = c(1, 2)) +plotSpectraMirror(ex_spectrum, flumanezil[3], main = "against Flumanezil", + ppm = 40) +plotSpectraMirror(ex_spectrum, fenamiphos[3], main = "against Fenamiphos", + ppm = 40) +``` + +Our candidate spectrum matches Fenamiphos, thus, our example chromatographic +peak represents signal measured for this compound. In addition to plotting the +spectra, we can also calculate similarities between them with the +`compareSpectra` method (which uses by default the normalized dot-product to +calculate the similarity). + +```{r dda-ms2-dotproduct, eval = TRUE} +compareSpectra(ex_spectrum, flumanezil, ppm = 40) +compareSpectra(ex_spectrum, fenamiphos, ppm = 40) +``` + +Clearly, the candidate spectrum does not match Flumanezil, while it has a high +similarity to Fenamiphos. While we performed here the MS2-based annotation on a +single chromatographic peak, this could be easily extended to the full list of +MS2 spectra (returned by `chromPeakSpectra`) for all chromatographic peaks in an +experiment. See also [here](https://jorainer.github.io/SpectraTutorials/). + +In the present example we used only a single data file and we did thus not need +to perform a sample alignment and correspondence analysis. These tasks could +however be performed similarly to *plain* LC-MS data, retention times of +recorded MS2 spectra would however also be adjusted during alignment based on +the MS1 data. After correspondence analysis (peak grouping) MS2 spectra for +*features* can be extracted with the `featureSpectra` function which returns all +MS2 spectra associated with any chromatographic peak of a feature. + +Note also that this workflow can be included into the *Feature-Based +Molecular Networking* +[FBMN](https://ccms-ucsd.github.io/GNPSDocumentation/featurebasedmolecularnetworking/) +to match MS2 spectra against [GNPS](https://gnps.ucsd.edu/). See +[here](https://ccms-ucsd.github.io/GNPSDocumentation/featurebasedmolecularnetworking-with-xcms3/) +for more details and examples. + + + +# DIA (SWATH) data analysis + +In this section we analyze a small SWATH data set consisting of a single mzML +file with data from the same sample analyzed in the previous section but +recorded in SWATH mode. We again read the data with the `readMSData` +function. The resulting object will contain all recorded MS1 and MS2 +spectra in the specified file. + +```{r load-swath-data, message = FALSE} +swath_file <- system.file("TripleTOF-SWATH", + "PestMix1_SWATH.mzML", + package = "msdata") + +swath_data <- readMSData(swath_file, mode = "onDisk") +``` + +```{r echo = FALSE, message = FALSE} +swath_data <- filterRt(swath_data, rt = c(200, 600)) +``` + +Below we determine the number of MS level 1 and 2 spectra in the present data +set. + +```{r swath-table-mslevel} +table(msLevel(swath_data)) +``` + +As described in the introduction, in SWATH mode all ions within pre-defined +isolation windows are fragmented and MS2 spectra measured. The definition of +these isolation windows (SWATH pockets) is imported from the mzML files and +stored in the object's `fData` (which provides additional annotations for each +individual spectrum). Below we inspect the respective information for the first +few spectra. The upper and lower isolation window m/z can be extracted with the +`isolationWindowLowerMz` and `isolationWindowUpperMz`. + +```{r fdata-isolationwindow} +head(fData(swath_data)[, c("isolationWindowTargetMZ", + "isolationWindowLowerOffset", + "isolationWindowUpperOffset", + "msLevel", "retentionTime")]) + +head(isolationWindowLowerMz(swath_data)) +head(isolationWindowUpperMz(swath_data)) +``` + +In the present data set we use the value of the *isolation window target m/z* to +define the individual SWATH pockets. Below we list the number of spectra that +are recorded in each pocket/isolation window. + +```{r} +table(isolationWindowTargetMz(swath_data)) +``` + +We have thus 1,000 MS2 spectra measured in each isolation window. + +Note that also DIA data from other manufacturers (e.g. Waters MSe) are supported +as long as a spectra variable `isolationWindowTargetMZ` is available. If that +variable is empty it can also manually be initialized as shown below (assuming +that for the `precursorMz` of MS2 spectra the a constant value per isolation +window (usually the m/z in the middle between the used lower and upper m/z of +the isolation window). + +```{r, eval = FALSE} +fData(swath_data)$isolationWindowTargetMZ <- precursorMz(swath_data) +``` + + +## Chromatographic peak detection in MS1 and MS2 data + +Similar to a *conventional* LC-MS analysis, we perform first a chromatographic +peak detection (on the MS level 1 data) with the `findChromPeaks` method. Below +we define the settings for a *centWave*-based peak detection and perform the +analysis. + +```{r find-chrom-peaks-ms1, message = FALSE} +cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, + peakwidth = c(3, 30)) +swath_data <- findChromPeaks(swath_data, param = cwp) +``` + +Next we perform a chromatographic peak detection in the MS level 2 data of each +isolation window. We use the `findChromPeaksIsolationWindow` function employing +the same peak detection algorithm reducing however the required signal-to-noise +ratio. The `isolationWindow` parameter allows to specify which MS2 spectra +belong to which isolation window and hence defines in which set of MS2 spectra +chromatographic peak detection should be performed. While the default value for +this parameter uses isolation windows provided by calling +`isolationWindowTargetMz` on the object, it would also be possible to manually +define the isolation windows, e.g. if the corresponding information is not +available in the input mzML files. + +```{r find-chrom-peaks-ms2, message = FALSE} +cwp <- CentWaveParam(snthresh = 3, noise = 10, ppm = 10, + peakwidth = c(3, 30)) +swath_data <- findChromPeaksIsolationWindow(swath_data, param = cwp) +``` + +The `findChromPeaksIsolationWindow` function added all peaks identified in the +individual isolation windows to the `chromPeaks` matrix containing already the +MS1 chromatographic peaks. These newly added peaks can be identified by the +value of the `"isolationWindow"` column in the corresponding row in +`chromPeakData`, which lists also the MS level in which the peak was identified. + +```{r} +chromPeakData(swath_data) +``` + +Below we count the number of chromatographic peaks identified within each +isolation window (the number of chromatographic peaks identified in MS1 is +`r sum(chromPeakData(swath_data)$ms_level == 1)`). + +```{r} +table(chromPeakData(swath_data)$isolationWindow) +``` + +We thus successfully identified chromatographic peaks in the different MS levels +and isolation windows, but don't have any actual MS2 *spectra* yet. These have +to be reconstructed from the available chromatographic peak data which we will +done in the next section. + + +## Reconstruction of MS2 spectra + +Identifying the signal of the fragment ions for the precursor measured by each +MS1 chromatographic peak is a non-trivial task. The MS2 spectrum of the fragment +ion for each MS1 chromatographic peak has to be reconstructed from the available +MS2 signal (i.e. the chromatographic peaks identified in MS level 2). For SWATH +data, fragment ion signal should be present in the isolation window that +contains the m/z of the precursor ion and the chromatographic peak shape of the +MS2 chromatographic peaks of fragment ions of a specific precursor should have a +similar retention time and peak shape than the precursor's MS1 chromatographic +peak. + +After detection of MS1 and MS2 chromatographic peaks has been performed, we can +reconstruct the MS2 spectra using the `reconstructChromPeakSpectra` +function. This function defines an MS2 spectrum for each MS1 chromatographic +peak based on the following approach: + +- Identify MS2 chromatographic peaks in the isolation window containing the m/z + of the ion (the MS1 chromatographic peak) that have approximately the same + retention time than the MS1 chromatographic peak (the accepted difference in + retention time can be defined with the `diffRt` parameter). +- Extract the MS1 chromatographic peak and all MS2 chromatographic peaks + identified by the previous step and correlate the peak shapes of the candidate + MS2 chromatographic peaks with the shape of the MS1 peak. MS2 chromatographic + peaks with a correlation coefficient larger than `minCor` are retained. +- Reconstruct the MS2 spectrum using the m/z of all above selected MS2 + chromatographic peaks and their intensity; each MS2 chromatographic peak + selected for an MS1 peak will thus represent one **mass peak** in the + reconstructed spectrum. + +To illustrate this process we perform the individual steps on the example of +Fenamiphos (exact mass 303.105800777 and m/z of [M+H]+ adduct 304.113077). As +a first step we extract the chromatographic peak for this ion. + +```{r fena-extract-peak} +fenamiphos_mz <- 304.113077 +fenamiphos_ms1_peak <- chromPeaks(swath_data, mz = fenamiphos_mz, ppm = 2) +fenamiphos_ms1_peak +``` + +Next we identify all MS2 chromatographic peaks that were identified in the +isolation window containing the m/z of Fenamiphos. The information on the +isolation window in which a chromatographic peak was identified is available in +the `chromPeakData` (which contains arbitrary additional annotations to each +individual chromatographic peak). + +```{r fena-identify-ms2} +keep <- chromPeakData(swath_data)$isolationWindowLowerMz < fenamiphos_mz & + chromPeakData(swath_data)$isolationWindowUpperMz > fenamiphos_mz +``` + +We also require the retention time of the MS2 chromatographic peaks to be +similar to the retention time of the MS1 peak and extract the corresponding peak +information. + +```{r fena-check-rt} +keep <- keep & + chromPeaks(swath_data)[, "rtmin"] < fenamiphos_ms1_peak[, "rt"] & + chromPeaks(swath_data)[, "rtmax"] > fenamiphos_ms1_peak[, "rt"] + +fenamiphos_ms2_peak <- chromPeaks(swath_data)[which(keep), ] +``` + +In total `r sum(keep, na.rm = TRUE)` MS2 chromatographic peaks match all the +above condition. Next we extract their corresponding ion chromatograms, as well +as the ion chromatogram of the MS1 peak. In addition we have to filter the +object first by isolation window, keeping only spectra that were measured in +that specific window and to specify to extract the chromatographic data from MS2 +spectra (with `msLevel = 2L`). + +```{r fena-eic-extract, warning = FALSE} +rtr <- fenamiphos_ms1_peak[, c("rtmin", "rtmax")] +mzr <- fenamiphos_ms1_peak[, c("mzmin", "mzmax")] +fenamiphos_ms1_chr <- chromatogram(swath_data, rt = rtr, mz = mzr) + +rtr <- fenamiphos_ms2_peak[, c("rtmin", "rtmax")] +mzr <- fenamiphos_ms2_peak[, c("mzmin", "mzmax")] +fenamiphos_ms2_chr <- chromatogram( + filterIsolationWindow(swath_data, mz = fenamiphos_mz), + rt = rtr, mz = mzr, msLevel = 2L) +``` + +We can now plot the extracted ion chromatogram of the MS1 and the extracted MS2 +data. + +```{r fena-eic-plot, fig.width = 10, fig.height = 5, fig.cap = "Extracted ion chromatograms for Fenamiphos from MS1 (blue) and potentially related signal in MS2 (grey)."} +plot(rtime(fenamiphos_ms1_chr[1, 1]), + intensity(fenamiphos_ms1_chr[1, 1]), + xlab = "retention time [s]", ylab = "intensity", pch = 16, + ylim = c(0, 5000), col = "blue", type = "b", lwd = 2) +#' Add data from all MS2 peaks +tmp <- lapply(fenamiphos_ms2_chr@.Data, + function(z) points(rtime(z), intensity(z), + col = "#00000080", + type = "b", pch = 16)) +``` + +Next we can calculate correlations between the peak shapes of each MS2 +chromatogram with the MS1 peak. We perform the correlation below for one of the +MS2 chromatographic peaks. Note that, because spectra are recorded +consecutively, the retention times of the individual data points will differ for +the MS2 and MS1 chromatographic data and data points have thus to be matched +(aligned) before performing the correlation analysis. This is done automatically +by the `correlate` function. See the help for the `align` method for more +information on alignment options. + +```{r fena-cor} +correlate(fenamiphos_ms2_chr[1, 1], + fenamiphos_ms1_chr[1, 1], align = "approx") +``` + +After identifying the MS2 chromatographic peaks with shapes of enough high +similarity to the MS1 chromatographic peaks, an MS2 spectrum could be +*reconstructed* based on the m/z and intensities of the MS2 chromatographic +peaks. + +The `reconstructChromPeakSpectra` function performs the above analysis for each +individual MS1 chromatographic peak in a SWATH data set. Below we reconstruct +MS2 spectra for our example data requiring a peak shape correlation higher than +`0.9` between the candidate MS2 chromatographic peak and the target MS1 +chromatographic peak. Again, we use `return.type = "Spectra"` to return the +results as a `Spectra` object (instead to the default, but older/obsolete +`MSpectra` object). + +```{r reconstruct-ms2, message = FALSE} +swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9, + return.type = "Spectra") +swath_spectra +``` + +As a result we got a `Spectra` object of length equal to the number of MS1 peaks +in our data. A `peaksCount` of `0` indicates that no MS2 spectrum could be +defined based on the used settings. For reconstructed spectra additional +annotations are available such as the IDs of the MS2 chromatographic peaks from +which the spectrum was reconstructed (`"ms2_peak_id"`) as well as the +correlation coefficient of their chromatographic peak shape with the precursor's +shape (`"ms2_peak_cor"`). Metadata column `"peak_id"` contains the ID of the MS1 +chromatographic peak: + +```{r} +swath_spectra$ms2_peak_id +swath_spectra$peak_id +``` + +We next extract the MS2 spectrum for our example peak most likely representing +[M+H]+ ions of Fenamiphos using its chromatographic peak ID: + +```{r fena-swath-peak} +fenamiphos_swath_spectrum <- swath_spectra[ + swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] +``` + +We can now compare the reconstructed spectrum to the example consensus spectrum +from the DDA experiment in the previous section (variable `ex_spectrum`) as well +as to the MS2 spectrum for Fenamiphos from Metlin (with a collision energy of +10V). For better visualization we *normalize* also the peak intensities of the +reconstructed SWATH spectrum with the same function we used for the experimental +DDA spectrum. + +```{r} +fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, + norm_fun) +``` + +```{r fena-swath-plot, fig.cap = "Mirror plot comparing the reconstructed MS2 spectrum for Fenamiphos (upper panel) against the measured spectrum from the DDA data and the Fenamiphhos spectrum from Metlin.", fig.width = 12, fig.height = 6} +par(mfrow = c(1, 2)) +plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, + ppm = 50, main = "against DDA") +plotSpectraMirror(fenamiphos_swath_spectrum, fenamiphos[2], + ppm = 50, main = "against Metlin") +``` + +If we wanted to get the EICs for the MS2 chromatographic peaks used to generate +this MS2 spectrum we can use the IDs of these peaks which are provided with +`$ms2_peak_id` of the result spectrum. + +```{r} +pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] +pk_ids +``` + +With these peak IDs available we can extract their retention time window and m/z +ranges from the `chromPeaks` matrix and use the `chromatogram` function to +extract their EIC. Note however that for SWATH data we have MS2 signal from +different isolation windows. Thus we have to first filter the `swath_data` +object by the isolation window containing the precursor m/z with the +`filterIsolationWindow` to subset the data to MS2 spectra related to the ion of +interest. In addition, we have to use `msLevel = 2L` in the `chromatogram` call +because `chromatogram` extracts by default only data from MS1 spectra. + +```{r} +rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] +mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] + +pmz <- precursorMz(fenamiphos_swath_spectrum)[1] +swath_data_iwindow <- filterIsolationWindow(swath_data, mz = pmz) +ms2_eics <- chromatogram(swath_data_iwindow, rt = rt_range, + mz = mz_range, msLevel = 2L) +``` + +Each row of this `ms2_eics` contains now the EIC of one of the MS2 +chromatographic peaks. + +As a second example we analyze the signal from an [M+H]+ ion with an m/z of +376.0381 (which would match +[Prochloraz](https://en.wikipedia.org/wiki/Prochloraz)). We first identify the +MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 +spectrum for that peak. + +```{r pro-swath} +prochloraz_mz <- 376.0381 + +prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, + mz = prochloraz_mz, ppm = 5) +prochloraz_ms1_peak + +prochloraz_swath_spectrum <- swath_spectra[ + swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] +``` + +In addition we identify the corresponding MS1 peak in the DDA data set, extract +all measured MS2 chromatographic peaks and build the consensus spectrum from +these. + +```{r pro-dda} +prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, + mz = prochloraz_mz, ppm = 5) +prochloraz_dda_peak +``` + +The retention times for the chromatographic peaks from the DDA and SWATH data +match almost perfectly. Next we get the MS2 spectra for this peak. + +```{r pro-dda-ms2} +prochloraz_dda_spectra <- dda_spectra[ + dda_spectra$peak_id == rownames(prochloraz_dda_peak)] +prochloraz_dda_spectra +``` + +In total 5 spectra were measured, some with a relatively high number of +peaks. Next we combine them into a consensus spectrum. + +```{r pro-dda-consensus} +prochloraz_dda_spectrum <- combineSpectra( + prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, + peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, + f = prochloraz_dda_spectra$peak_id) +``` + +At last we load also the Prochloraz MS2 spectra (for different collision +energies) from Metlin. + +```{r prochloraz-metlin} +prochloraz <- Spectra( + system.file("mgf", "metlin-68898.mgf", package = "xcms"), + source = MsBackendMgf()) +``` + +To validate the reconstructed spectrum we plot it against the corresponding DDA +spectrum and the MS2 spectrum for Prochloraz (for a collision energy of 10V) +from Metlin. + +```{r pro-swath-plot, fig.cap = "Mirror plot comparing the reconstructed MS2 spectrum for Prochloraz (upper panel) against the measured spectrum from the DDA data and the Prochloraz spectrum from Metlin.", fig.width = 12, fig.height = 6} +prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, norm_fun) +prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, norm_fun) + +par(mfrow = c(1, 2)) +plotSpectraMirror(prochloraz_swath_spectrum, prochloraz_dda_spectrum, + ppm = 40, main = "against DDA") +plotSpectraMirror(prochloraz_swath_spectrum, prochloraz[2], + ppm = 40, main = "against Metlin") +``` + +The spectra fit relatively well. Interestingly, the peak representing the +precursor (the right-most peak) seems to have a slightly shifted m/z value in +the reconstructed spectrum. + +Similar to the DDA data, the reconstructed MS2 spectra from SWATH data could be +used in the annotation of the MS1 chromatographic peaks. + + + + + + + + + + + +# Outlook + +Currently, spectra data representation, handling and processing is being +re-implemented as part of the +[RforMassSpectrometry](https://rformassspectrometry.org) initiative aiming at +increasing the performance of methods and simplifying their use. Thus, parts of +the workflow described here will be changed (improved) in future. + +Along with these developments, improved matching strategies for larger data sets +will be implemented as well as functionality to compare `Spectra` directly to +reference MS2 spectra from public annotation resources (e.g. Massbank or +HMDB). See for example [here](https://jorainer.github.io/SpectraTutorials) for +more information. + +Regarding SWATH data analysis, future development will involve improved +selection of the correct MS2 chromatographic peaks considering also correlation +with intensity values across several samples. + +# Session information + +```{r sessionInfo} +sessionInfo() +``` + +# References diff --git a/vignettes/xcms-lcms-ms.bioschemas.html b/vignettes/xcms-lcms-ms.bioschemas.html new file mode 100644 index 000000000..d43a36427 --- /dev/null +++ b/vignettes/xcms-lcms-ms.bioschemas.html @@ -0,0 +1,31 @@ + diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index 3ef9db994..acbed7829 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -4,25 +4,21 @@ package: xcms output: BiocStyle::html_document: toc_float: true + includes: + in_header: xcms.bioschemas.html vignette: > %\VignetteIndexEntry{LCMS data preprocessing and analysis with xcms} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} - %\VignetteDepends{xcms,RColorBrewer,faahKO,pander,magrittr} + %\VignetteDepends{xcms,RColorBrewer,faahKO,pander,magrittr,BiocStyle,pheatmap,SummarizedExperiment} %\VignettePackage{xcms} %\VignetteKeywords{mass spectrometry, metabolomics} bibliography: references.bib csl: biomed-central.csl -references: -- id: dummy - title: no title - author: - - family: noname - given: noname --- -```{r biocstyle, echo = FALSE, results = "asis" } -BiocStyle::markdown() +```{r biocstyle, echo = FALSE, results = "asis" } +BiocStyle::markdown() ``` **Package**: `r Biocpkg("xcms")`
@@ -30,7 +26,7 @@ BiocStyle::markdown() **Modified**: `r file.info("xcms.Rmd")$mtime`
**Compiled**: `r date()` -```{r init, message = FALSE, echo = FALSE, results = "hide" } +```{r init, message = FALSE, echo = FALSE, results = "hide"} ## Silently loading all packages library(BiocStyle) library(xcms) @@ -38,11 +34,12 @@ library(faahKO) library(pander) ## Use socket based parallel processing on Windows systems ## if (.Platform$OS.type == "unix") { -## register(bpstart(MulticoreParam(2))) +## register(bpstart(MulticoreParam(3))) ## } else { -## register(bpstart(SnowParam(2))) +## register(bpstart(SnowParam(3))) ## } -register(SerialParam()) +register(SerialParam()) + ``` @@ -51,64 +48,83 @@ register(SerialParam()) This documents describes data import, exploration, preprocessing and analysis of LCMS experiments with `xcms` version >= 3. The examples and basic workflow was adapted from the original *LC/MS Preprocessing and Analysis with xcms* vignette -from Colin A. Smith. +from Colin A. Smith. -The new user interface and methods use the `XCMSnExp` object (instead of the *old* -`xcmsSet` object) as a container for the pre-processing results. To support -packages and pipelines relying on the `xcmsSet` object, it is however possible to -convert an `XCMSnExp` into a `xcmsSet` object using the `as` method (i.e. `xset <- -as(x, "xcmsSet")`, with `x` being an `XCMSnExp` object. +The new user interface and methods use the `XCMSnExp` object (instead of the +*old* `xcmsSet` object) as a container for the pre-processing results. To +support packages and pipelines relying on the `xcmsSet` object, it is however +possible to convert an `XCMSnExp` into a `xcmsSet` object using the `as` method +(i.e.`xset <- as(x, "xcmsSet")`, with `x` being an `XCMSnExp` object. # Data import -`xcms` supports analysis of LC/MS data from files in (AIA/ANDI) NetCDF, mzML/mzXML -and mzData format. For the actual data import Bioconductor's SRC\_R[:exports -both]{Biocpkg("mzR")} is used. For demonstration purpose we will analyze a -subset of the data from [@Saghatelian04] in which the metabolic consequences -of knocking out the fatty acid amide hydrolase (FAAH) gene in mice was -investigated. The raw data files (in NetCDF format) are provided with the `faahKO` -data package. The data set consists of samples from the spinal cords of 6 -knock-out and 6 wild-type mice. Each file contains data in centroid mode -acquired in positive ion mode form 200-600 m/z and 2500-4500 seconds. - -Below we load all required packages, locate the raw CDF files within the `faahKO` -package and build a *phenodata* data frame describing the experimental setup. - -```{r load-libs-pheno, message = FALSE } +`xcms` supports analysis of LC/MS data from files in (AIA/ANDI) NetCDF, +mzXML and mzML format. For the actual data import Bioconductor's +`r Biocpkg("mzR")` is used. For demonstration purpose we will analyze a +subset of the data from [@Saghatelian04] in which the metabolic consequences of +knocking out the fatty acid amide hydrolase (FAAH) gene in mice was +investigated. The raw data files (in NetCDF format) are provided with the +`faahKO` data package. The data set consists of samples from the spinal cords of +6 knock-out and 6 wild-type mice. Each file contains data in centroid mode +acquired in positive ion mode form 200-600 m/z and 2500-4500 seconds. To speed +up processing of this vignette we will restrict the analysis to only 8 files and +to the retention time range from 2500 to 3500 seconds. + +Below we load all required packages, locate the raw CDF files within the +`faahKO` package and build a *phenodata* data frame describing the experimental +setup. Note that for *real* experiments it is suggested to define a file (table) +that contains the file names of the raw data files along with descriptions of +the samples for each file as additional columns. Such a file could then be +imported with e.g. `read.table` as variable `pd` (instead of being defined +within R as in the example below) and the file names could be passed along to +the `readMSData` function below with e.g. +`files = paste0(MZML_PATH, "/", pd$mzML_file)` where `MZML_PATH` would be the +path to directory in which the files are located and `"mzML_file"` the name of +the column in the phenodata file that contains the file names. + +```{r load-libs-pheno, message = FALSE } library(xcms) library(faahKO) library(RColorBrewer) library(pander) library(magrittr) +library(pheatmap) +library(SummarizedExperiment) ## Get the full path to the CDF files cdfs <- dir(system.file("cdf", package = "faahKO"), full.names = TRUE, - recursive = TRUE) + recursive = TRUE)[c(1, 2, 5, 6, 7, 8, 11, 12)] ## Create a phenodata data.frame pd <- data.frame(sample_name = sub(basename(cdfs), pattern = ".CDF", - replacement = "", fixed = TRUE), - sample_group = c(rep("KO", 6), rep("WT", 6)), - stringsAsFactors = FALSE) + replacement = "", fixed = TRUE), + sample_group = c(rep("KO", 4), rep("WT", 4)), + stringsAsFactors = FALSE) ``` Subsequently we load the raw data as an `OnDiskMSnExp` object using the -`readMSData` method from the `MSnbase` package. While the `MSnbase` package was -originally developed for proteomics data processing, many of its functionality, -including raw data import and data representation, can be shared and reused in -metabolomics data analysis. Also, `MSnbase` can be used to *centroid* profile-mode -MS data (see the corresponding vignette in the `MSnbase` package). +`readMSData` method from the `r Biocpkg("MSnbase")` package. The `MSnbase` +provides based structures and infrastructure for the processing of mass +spectrometry data. Also, `MSnbase` can be used to *centroid* profile-mode MS +data (see the corresponding vignette in the `MSnbase` package). -```{r load-with-msnbase, message = FALSE } +```{r load-with-msnbase, message = FALSE } raw_data <- readMSData(files = cdfs, pdata = new("NAnnotatedDataFrame", pd), - mode = "onDisk") + mode = "onDisk") +``` + +We next restrict the data set to the retention time range from 2500 to 3500 +seconds. This is merely to reduce the processing time of this vignette. + +```{r subsetting, message = FALSE, echo = TRUE} +raw_data <- filterRt(raw_data, c(2500, 3500)) ``` -The `OnDiskMSnExp` object contains general information about the number of -spectra, retention times, the measured total ion current etc, but does not -contain the full raw data (i.e. the m/z and intensity values from each measured -spectrum). Its memory footprint is thus rather small making it an ideal object -to represent large metabolomics experiments while still allowing to perform +The resulting `OnDiskMSnExp` object contains general information about the +number of spectra, retention times, the measured total ion current etc, but does +not contain the full raw data (i.e. the m/z and intensity values from each +measured spectrum). Its memory footprint is thus rather small making it an ideal +object to represent large metabolomics experiments while allowing to perform simple quality controls, data inspection and exploration as well as data sub-setting operations. The m/z and intensity values are imported from the raw data files on demand, hence the location of the raw data files should not be @@ -121,60 +137,59 @@ The `OnDiskMSnExp` organizes the MS data by spectrum and provides the methods `intensity`, `mz` and `rtime` to access the raw data from the files (the measured intensity values, the corresponding m/z and retention time values). In addition, the `spectra` method could be used to return all data encapsulated in `Spectrum` -classes. Below we extract the retention time values from the object. +objects. Below we extract the retention time values from the object. -```{r data-inspection-rtime, message = FALSE } -head(rtime(raw_data)) +```{r data-inspection-rtime, message = FALSE } +head(rtime(raw_data)) ``` -All data is returned as one-dimensional vectors (a numeric vector for `rtime` and -a `list` of numeric vectors for `mz` and `intensity`, each containing the values from -one spectrum), even if the experiment consists of multiple files/samples. The -`fromFile` function returns a numeric vector that provides the mapping of the -values to the originating file. Below we use the `fromFile` indices to organize -the `mz` values by file. +All data is returned as one-dimensional vectors (a numeric vector for `rtime` +and a `list` of numeric vectors for `mz` and `intensity`, each containing the +values from one spectrum), even if the experiment consists of multiple +files/samples. The `fromFile` function returns an integer vector providing +the mapping of the values to the originating file. Below we use the `fromFile` +indices to organize the `mz` values by file. -```{r data-inspection-mz, message = FALSE } +```{r data-inspection-mz, message = FALSE } mzs <- mz(raw_data) ## Split the list by file mzs_by_file <- split(mzs, f = fromFile(raw_data)) -length(mzs_by_file) +length(mzs_by_file) ``` As a first evaluation of the data we plot below the base peak chromatogram (BPC) for each file in our experiment. We use the `chromatogram` method and set the -`aggregationFun` to `"max"` to return for each spectrum the maximal intensity and -hence create the BPC from the raw data. To create a total ion chromatogram we -could set `aggregationFun` to `sum`. +`aggregationFun` to `"max"` to return for each spectrum the maximal intensity +and hence create the BPC from the raw data. To create a total ion chromatogram +we could set `aggregationFun` to `sum`. -```{r data-inspection-bpc, message = FALSE, fig.align = "center", fig.width = 12, fig.height = 6 } +```{r data-inspection-bpc, message = FALSE, fig.align = "center", fig.width = 12, fig.height = 6 } ## Get the base peak chromatograms. This reads data from the files. bpis <- chromatogram(raw_data, aggregationFun = "max") ## Define colors for the two groups -group_colors <- brewer.pal(3, "Set1")[1:2] +group_colors <- paste0(brewer.pal(3, "Set1")[1:2], "60") names(group_colors) <- c("KO", "WT") ## Plot all chromatograms. plot(bpis, col = group_colors[raw_data$sample_group]) - ``` -The `chromatogram` method returned a `Chromatograms` object that organizes -individual `Chromatogram` objects (which in fact contain the chromatographic data) -in a two-dimensional array: columns represent samples and rows (optionally) m/z -and/or retention time ranges. Below we extract the chromatogram of the first -sample and access its retention time and intensity values. +The `chromatogram` method returned a `MChromatograms` object that organizes +individual `Chromatogram` objects (which in fact contain the chromatographic +data) in a two-dimensional array: columns represent samples and rows +(optionally) m/z and/or retention time ranges. Below we extract the chromatogram +of the first sample and access its retention time and intensity values. -```{r data-inspection-chromatogram, message = FALSE } +```{r data-inspection-chromatogram, message = FALSE } bpi_1 <- bpis[1, 1] head(rtime(bpi_1)) -head(intensity(bpi_1)) +head(intensity(bpi_1)) ``` -The `chromatogram` method supports also extraction of chromatographic data from a -m/z-rt slice of the MS data. In the next section we will use this method to +The `chromatogram` method supports also extraction of chromatographic data from +a m/z-rt slice of the MS data. In the next section we will use this method to create an extracted ion chromatogram (EIC) for a selected peak. Note that `chromatogram` reads the raw data from each file to calculate the @@ -186,18 +201,47 @@ data). Below we create boxplots representing the distribution of total ion currents per file. Such plots can be very useful to spot problematic or failing MS runs. -```{r data-inspection-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Distribution of total ion currents per file." } +```{r data-inspection-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Distribution of total ion currents per file." } ## Get the total ion current by file tc <- split(tic(raw_data), f = fromFile(raw_data)) boxplot(tc, col = group_colors[raw_data$sample_group], - ylab = "intensity", main = "Total ion current") + ylab = "intensity", main = "Total ion current") +``` + +Also, we can cluster the samples based on similarity of their base peak +chromatogram. This can also be helpful to spot potentially problematic samples +in an experiment or generally get an initial overview of the sample grouping in +the experiment. Since the retention times between samples are not exactly +identical, we use the `bin` function to group intensities in fixed time ranges +(bins) along the retention time axis. In the present example we use a bin size +of 1 second, the default is 0.5 seconds. The clustering is performed using +complete linkage hierarchical clustering on the pairwise correlations of the +binned base peak chromatograms. + +```{r data-inspection-bpc-heatmap, message = FALSE, fig.align = "center", fig.width = 7, fig.height = 6, fig.cap = "Grouping of samples based on similarity of their base peak chromatogram."} +## Bin the BPC +bpis_bin <- MSnbase::bin(bpis, binSize = 2) + +## Calculate correlation on the log2 transformed base peak intensities +cormat <- cor(log2(do.call(cbind, lapply(bpis_bin, intensity)))) +colnames(cormat) <- rownames(cormat) <- raw_data$sample_name + +## Define which phenodata columns should be highlighted in the plot +ann <- data.frame(group = raw_data$sample_group) +rownames(ann) <- raw_data$sample_name + +## Perform the cluster analysis +pheatmap(cormat, annotation = ann, + annotation_color = list(group = group_colors)) ``` +The samples cluster in a pairwise manner, the KO and WT samples for the sample +index having the most similar BPC. # Chromatographic peak detection -Next we perform the chromatographic peak detection using the *centWave* algorithm -[@Tautenhahn:2008fx]. Before running the peak detection it is however +Next we perform the chromatographic peak detection using the *centWave* +algorithm [@Tautenhahn:2008fx]. Before running the peak detection it is however strongly suggested to visually inspect e.g. the extracted ion chromatogram of internal standards or known compounds to evaluate and adapt the peak detection settings since the default settings will not be appropriate for most LCMS @@ -205,72 +249,125 @@ experiments. The two most critical parameters for *centWave* are the `peakwidth` (expected range of chromatographic peak widths) and `ppm` (maximum expected deviation of m/z values of centroids corresponding to one chromatographic peak; this is usually much larger than the ppm specified by the manufacturer) -parameters. -To evaluate the typical chromatographic peak width we plot the EIC for one peak. +parameters. To evaluate the typical chromatographic peak width we plot the EIC +for one peak. -```{r peak-detection-plot-eic, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 5, fig.cap = "Extracted ion chromatogram for one peak." } +```{r peak-detection-plot-eic, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 5, fig.cap = "Extracted ion chromatogram for one peak." } ## Define the rt and m/z range of the peak area rtr <- c(2700, 2900) mzr <- c(334.9, 335.1) ## extract the chromatogram chr_raw <- chromatogram(raw_data, mz = mzr, rt = rtr) -plot(chr_raw, col = group_colors[chr_raw$sample_group]) +plot(chr_raw, col = group_colors[chr_raw$sample_group]) ``` -Note that `Chromatogram` objects extracted by the `chromatogram` method contain an -`NA` value if in a certain scan (i.e. for a specific retention time) no signal was -measured in the respective mz range. This is reflected by the lines not being -drawn as continuous lines in the plot above. +Note that `Chromatogram` objects extracted by the `chromatogram` method contain +an `NA` value if in a certain scan (i.e. for a specific retention time) no +signal was measured in the respective mz range. This is reflected by the lines +not being drawn as continuous lines in the plot above. The peak above has a width of about 50 seconds. The `peakwidth` parameter should be set to accommodate the expected widths of peak in the data set. We set it to `20,80` for the present example data set. -For the `ppm` parameter we extract the full MS data (intensity, retention time and -m/z values) corresponding to the above peak. To this end we first filter the raw -object by retention time, then by m/z and finally plot the object with `type = -"XIC"` to produce the plot below. We use the *pipe* (`%>%`) command better -illustrate the corresponding workflow. +For the `ppm` parameter we extract the full MS data (intensity, retention time +and m/z values) corresponding to the above peak. To this end we first filter the +raw object by retention time, then by m/z and finally plot the object with `type += "XIC"` to produce the plot below. We use the *pipe* (`%>%`) command better +illustrate the corresponding workflow. Note also that in this type of plot +identified chromatographic peaks would be indicated by default if present. -```{r peak-detection-plot-ms-data, message = FALSE, warning = FALSE, fig.aligh = "center", fig.width = 14, fig.height = 14, fig.cap = "Visualization of the raw MS data for one peak. For each plot: upper panel: chromatogram plotting the intensity values against the retention time, lower panel m/z against retention time plot. The individual data points are colored according to the intensity." } +```{r peak-detection-plot-ms-data, message = FALSE, warning = FALSE, fig.aligh = "center", fig.width = 14, fig.height = 14, fig.cap = "Visualization of the raw MS data for one peak. For each plot: upper panel: chromatogram plotting the intensity values against the retention time, lower panel m/z against retention time plot. The individual data points are colored according to the intensity." } raw_data %>% filterRt(rt = rtr) %>% filterMz(mz = mzr) %>% - plot(type = "XIC") + plot(type = "XIC") ``` In the present data there is actually no variation in the m/z values. Usually -one would see the m/z values (lower panel) scatter around the *real* m/z value of -the compound. The first step of the *centWave* algorithm defines so called regions -of interest (ROI) based on the difference of m/z values from consecutive +one would see the m/z values (lower panel) scatter around the *real* m/z value +of the compound. The first step of the *centWave* algorithm defines so called +regions of interest (ROI) based on the difference of m/z values from consecutive scans. In detail, m/z values from consecutive scans are included into a ROI if the difference between the m/z and the mean m/z of the ROI is smaller than the -user defined `ppm` parameter. A reasonable choice for the `ppm` could thus be the -maximal m/z difference of data points from neighboring scans/spectra that are -part of the chromatographic peak. It is suggested to inspect the ranges of m/z -values for many compounds (either internal standards or compounds known to be -present in the sample) and define the `ppm` parameter for *centWave* according to -these. +user defined `ppm` parameter. A reasonable choice for the `ppm` could thus be +the maximal m/z difference of data points from neighboring scans/spectra that +are part of the chromatographic peak. It is suggested to inspect the ranges of +m/z values for many compounds (either internal standards or compounds known to +be present in the sample) and define the `ppm` parameter for *centWave* +according to these. + +Note that we can also perform the peak detection on the extracted ion +chromatogram. This can help to evaluate different peak detection settings. Only +be aware that peak detection on an extracted ion chromatogram will not consider +the `ppm` parameter and that the estimation of the background signal is +different to the peak detection on the full data set; values for the `snthresh` +will hence have different consequences. Below we perform the peak detection with +the `findChromPeaks` function on the extracted ion chromatogram. The submitted +*parameter* object defines which algorithm will be used and allows to define the +settings for this algorithm. We use the *centWave* algorithm with default +settings, except for `snthresh`. + +```{r peak-detection-eic, message = FALSE} +xchr <- findChromPeaks(chr_raw, param = CentWaveParam(snthresh = 2)) +``` + +We can access the identified chromatographic peaks with the `chromPeaks` +function. + +```{r peak-detection-eic-chromPeaks} +head(chromPeaks(xchr)) +``` -Below we perform the chromatographic peak detection using the `findChromPeaks` -method. The submitted *parameter* object defines which algorithm will be used and -allows to define the settings for this algorithm. Note that we set the argument -`noise` to `1000` to slightly speed up the analysis by considering only signals with -a value larger than 1000 in the peak detection step. +Parallel to the `chromPeaks` matrix there is also a data frame `chromPeakData` +that allows to add arbitrary annotations to each chromatographic peak. Below we +extract this data frame that by default contains only the MS level in which the +peak was identified. -```{r peak-detection-centwave, message = FALSE, results = "hide" } -cwp <- CentWaveParam(peakwidth = c(30, 80), noise = 1000) -xdata <- findChromPeaks(raw_data, param = cwp) +```{r peak-detection-chromatogram-chromPeakData} +chromPeakData(xchr) ``` -The results are returned as an `XCMSnExp` object which extends the `OnDiskMSnExp` -object by storing also LC/GC-MS preprocessing results. This means also that all -methods to sub-set and filter the data or to access the (raw) data are inherited -from the `OnDiskMSnExp` object. The results from the chromatographic peak -detection can be accessed with the `chromPeaks` method. +Next we plot the identified chromatographic peaks in the extracted ion chromatogram. We use the `col` +parameter to color the individual chromatogram lines. Colors can also be +specified for the identified peaks, `peakCol` for the foreground/border color, +`peakBg` for the background/fill color. One color has to be provided for each +chromatographic peak listed by `chromPeaks`. Below we define a color to indicate +the sample group from which the sample is and use the sample information in the +peaks' `"sample"` column to assign the correct color to each chromatographic +peak. More peak highlighting options are described further below. + +```{r peak-detection-eic-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. Peak area of identified chromatographic peaks are highlighted in the sample group color."} +sample_colors <- group_colors[xchr$sample_group] +plot(xchr, col = sample_colors, + peakBg = sample_colors[chromPeaks(xchr)[, "column"]]) -```{r peak-detection-chromPeaks, message = FALSE } -head(chromPeaks(xdata)) +``` + +Finally we perform the chromatographic peak detection on the full data set. Note +that we set the argument `prefilter` to `c(6, 5000)` and `noise` to `5000` to +reduce the run time of this vignette. With this setting we consider only signals +with a value larger than 5000 in the peak detection step. + +```{r peak-detection-centwave, message = FALSE, results = "hide" } +cwp <- CentWaveParam(peakwidth = c(20, 80), noise = 5000, + prefilter = c(6, 5000)) +xdata <- findChromPeaks(raw_data, param = cwp) +``` + +The results are returned as an `XCMSnExp` object which extends the +`OnDiskMSnExp` object by storing also LC/GC-MS preprocessing results. This means +also that all methods to sub-set and filter the data or to access the (raw) data +are inherited from the `OnDiskMSnExp` object and can thus be re-used. Note also +that it is possible to perform additional rounds of peak detection (e.g. on MS +level > 1 data) on the `xdata` object by calling `findChromPeaks` with the +parameter `add = TRUE`. + +The results from the chromatographic peak detection can be accessed with the +`chromPeaks` method. + +```{r peak-detection-chromPeaks, message = FALSE } +head(chromPeaks(xdata)) ``` The returned `matrix` provides the m/z and retention time range for each @@ -279,30 +376,106 @@ identified chromatographic peak as well as the integrated signal intensity the index of the sample in the object/experiment in which the peak was identified. -Below we use the data from this table to calculate some per-file summaries. +Annotations for each individual peak can be extracted with the `chromPeakData` +function. This data frame could also be used to add/store arbitrary annotations +for each detected peak. + +```{r peak-detection-chromPeakData} +chromPeakData(xdata) +``` + +Peak detection will not always work perfectly leading to peak detection +artifacts, such as overlapping peaks or artificially split peaks. The +`refineChromPeaks` function allows to *refine* peak detection results by either +removing identified peaks not passing a certain criteria or by merging +artificially split chromatographic peaks. With parameter objects +`CleanPeaksParam` and `FilterIntensityParam` it is possible to remove peaks with +a retention time range or intensities below a threshold, respectively (see their +respective help pages for more details and examples). With +`MergeNeighboringPeaksParam` it is possible to merge chromatographic +peaks. Below we post-process the peak detection results merging peaks +overlapping in a 4 second window per file if the signal between in between them +is lower than 75% of the smaller peak's maximal intensity. See the +`MergeNeighboringPeaksParam` help page for a detailed description of the +settings and the approach. + +```{r peak-postprocessing, message = FALSE} +mpp <- MergeNeighboringPeaksParam(expandRt = 4) +xdata_pp <- refineChromPeaks(xdata, mpp) +``` + +An example for a merged peak is given below. + +```{r peak-postprocessing-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The splitted peak was merged into one."} +mzr_1 <- 305.1 + c(-0.01, 0.01) +chr_1 <- chromatogram(filterFile(xdata, 1), mz = mzr_1) +chr_2 <- chromatogram(filterFile(xdata_pp, 1), mz = mzr_1) +par(mfrow = c(1, 2)) +plot(chr_1) +plot(chr_2) +``` + +For the first trace in the chromatogram above centWave detected 3 peaks (1 for +the full area and two smaller ones, see left panel in the plot above). The peak +refinement with `MergeNeighboringPeaksParam` reduced them to a single peak +(right panel in the figure above). Note that this refinement does not merge +neighboring peaks for which the signal in between them is lower than a certain +proportion (see figure below). + +```{r peak-postprocessing-not-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The peaks were not merged."} +mzr_1 <- 496.2 + c(-0.01, 0.01) +chr_1 <- chromatogram(filterFile(xdata, 1), mz = mzr_1) +chr_2 <- chromatogram(filterFile(xdata_pp, 1), mz = mzr_1) +par(mfrow = c(1, 2)) +plot(chr_1) +plot(chr_2) +``` + +Note also that it is possible to perform the peak refinement on extracted ion +chromatograms. This could e.g. be used to fine-tune the settings for the +parameter. To illustrate this we perform below a peak refinement on the +extracted ion chromatogram `chr_1` reducing the `minProp` parameter to force +joining the two peaks. -```{r peak-detection-peaks-per-sample, message = FALSE, results = "asis" } -summary_fun <- function(z) { +```{r peak-postprocessing-chr, fig..width = 5, fig.height = 5} +res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) +chromPeaks(res) +plot(res) +``` + +Before proceeding we replace the `xdata` object with the results from the peak +refinement. + +```{r} +xdata <- xdata_pp +``` + +Below we use the data from the `chromPeaks` matrix to calculate some per-file +summaries. + +```{r peak-detection-peaks-per-sample, message = FALSE, results = "asis" } +summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) -} -T <- lapply(split.data.frame(chromPeaks(xdata), - f = chromPeaks(xdata)[, "sample"]), - FUN = summary_fun) + +T <- lapply(split.data.frame( + chromPeaks(xdata), f = chromPeaks(xdata)[, "sample"]), + FUN = summary_fun) T <- do.call(rbind, T) rownames(T) <- basename(fileNames(xdata)) -pandoc.table(T, - caption = paste0("Summary statistics on identified chromatographic", - " peaks. Shown are number of identified peaks per", - " sample and widths/duration of chromatographic ", - "peaks.")) +pandoc.table( + T, + caption = paste0("Summary statistics on identified chromatographic", + " peaks. Shown are number of identified peaks per", + " sample and widths/duration of chromatographic ", + "peaks.")) ``` We can also plot the location of the identified chromatographic peaks in the -m/z - retention time space for one file using the `plotChromPeaks` function. Below -we plot the chromatographic peaks for the 3rd sample. +m/z - retention time space for one file using the `plotChromPeaks` +function. Below we plot the chromatographic peaks for the 3rd sample. -```{r peak-detection-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Identified chromatographic peaks in the m/z by retention time space for one sample." } -plotChromPeaks(xdata, file = 3) +```{r peak-detection-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Identified chromatographic peaks in the m/z by retention time space for one sample." } +plotChromPeaks(xdata, file = 3) ``` To get a global overview of the peak detection we can plot the frequency of @@ -310,44 +483,78 @@ identified peaks per file along the retention time axis. This allows to identify time periods along the MS run in which a higher number of peaks was identified and evaluate whether this is consistent across files. -```{r peak-detection-chrom-peak-image, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Frequency of identified chromatographic peaks along the retention time axis. The frequency is color coded with higher frequency being represented by yellow-white. Each line shows the peak frequency for one file." } -plotChromPeakImage(xdata) +```{r peak-detection-chrom-peak-image, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Frequency of identified chromatographic peaks along the retention time axis. The frequency is color coded with higher frequency being represented by yellow-white. Each line shows the peak frequency for one file." } +plotChromPeakImage(xdata) +``` + +Next we highlight the identified chromatographic peaks for the example peak from +before. Evaluating such plots on a list of peaks corresponding to known peaks or +internal standards helps to ensure that peak detection settings were appropriate +and correctly identified the expected peaks. We extract the ion chromatogram +from the peak detection result object, which contains then also the identified +chromatographic peaks for that ion that we can extract with the `chromPeaks` +function. + +```{r peak-detection-eic-example-peak, message = FALSE} +chr_ex <- chromatogram(xdata, mz = mzr, rt = rtr) +chromPeaks(chr_ex) ``` -Next we highlight the identified chromatographic peaks for the example peak -from before. Evaluating such plots on a list of peaks corresponding to known -peaks or internal standards helps to ensure that peak detection settings were -appropriate and correctly identified the expected peaks. +We can also plot the extracted ion chromatogram. Identified chromatographic +peaks will be automatically highlighted in the plot. Below we highlight +chromatographic peaks with a rectangle from the peak's minimal to maximal rt and +from an intensity of 0 to the maximal signal of the peak. + +```{r peak-detection-highlight-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. The rectangles indicate the identified chromatographic peaks per sample." } +sample_colors <- group_colors[chr_ex$sample_group] +plot(chr_ex, col = sample_colors, peakType = "rectangle", + peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], + peakBg = NA) +``` -```{r peak-detection-highlight-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. The rectangles indicate the identified chromatographic peaks per sample." } -plot(chr_raw, col = group_colors[chr_raw$sample_group], lwd = 2) -highlightChromPeaks(xdata, border = group_colors[chr_raw$sample_group], - lty = 3, rt = rtr, mz = mzr) +Alternatively to the rectangle visualization above, it is possible to represent +the apex position of each peak with a single point (passing argument +`type = "point"` to the function), or draw the actually identified peak by +specifying `type = "polygon"`. To completely omit highlighting the identified +peaks (e.g. to plot base peak chromatograms or similar) `type = "none"` can be +used. Below we use `type = "polygon"` to fill the peak area +for each identified chromatographic peak in each sample. Whether individual +peaks can be still identified in such a plot depends however on the number of +samples from which peaks are drawn. + +```{r peak-detection-highlight-chrom-peaks-plot-polygon, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. The signal area of identified chromatographic peaks are filled with a color." } +plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, + peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) ``` Note that we can also specifically extract identified chromatographic peaks for a selected region by providing the respective m/z and retention time ranges with the `mz` and `rt` arguments in the `chromPeaks` method. -```{r peak-detection-chrom-peak-table-selected, message = FALSE, results = "asis" } +```{r peak-detection-chrom-peak-table-selected, message = FALSE, results = "asis" } pander(chromPeaks(xdata, mz = mzr, rt = rtr), caption = paste("Identified chromatographic peaks in a selected ", - "m/z and retention time range.")) + "m/z and retention time range.")) ``` Finally we plot also the distribution of peak intensity per sample. This allows to investigate whether systematic differences in peak signals between samples are present. -```{r peak-detection-chrom-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak intensity distribution per sample." } +```{r peak-detection-chrom-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak intensity distribution per sample." } ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(xdata)[, "into"]), - f = chromPeaks(xdata)[, "sample"]) + f = chromPeaks(xdata)[, "sample"]) boxplot(ints, varwidth = TRUE, col = group_colors[xdata$sample_group], - ylab = expression(log[2]~intensity), main = "Peak intensities") -grid(nx = NA, ny = NULL) + ylab = expression(log[2]~intensity), main = "Peak intensities") +grid(nx = NA, ny = NULL) ``` +Note that in addition to the above described identification of chromatographic +peaks, it is also possible to *manually* define and add chromatographic peaks +with the `manualChromPeaks` function (see `?manualChromPeaks` help page for more +information). + # Alignment @@ -360,32 +567,33 @@ different samples within an experiment. A plethora of alignment algorithms exist (see [@Smith:2013gr]), with some of them being implemented also in `xcms`. The method to perform the -alignment/retention time correction in `xcms` is `adjustRtime` which uses different -alignment algorithms depending on the provided parameter class. In the example -below we use the *obiwarp* method [@Prince:2006jj] to align the samples. We -use a `binSize = 0.6` which creates warping functions in mz bins of 0.6. Also here -it is advisable to modify the settings for each experiment and evaluate if -retention time correction did align internal controls or known compounds -properly. +alignment/retention time correction in `xcms` is `adjustRtime` which uses +different alignment algorithms depending on the provided parameter class. -```{r alignment-obiwarp, message = FALSE, results = "hide" } -xdata <- adjustRtime(xdata, param = ObiwarpParam(binSize = 0.6)) +In the example below we use the *obiwarp* method [@Prince:2006jj] to align the +samples. We use a `binSize = 0.6` which creates warping functions in mz bins of +0.6. Also here it is advisable to modify the settings for each experiment and +evaluate if retention time correction did align internal controls or known +compounds properly. + +```{r alignment-obiwarp, message = FALSE, results = "hide" } +xdata <- adjustRtime(xdata, param = ObiwarpParam(binSize = 0.6)) ``` `adjustRtime`, besides calculating adjusted retention times for each spectrum, does also adjust the reported retention times of the identified chromatographic peaks. -To extract the adjusted retention times we can use the `adjustedRtime` method, or -simply the `rtime` method that, if present, returns by default adjusted retention -times from an `XCMSnExp` object. +To extract the adjusted retention times we can use the `adjustedRtime` method, +or simply the `rtime` method that, if present, returns by default adjusted +retention times from an `XCMSnExp` object. -```{r alignment-rtime, message = FALSE } +```{r alignment-rtime, message = FALSE } ## Extract adjusted retention times head(adjustedRtime(xdata)) ## Or simply use the rtime method -head(rtime(xdata)) +head(rtime(xdata)) ``` *Raw* retention times can be extracted from an `XCMSnExp` containing @@ -393,105 +601,164 @@ aligned data with `rtime(xdata, adjusted = FALSE)`. To evaluate the impact of the alignment we plot the BPC on the adjusted data. In addition we plot the differences of the adjusted- to the raw retention times per -sample using the `plotAdjustedRtime` function. - -```{r alignment-obiwarp-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Obiwarp aligned data. Base peak chromatogram after alignment (top) and difference between adjusted and raw retention times along the retention time axis (bottom)." } +sample using the `plotAdjustedRtime` function. For a base peak chromatogram it +makes no sense to also extract identified chromatographic peaks from the result +object. We thus use parameter `include = "none"` in the `chromatogram` call to +not include chromatographic peaks in the returned object. Note that +alternatively it would also be possible to simply avoid plotting them by setting +`peakType = "none"` in the `plot` call. + +```{r alignment-obiwarp-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Obiwarp aligned data. Base peak chromatogram after alignment (top) and difference between adjusted and raw retention times along the retention time axis (bottom)." } ## Get the base peak chromatograms. -bpis_adj <- chromatogram(xdata, aggregationFun = "max") +bpis_adj <- chromatogram(xdata, aggregationFun = "max", include = "none") par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) plot(bpis_adj, col = group_colors[bpis_adj$sample_group]) ## Plot also the difference of adjusted to raw retention time. -plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) +plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) ``` Too large differences between adjusted and raw retention times could indicate poorly performing samples or alignment. -Alternatively we could use the *peak groups* alignment method that adjusts the -retention time by aligning previously identified *hook peaks* (chromatographic -peaks present in most/all samples). Ideally, these hook peaks should span most -part of the retention time range. Below we first restore the raw retention times -(also of the identified peaks) using the `dropAdjustedRtime` methods. Note that a -`drop*` method is available for each preprocessing step allowing to remove the -respective results from the `XCMSnExp` object. +**Note**: `XCMSnExp` objects hold the raw along with the adjusted retention +times and subsetting will in most cases drop the adjusted retention +times. Sometimes it might thus be useful to **replace** the raw retention times +with the adjusted retention times. This can be done with the +`applyAdjustedRtime`. -```{r alignment-drop, message = FALSE } -## Does the object have adjusted retention times? -hasAdjustedRtime(xdata) +At last we evaluate the impact of the alignment on the test peak. -## Drop the alignment results. -xdata <- dropAdjustedRtime(xdata) +```{r alignment-peak-groups-example-peak, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example extracted ion chromatogram before (top) and after alignment (bottom)." } +par(mfrow = c(2, 1)) +## Plot the raw data +plot(chr_raw, col = group_colors[chr_raw$sample_group]) -## Does the object have adjusted retention times? -hasAdjustedRtime(xdata) +## Extract the chromatogram from the adjusted object +chr_adj <- chromatogram(xdata, rt = rtr, mz = mzr) +plot(chr_adj, col = group_colors[chr_raw$sample_group], peakType = "none") ``` -**Note**: `XCMSnExp` objects hold the raw along with the adjusted retention times and -subsetting will in most cases drop the adjusted retention times. Sometimes it -might thus be useful to **replace** the raw retention times with the adjusted -retention times. This can be done with the `applyAdjustedRtime`. - -As noted above the *peak groups* method requires peak groups (features) present in -most samples to perform the alignment. We thus have to perform a first -correspondence run to identify such peaks (details about the algorithm used are -presented in the next section). We use here again default settings, but it is -strongly advised to adapt the parameters for each data set. The definition of -the sample groups (i.e. assignment of individual samples to the sample groups in -the experiment) is mandatory for the `PeakDensityParam`. If there are no sample -groups in the experiment `sampleGroups` should be set to a single value for each -file (e.g. `rep(1, length(fileNames(xdata))`). - -```{r alignment-peak-groups, message = FALSE } -## Correspondence: group peaks across samples. -pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.8) -xdata <- groupChromPeaks(xdata, param = pdp) - -## Now the retention time correction. -pgp <- PeakGroupsParam(minFraction = 0.85) +## Subset-based alignment + +In some experiments it might be helpful to perform the alignment based on only a +subset of the samples, e.g. if QC samples were injected at regular intervals or +if the experiment contains blanks. Alignment method in `xcms` allow to +estimate retention time drifts on a subset of samples (either all samples +excluding blanks or QC samples injected at regular intervals during a +measurement run) and use these to adjust the full data set. + +Parameters `subset` (of the `PeakGroupsParam` or `ObiwarpParam` object) can be +used to define the subset of samples on which the alignment of the full data set +will be based (e.g. `subset` being the index of QC samples), and parameter +`subsetAdjust` allows to specify the method by which the *left-out* samples will +be adjusted. There are currently two options available: + +- `subsetAdjust = "previous"`: adjust the retention times of a non-subset + sample based on the alignment results of the previous subset sample (e.g. a + QC sample). If samples are e.g. in the order *A1*, *B1*, *B2*, *A2*, *B3*, + *B4* with *A* representing QC samples and *B* study samples, using + `subset = c(1, 4)` and `subsetAdjust = "previous"` would result in all *A* + samples to be aligned with each other and non-subset samples *B1* and *B2* + being adjusted based on the alignment result of subset samples *A1* and *B3* + and *B4* on those of *A2*. + +- `subsetAdjust = "average"`: adjust retention times of non-subset samples based + on an interpolation of the alignment results of the previous and subsequent + subset sample. In the example above, *B1* would be adjusted based on the + average of adjusted retention times between subset (QC) samples *A1* and + *A2*. Since there is no subset sample after non-subset samples *B3* and *B4* + these will be adjusted based on the alignment results of *A2* alone. Note + that a weighted average is used to calculate the adjusted retention time + averages, which uses the inverse of the difference of the index of the + non-subset sample to the subset samples as weights. Thus, if we have a + setup like *A1*, *B1*, *B2*, *A2* the adjusted retention times of *A1* + would get a larger weight than those of *A2* in the adjustment of + non-subset sample *B1* causing it's adjusted retention times to be closer + to those of *A1* than to *A2*. See below for examples. + +Both cases require a meaningful/correct ordering of the samples within the +object (e.g. ordering by injection index). + +The examples below aim to illustrate the effect of these alignment options. We +assume that samples 1, 4 and 7 in the *faahKO* data set are QC samples (sample +pools). We thus want to perform the alignment based on these samples and +subsequently adjust the retention times of the left-out samples (2, 3, 5, 6 and +8) based on interpolation of the results from the neighboring *subset* (QC) +samples. After initial peak grouping we perform below the alignment with the +*peak groups* method passing the indices of the samples on which we want the +alignment to be based on with the `subset` argument and specify `subsetAdjust = +"average"` to adjust the study samples based on interpolation of the alignment +results from neighboring subset/QC samples. + +Note that for any subset-alignment all parameters such as `minFraction` are +relative to the `subset`, not the full experiment! + +To re-perform an alignment we can first remove previous alignment results with +the `dropAdjustedRtime` function. + +```{r subset-define, message = FALSE, warning = FALSE} +xdata <- dropAdjustedRtime(xdata) -## Get the peak groups that would be used for alignment. -xdata <- adjustRtime(xdata, param = pgp) - +## Define the experimental layout +xdata$sample_type <- "study" +xdata$sample_type[c(1, 4, 7)] <- "QC" ``` -Note also that we could use the `adjustedRtimePeakGroups` method on the object -before alignment to evaluate on which features (peak groups) the alignment would -be performed. This can be useful to test different settings for the peak groups -algorithm. Also, it is possible to manually select or define certain peak groups -(i.e. their retention times per sample) and provide this matrix to the -`PeakGroupsParam` class with the `peakGroupsMatrix` argument. - -Below plot the difference between raw and adjusted retention times -using the `plotAdjustedRtime` function, which, if the *peak groups* method is used -for alignment, also highlights the peak groups used in the adjustment. - -```{r alignment-peak-groups-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak groups aligned data." } -## Plot the difference of adjusted to raw retention time. -plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group], - peakGroupsCol = "grey", peakGroupsPch = 1) +We next have to perform an initial correspondence analysis because the *peak +groups* alignment method adjusts the retention time by aligning previously +identified *hook peaks* (chromatographic peaks present in most/all samples; +details about the algorithm used are presented in the next section). We use here +the default settings, but it is strongly advised to adapt the parameters for +each data set. The definition of the sample groups (i.e. assignment of +individual samples to the sample groups in the experiment) is mandatory for the +`PeakDensityParam`. If there are no sample groups in the experiment +`sampleGroups` should be set to a single value for each file (e.g. `rep(1, +length(fileNames(xdata))`). + +```{r alignment-subset, message = FALSE, warning = FALSE} +## Initial peak grouping. Use sample_type as grouping variable +pdp_subs <- PeakDensityParam(sampleGroups = xdata$sample_type, + minFraction = 0.9) +xdata <- groupChromPeaks(xdata, param = pdp_subs) + +## Define subset-alignment options and perform the alignment +pgp_subs <- PeakGroupsParam(minFraction = 0.85, + subset = which(xdata$sample_type == "QC"), + subsetAdjust = "average", span = 0.4) +xdata <- adjustRtime(xdata, param = pgp_subs) ``` -At last we evaluate the impact of the alignment on the test peak. - -```{r alignment-peak-groups-example-peak, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example extracted ion chromatogram before (top) and after alignment (bottom)." } -par(mfrow = c(2, 1)) -## Plot the raw data -plot(chr_raw, col = group_colors[chr_raw$sample_group]) - -## Extract the chromatogram from the adjusted object -chr_adj <- chromatogram(xdata, rt = rtr, mz = mzr) -plot(chr_adj, col = group_colors[chr_raw$sample_group]) +Below we plot the results of the alignment labeling the samples being part of +the *subset* in green and the others in grey. This nicely shows how the +interpolation of the `subsetAdjust = "average"` works: retention times of sample +2 are adjusted based on those from subset sample 1 and 4, giving however more +weight to the closer subset sample 1 which results in the adjusted retention +times of 2 being more similar to those of sample 1. Sample 3 on the other hand +gets adjusted giving more weight to the second subset sample (4). + +```{r alignment-subset-plot-2, message = FALSE, warning = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Subset-alignment results with option average. Difference between adjusted and raw retention times along the retention time axis. Samples on which the alignment models were estimated are shown in green, study samples in grey." } +clrs <- rep("#00000040", 8) +clrs[xdata$sample_type == "QC"] <- c("#00ce0080") +par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) +plot(chromatogram(xdata, aggregationFun = "sum"), + col = clrs, peakType = "none") +plotAdjustedRtime(xdata, col = clrs, peakGroupsPch = 1, + peakGroupsCol = "#00ce0040") ``` +Option `subsetAdjust = "previous"` adjusts the retention times of a non-subset +sample based on a single subset sample (the previous), which results in most +cases in the adjusted retention times of the non-subset sample being highly +similar to those of the subset sample which was used for adjustment. + # Correspondence The final step in the metabolomics preprocessing is the correspondence that matches detected chromatographic peaks between samples (and depending on the settings, also within samples if they are adjacent). The method to perform the -correspondence in `xcms` is `groupChromPeaks`. We will use the *peak density* method -[@Smith:2006ic] to group chromatographic peaks. The algorithm combines +correspondence in `xcms` is `groupChromPeaks`. We will use the *peak density* +method [@Smith:2006ic] to group chromatographic peaks. The algorithm combines chromatographic peaks depending on the density of peaks along the retention time axis within small slices along the mz dimension. To illustrate this we plot below the chromatogram for an mz slice with multiple chromatographic peaks @@ -500,28 +767,20 @@ hence only chromatographic peaks present in at least 40% of the samples per sample group are grouped into a feature. The sample group assignment is specified with the `sampleGroups` argument. -```{r correspondence-example-slice, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example for peak density correspondence. Upper panel: chromatogram for an mz slice with multiple chromatographic peaks. Middle and lower panel: identified chromatographic peaks at their retention time (x-axis) and index within samples of the experiments (y-axis) for different values of the bw parameter. The black line represents the peak density estimate. Grouping of peaks (based on the provided settings) is indicated by grey rectangles." } +```{r correspondence-example-slice, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example for peak density correspondence. Upper panel: chromatogram for an mz slice with multiple chromatographic peaks. lower panel: identified chromatographic peaks at their retention time (x-axis) and index within samples of the experiments (y-axis) for different values of the bw parameter. The black line represents the peak density estimate. Grouping of peaks (based on the provided settings) is indicated by grey rectangles." } ## Define the mz slice. mzr <- c(305.05, 305.15) ## Extract and plot the chromatograms -chr_mzr <- chromatogram(xdata, mz = mzr, rt = c(2500, 4000)) -par(mfrow = c(3, 1), mar = c(1, 4, 1, 0.5)) -cols <- group_colors[chr_mzr$sample_group] -plot(chr_mzr, col = cols, xaxt = "n", xlab = "") -## Highlight the detected peaks in that region. -highlightChromPeaks(xdata, mz = mzr, col = cols, type = "point", pch = 16) +chr_mzr <- chromatogram(xdata, mz = mzr) ## Define the parameters for the peak density method pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.4, bw = 30) -par(mar = c(4, 4, 1, 0.5)) -plotChromPeakDensity(xdata, mz = mzr, col = cols, param = pdp, - pch = 16, xlim = c(2500, 4000)) -## Use a different bw -pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.4, bw = 20) -plotChromPeakDensity(xdata, mz = mzr, col = cols, param = pdp, - pch = 16, xlim = c(2500, 4000)) + minFraction = 0.4, bw = 30) +plotChromPeakDensity(chr_mzr, col = sample_colors, param = pdp, + peakBg = sample_colors[chromPeaks(chr_mzr)[, "sample"]], + peakCol = sample_colors[chromPeaks(chr_mzr)[, "sample"]], + peakPch = 16) + ``` The upper panel in the plot above shows the extracted ion chromatogram for each @@ -529,61 +788,95 @@ sample with the detected peaks highlighted. The middle and lower plot shows the retention time for each detected peak within the different samples. The black solid line represents the density distribution of detected peaks along the retention times. Peaks combined into *features* (peak groups) are indicated with -grey rectangles. Different values for the `bw` parameter of the `PeakDensityParam` -were used: `bw = 30` in the middle and `bw = 20` in the lower panel. With the -default value for the parameter `bw` the two neighboring chromatographic peaks -would be grouped into the same feature, while with a `bw` of 20 they would be -grouped into separate features. This grouping depends on the parameters for the -density function and other parameters passed to the algorithm with the -`PeakDensityParam`. - -```{r correspondence, message = FALSE } +grey rectangles. This type of visualization is ideal to test correspondence +settings on example m/z slices before applying them to the full data set. + +Below we perform the correspondence analysis with the defined settings on the +full data set. + +```{r correspondence, message = FALSE } ## Perform the correspondence pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.4, bw = 20) -xdata <- groupChromPeaks(xdata, param = pdp) + minFraction = 0.4, bw = 30) +xdata <- groupChromPeaks(xdata, param = pdp) ``` -The results from the correspondence can be extracted using the -`featureDefinitions` method, that returns a `DataFrame` with the definition of the -features (i.e. the mz and rt ranges and, in column `peakidx`, the index of the -chromatographic peaks in the `chromPeaks` matrix for each feature). +Results from the xcms-based preprocessing can be summarized into a +`SummarizedExperiment` object from the `r Biocpkg("SummarizedExperiment")` +package with the `quantify` method. This object will contain the feature +abundances as the *assay matrix*, the feature definition (their m/z, retention +time and other metadata) as `rowData` (i.e. row annotations) and the +sample/phenotype information as `colData` (i.e. column annotations). All the +processing history will be put into the object's `metadata`. This object can +then be used for any further (`xcms`-independent) processing and analysis. + +Below we use `quantify` to generate the result object for the present +analysis. The parameters `value` and any other additional parameters are passed +along to the `featureValues` method that is used internally to create the +feature abundance matrix. + +```{r quantify} +res <- quantify(xdata, value = "into") +``` + +Sample annotations can be accessed with the `colData` method. + +```{r quantify-colData} +colData(res) +``` -```{r correspondence-featureDefs, message = FALSE } +Feature annotations with `rowData`: + +```{r quantify-rowData} +rowData(res) +``` + +The feature abundances can be accessed with the `assay` method. Note also that a +`SummarizedExperiment` supports multiple such assay matrices. + +```{r} +head(assay(res)) +``` + +In addition it is possible to extract the results from the correspondence +analysis individually using the `featureDefinitions` and `featureValues` +methods, the former returning a `DataFrame` with the definition of the features +(i.e. the mz and rt ranges and, in column `peakidx`, the index of the +chromatographic peaks in the `chromPeaks` matrix for each feature), the latter +the feature abundances. + +```{r correspondence-featureDefs, message = FALSE } ## Extract the feature definitions featureDefinitions(xdata) - ``` -The `featureValues` method returns a `matrix` with rows being features and columns -samples. The content of this matrix can be defined using the `value` -argument. Setting `value = "into"` returns a matrix with the integrated signal of -the peaks corresponding to a feature in a sample. Any column name of the -`chromPeaks` matrix can be passed to the argument `value`. Below we extract the -integrated peak intensity per feature/sample. +The `featureValues` method returns a `matrix` with rows being features and +columns samples. The content of this matrix can be defined using the `value` +argument. The default `value = "into"` returns a matrix with the integrated +signal of the peaks corresponding to a feature in a sample. Any column name of +the `chromPeaks` matrix can be passed to the argument `value`. Below we extract +the integrated peak intensity per feature/sample. -```{r correspondence-feature-values, message = FALSE } +```{r correspondence-feature-values, message = FALSE } ## Extract the into column for each feature. head(featureValues(xdata, value = "into")) - ``` -This feature matrix contains `NA` for samples in which no chromatographic peak was -detected in the feature's m/z-rt region. While in many cases there might indeed -be no peak signal in the respective region, it might also be that there is -signal, but the peak detection algorithm failed to detect a chromatographic -peak. `xcms` provides the `fillChromPeaks` method to *fill in* intensity data for such -missing values from the original files. The *filled in* peaks are added to the -`chromPeaks` matrix and are flagged with an `1` in the `"is_filled"` column. Below we -perform such a filling-in of missing peaks. +This feature matrix contains `NA` for samples in which no chromatographic peak +was detected in the feature's m/z-rt region. While in many cases there might +indeed be no peak signal in the respective region, it might also be that there +is signal, but the peak detection algorithm failed to detect a chromatographic +peak (e.g. because the signal was too low or too noisy). `xcms` provides +the `fillChromPeaks` method to *fill in* intensity data for such missing values +from the original files. The *filled in* peaks are added to the `chromPeaks` +matrix and indicated with a value `TRUE` in the `"is_filled"` column of +the `chromPeakData` data frame. Below we perform such a gap filling. + -```{r fill-chrom-peaks, message = FALSE } -## Filling missing peaks using default settings. Alternatively we could -## pass a FillChromPeaksParam object to the method. -xdata <- fillChromPeaks(xdata) +```{r fill-chrom-peaks, message = FALSE } +xdata <- fillChromPeaks(xdata, param = ChromPeakAreaParam()) head(featureValues(xdata)) - ``` For features without detected peaks in a sample, the method extracts all @@ -592,9 +885,23 @@ intensities in the mz-rt region of the feature, integrates the signal and adds a measured/available for the mz-rt region of the feature. For these, even after filling in missing peak data, a `NA` is reported in the `featureValues` matrix. +Different options to define the mz-rt region of the features are +available. With the `ChromPeakAreaParam()` parameter object used above, the +feature area is defined using the m/z and rt ranges of all of its (detected) +chromatographic peaks: the lower m/z value of the area is defined as the lower +quartile (25% quantile) of the `"mzmin"` values of all peaks of the feature, +the upper m/z value as the upper quartile (75% quantile) of the `"mzmax"` +values, the lower rt value as the lower quartile (25% quantile) of the `"rtmin"` +and the upper rt value as the upper quartile (75% quantile) of the `"rtmax"` +values. This ensures that the signal is integrated from a feature-specific area. + +Alternatively, it is possible to use the `FillChromPeaksParam` parameter object +in the `fillChromPeaks` call, which resembles the approach of the original (old) +`xcms` implementation. + Below we compare the number of missing values before and after filling in -missing values. We can use the parameter `filled` of the `featureValues` method to -define whether or not filled-in peak values should be returned too. +missing values. We can use the parameter `filled` of the `featureValues` method +to define whether or not filled-in peak values should be returned too. ```{r fill-chrom-peaks-compare, message = FALSE } ## Missing values before filling in peaks @@ -604,7 +911,71 @@ apply(featureValues(xdata, filled = FALSE), MARGIN = 2, ## Missing values after filling in peaks apply(featureValues(xdata), MARGIN = 2, FUN = function(z) sum(is.na(z))) - + +``` + +```{r export-result, eval = FALSE, echo = FALSE} +save(xdata, file = "xdata.RData") +``` + +Next we use the `featureSummary` function to get a general per-feature summary +that includes the number of samples in which a peak was found or the number of +samples in which more than one peak was assigned to the feature. Specifying also +sample groups breaks down these summary statistics for each individual sample +group. + +```{r featureSummary, message = FALSE } +head(featureSummary(xdata, group = xdata$sample_group)) +``` + +We can add the feature value matrix with the filled-in data for missing peaks +also to our `SummarizedExperiment` object `res` as an additional *assay*: + +```{r} +assays(res)$raw_filled <- featureValues(xdata, filled = TRUE) +``` + +We have now two matrices (assays) available, the matrix with the detected and +the matrix with the detected and filled-in values, each can be accessed by their +name. + +```{r} +assayNames(res) + +head(assay(res, "raw")) +head(assay(res, "raw_filled")) +``` + +The performance of peak detection, alignment and correspondence should always be +evaluated by inspecting extracted ion chromatograms e.g. of known compounds, +internal standards or identified features in general. The `featureChromatograms` +function allows to extract chromatograms for each feature present in +`featureDefinitions`. The returned `MChromatograms` object contains an ion +chromatogram for each feature (each row containing the data for one feature) and +sample (each column representing containing data for one sample). Below we +extract the chromatograms for the first 4 features. + +```{r featureChromatograms, message = FALSE } +feature_chroms <- featureChromatograms(xdata, features = 1:4) + +feature_chroms +``` + +And plot the extracted ion chromatograms. We again use the group color for each +identified peak to fill the area. + +```{r feature-eic, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Extracted ion chromatograms for features 1 to 4." } +plot(feature_chroms, col = sample_colors, + peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) + +``` + +To access the EICs of the second feature we can simply subset the +`feature_chroms` object. + +```{r} +eic_2 <- feature_chroms[2, ] +chromPeaks(eic_2) ``` At last we perform a principal component analysis to evaluate the grouping of @@ -612,9 +983,9 @@ the samples in this experiment. Note that we did not perform any data normalization hence the grouping might (and will) also be influenced by technical biases. -```{r final-pca, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "PCA for the faahKO data set, un-normalized intensities." } +```{r final-pca, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "PCA for the faahKO data set, un-normalized intensities." } ## Extract the features and log2 transform them -ft_ints <- log2(featureValues(xdata, value = "into")) +ft_ints <- log2(assay(res, "raw_filled")) ## Perform the PCA omitting all features with an NA in any of the ## samples. Also, the intensities are mean centered. @@ -623,16 +994,16 @@ pc <- prcomp(t(na.omit(ft_ints)), center = TRUE) ## Plot the PCA cols <- group_colors[xdata$sample_group] pcSummary <- summary(pc) -plot(pc$x[, 1], pc$x[,2], pch = 21, main = "", +plot(pc$x[, 1], pc$x[,2], pch = 21, main = "", xlab = paste0("PC1: ", format(pcSummary$importance[2, 1] * 100, - digits = 3), " % variance"), + digits = 3), " % variance"), ylab = paste0("PC2: ", format(pcSummary$importance[2, 2] * 100, - digits = 3), " % variance"), + digits = 3), " % variance"), col = "darkgrey", bg = cols, cex = 2) grid() text(pc$x[, 1], pc$x[,2], labels = xdata$sample_name, col = "darkgrey", pos = 3, cex = 2) - + ``` We can see the expected separation between the KO and WT samples on PC2. On PC1 @@ -645,13 +1016,16 @@ properties of the mice analyzed (sex, age, litter mates etc). # Further data processing and analysis Normalizing features' signal intensities is required, but at present not (yet) -supported in `xcms` (some methods might be added in near future). Also, for the +supported in `xcms` (some methods might be added in near future). It is advised +to use the `SummarizedExperiment` returned by the `quantify` method for any +further data processing, as this type of object stores feature definitions, +sample annotations as well as feature abundances in the same object. For the identification of e.g. features with significant different intensities/abundances it is suggested to use functionality provided in other R -packages, such as Bioconductor's excellent `limma` package. To enable support also -for other packages that rely on the *old* `xcmsSet` result object, it is possible to -coerce the new `XCMSnExp` object to an `xcmsSet` object using `xset <- as(x, -"xcmsSet")`, with `x` being an `XCMSnExp` object. +packages, such as Bioconductor's excellent `limma` package. To enable support +also for other packages that rely on the *old* `xcmsSet` result object, it is +possible to coerce the new `XCMSnExp` object to an `xcmsSet` object using `xset +<- as(x, "xcmsSet")`, with `x` being an `XCMSnExp` object. # Additional details and notes @@ -662,58 +1036,62 @@ compared to the original user interface see the *new\_functionality* vignette. ## Evaluating the process history -`XCMSnExp` objects allow to capture all performed pre-processing steps along with -the used parameter class within the `@processHistory` slot. Storing also the -parameter class ensures the highest possible degree of analysis documentation -and in future might enable to *replay* analyses or parts of it. The list of all -performed preprocessings can be extracted using the `processHistory` method. +`XCMSnExp` objects allow to capture all performed pre-processing steps along +with the used parameter class within the `@processHistory` slot. Storing also +the parameter class ensures the highest possible degree of analysis +documentation and in future might enable to *replay* analyses or parts of it. +The list of all performed preprocessings can be extracted using the +`processHistory` method. -```{r processhistory, message = FALSE } -processHistory(xdata) +```{r processhistory, message = FALSE } +processHistory(xdata) ``` It is also possible to extract specific processing steps by specifying its -type. Available *types* can be listed with the `processHistoryTypes` function. Below -we extract the parameter class for the alignment/retention time adjustment step. +type. Available *types* can be listed with the `processHistoryTypes` +function. Below we extract the parameter class for the alignment/retention time +adjustment step. -```{r processhistory-select, message = FALSE } +```{r processhistory-select, message = FALSE } ph <- processHistory(xdata, type = "Retention time correction") -ph +ph ``` And we can also extract the parameter class used in this preprocessing step. -```{r processhistory-param, message = FALSE } +```{r processhistory-param, message = FALSE } ## Access the parameter processParam(ph[[1]]) - + ``` ## Subsetting and filtering -`XCMSnEx` objects can be subsetted/filtered using the `[` method, or one of the many -`filter*` methods. All these methods aim to ensure that the data in the +`XCMSnEx` objects can be subsetted/filtered using the `[` method, or one of the +many `filter*` methods. All these methods aim to ensure that the data in the returned object is consistent. This means for example that if the object is subsetted by selecting specific spectra (by using the `[` method) all identified chromatographic peaks are removed. Correspondence results (i.e. identified features) are removed if the object is subsetted to contain only data from -selected files (using the `filterFile` method). This is because the correspondence -results depend on the files on which the analysis was performed - running a -correspondence on a subset of the files would lead to different results. +selected files (using the `filterFile` method). This is because the +correspondence results depend on the files on which the analysis was performed - +running a correspondence on a subset of the files would lead to different +results. Note that with `keepFeatures = TRUE` it would be possible to overwrite +this and keep also correspondence results for the specified files. As an exception, it is possible to force keeping adjusted retention times in the -subsetted object setting the `keepAdjustedRtime` argument to `TRUE` in any of the -subsetting methods. +subsetted object setting the `keepAdjustedRtime` argument to `TRUE` in any of +the subsetting methods. Below we subset our results object the data for the files 2 and 4. -```{r subset-filterFile, message = FALSE } +```{r subset-filterFile, message = FALSE } subs <- filterFile(xdata, file = c(2, 4)) ## Do we have identified chromatographic peaks? -hasChromPeaks(subs) +hasChromPeaks(subs) ``` Peak detection is performed separately on each file, thus the subsetted object @@ -723,63 +1101,63 @@ features. All features have however been removed and also the adjusted retention times (since the alignment based on features that were identified on chromatographic peaks on all files). -```{r subset-filterFile-2, message = FALSE } +```{r subset-filterFile-2, message = FALSE } ## Do we still have features? hasFeatures(subs) ## Do we still have adjusted retention times? -hasAdjustedRtime(subs) +hasAdjustedRtime(subs) ``` -We can however use the `keepAdjustedRtime` argument to force keeping the adjusted -retention times. +We can however use the `keepAdjustedRtime` argument to force keeping the +adjusted retention times, `keepFeatures` would even keep correspondence results. -```{r subset-filterFile-3, message = FALSE } +```{r subset-filterFile-3, message = FALSE } subs <- filterFile(xdata, keepAdjustedRtime = TRUE) -hasAdjustedRtime(subs) +hasAdjustedRtime(subs) ``` -The `filterRt` method can be used to subset the object to spectra within a certain -retention time range. +The `filterRt` method can be used to subset the object to spectra within a +certain retention time range. -```{r subset-filterRt, message = FALSE } +```{r subset-filterRt, message = FALSE } subs <- filterRt(xdata, rt = c(3000, 3500)) -range(rtime(subs)) +range(rtime(subs)) ``` Filtering by retention time does not change/affect adjusted retention times -(also, if adjusted retention times are present, the filtering is performed **on** -the adjusted retention times). +(also, if adjusted retention times are present, the filtering is performed +**on** the adjusted retention times). -```{r subset-filterRt-2, message = FALSE } -hasAdjustedRtime(subs) +```{r subset-filterRt-2, message = FALSE } +hasAdjustedRtime(subs) ``` Also, we have all identified chromatographic peaks within the specified retention time range: -```{r subset-filterRt-3, message = FALSE } +```{r subset-filterRt-3, message = FALSE } hasChromPeaks(subs) -range(chromPeaks(subs)[, "rt"]) +range(chromPeaks(subs)[, "rt"]) ``` -The most natural way to subset any object in R is with `[`. Using `[` on an `XCMSnExp` -object subsets it keeping only the selected spectra. The index `i` used in `[` has -thus to be an integer between 1 and the total number of spectra (across all -files). Below we subset `xdata` using both `[` and `filterFile` to keep all spectra -from one file. +The most natural way to subset any object in R is with `[`. Using `[` on an +`XCMSnExp` object subsets it keeping only the selected spectra. The index `i` +used in `[` has thus to be an integer between 1 and the total number of spectra +(across all files). Below we subset `xdata` using both `[` and `filterFile` to +keep all spectra from one file. -```{r subset-bracket, message = FALSE, warning = FALSE } +```{r subset-bracket, message = FALSE, warning = FALSE } ## Extract all data from the 3rd file. one_file <- filterFile(xdata, file = 3) one_file_2 <- xdata[fromFile(xdata) == 3] ## Is the content the same? -all.equal(spectra(one_file), spectra(one_file_2)) +all.equal(one_file[[2]], one_file_2[[2]]) ``` While the spectra-content is the same in both objects, `one_file` contains also @@ -787,18 +1165,18 @@ the identified chromatographic peaks while `one_file_2` does not. Thus, in most situations subsetting using one of the filter functions is preferred over the use of `[`. -```{r subset-bracket-peaks, message = FALSE } +```{r subset-bracket-peaks, message = FALSE } ## Subsetting with filterFile preserves chromatographic peaks head(chromPeaks(one_file)) ## Subsetting with [ not -head(chromPeaks(one_file_2)) +head(chromPeaks(one_file_2)) ``` -Note however that also `[` does support the `keepAdjustedRtime` argument. Below we -subset the object to spectra 20:30. +Note however that also `[` does support the `keepAdjustedRtime` argument. Below +we subset the object to spectra 20:30. -```{r subset-bracket-keepAdjustedRtime, message = FALSE, warnings = FALSE } +```{r subset-bracket-keepAdjustedRtime, message = FALSE, warnings = FALSE } subs <- xdata[20:30, keepAdjustedRtime = TRUE] hasAdjustedRtime(subs) @@ -807,69 +1185,57 @@ hasAdjustedRtime(subs) rtime(subs) ## Access raw retention times: -rtime(subs, adjusted = FALSE) +rtime(subs, adjusted = FALSE) ``` -As with `MSnExp` and `OnDiskMSnExp` objects, `[[` can be used to extract a single -spectrum object from an `XCMSnExp` object. The retention time of the spectrum -corresponds to the adjusted retention time if present. +As with `MSnExp` and `OnDiskMSnExp` objects, `[[` can be used to extract a +single spectrum object from an `XCMSnExp` object. The retention time of the +spectrum corresponds to the adjusted retention time if present. -```{r subset-double-bracket, message = FALSE } +```{r subset-double-bracket, message = FALSE } ## Extract a single spectrum -xdata[[14]] +xdata[[14]] ``` -At last we can also use the `split` method that allows to split an `XCMSnExp` based -on a provided factor `f`. Below we split `xdata` per file. Using `keepAdjustedRtime -= TRUE` ensures that adjusted retention times are not removed. +At last we can also use the `split` method that allows to split an `XCMSnExp` +based on a provided factor `f`. Below we split `xdata` per file. Using +`keepAdjustedRtime = TRUE` ensures that adjusted retention times are not +removed. -```{r subset-split, message = FALSE } +```{r subset-split, message = FALSE } x_list <- split(xdata, f = fromFile(xdata), keepAdjustedRtime = TRUE) lengths(x_list) -lapply(x_list, hasAdjustedRtime) -``` - -Note however that there is also a dedicated `splitByFile` method instead for that -operation, that internally uses `filterFile` and hence does e.g. not remove -identified chromatographic peaks. The method does not yet support the -`keepAdjustedRtime` parameter and thus removes by default adjusted retention -times. - -```{r subset-split-by-file, message = FALSE } -xdata_by_file <- splitByFile(xdata, f = factor(1:length(fileNames(xdata)))) - -lapply(xdata_by_file, hasChromPeaks) +lapply(x_list, hasAdjustedRtime) ``` ## Parallel processing -Most methods in `xcms` support parallel processing. Parallel processing is handled -and configured by the `BiocParallel` Bioconductor package and can be globally -defined for an R session. +Most methods in `xcms` support parallel processing. Parallel processing is +handled and configured by the `BiocParallel` Bioconductor package and can be +globally defined for an R session. Unix-based systems (Linux, macOS) support `multicore`-based parallel processing. To configure it globally we `register` the parameter class. Note also that `bpstart` is used below to initialize the parallel processes. -```{r multicore, message = FALSE, eval = FALSE } -register(bpstart(MulticoreParam(2))) +```{r multicore, message = FALSE, eval = FALSE } +register(bpstart(MulticoreParam(2))) ``` Windows supports only socket-based parallel processing: -```{r snow, message = FALSE, eval = FALSE } -register(bpstart(SnowParam(2))) +```{r snow, message = FALSE, eval = FALSE } +register(bpstart(SnowParam(2))) ``` Note that `multicore`-based parallel processing might be buggy or failing on -macOS. If so, the `DoparParam` could be used instead (requiring the `foreach` + macOS. If so, the `DoparParam` could be used instead (requiring the `foreach` package). For other options and details see the vignettes from the `BiocParallel` package. # References - diff --git a/vignettes/xcms.bioschemas.html b/vignettes/xcms.bioschemas.html new file mode 100644 index 000000000..09c5852ec --- /dev/null +++ b/vignettes/xcms.bioschemas.html @@ -0,0 +1,26 @@ + diff --git a/vignettes/xcms.org b/vignettes/xcms.org deleted file mode 100644 index e4475a666..000000000 --- a/vignettes/xcms.org +++ /dev/null @@ -1,951 +0,0 @@ -#+TITLE: LCMS data preprocessing and analysis with =xcms= -#+AUTHOR: Johannes Rainer -#+EMAIL: johannes.rainer@eurac.edu -#+DESCRIPTION: -#+KEYWORDS: -#+LANGUAGE: en -#+OPTIONS: ^:{} toc:nil -#+PROPERTY: header-args :exports code -#+PROPERTY: header-args :session *R* - -#+BEGIN_EXPORT html ---- -title: "LCMS data preprocessing and analysis with xcms" -package: xcms -output: - BiocStyle::html_document: - toc_float: true -vignette: > - %\VignetteIndexEntry{LCMS data preprocessing and analysis with xcms} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} - %\VignetteDepends{xcms,RColorBrewer,faahKO,pander,magrittr} - %\VignettePackage{xcms} - %\VignetteKeywords{mass spectrometry, metabolomics} -bibliography: references.bib -csl: biomed-central.csl -references: -- id: dummy - title: no title - author: - - family: noname - given: noname ---- - -#+END_EXPORT - -#+NAME: biocstyle -#+BEGIN_SRC R :ravel echo = FALSE, results = "asis" - BiocStyle::markdown() -#+END_SRC - -#+BEGIN_EXPORT html -**Package**: `r Biocpkg("xcms")`
-**Authors**: Johannes Rainer
-**Modified**: `r file.info("xcms.Rmd")$mtime`
-**Compiled**: `r date()` -#+END_EXPORT - -#+NAME: init -#+BEGIN_SRC R :ravel message = FALSE, echo = FALSE, results = "hide" - ## Silently loading all packages - library(BiocStyle) - library(xcms) - library(faahKO) - library(pander) - ## Use socket based parallel processing on Windows systems - ## if (.Platform$OS.type == "unix") { - ## register(bpstart(MulticoreParam(2))) - ## } else { - ## register(bpstart(SnowParam(2))) - ## } - register(SerialParam()) -#+END_SRC - -* Introduction - -This documents describes data import, exploration, preprocessing and analysis of -LCMS experiments with =xcms= version >= 3. The examples and basic workflow was -adapted from the original /LC/MS Preprocessing and Analysis with xcms/ vignette -from Colin A. Smith. - -The new user interface and methods use the =XCMSnExp= object (instead of the /old/ -=xcmsSet= object) as a container for the pre-processing results. To support -packages and pipelines relying on the =xcmsSet= object, it is however possible to -convert an =XCMSnExp= into a =xcmsSet= object using the =as= method (i.e. =xset <- -as(x, "xcmsSet")=, with =x= being an =XCMSnExp= object. - -* Data import - -=xcms= supports analysis of LC/MS data from files in (AIA/ANDI) NetCDF, mzML/mzXML -and mzData format. For the actual data import Bioconductor's SRC_R[:exports -both]{Biocpkg("mzR")} is used. For demonstration purpose we will analyze a -subset of the data from \cite{Saghatelian04} in which the metabolic consequences -of knocking out the fatty acid amide hydrolase (FAAH) gene in mice was -investigated. The raw data files (in NetCDF format) are provided with the =faahKO= -data package. The data set consists of samples from the spinal cords of 6 -knock-out and 6 wild-type mice. Each file contains data in centroid mode -acquired in positive ion mode form 200-600 m/z and 2500-4500 seconds. - -Below we load all required packages, locate the raw CDF files within the =faahKO= -package and build a /phenodata/ data frame describing the experimental setup. - -#+NAME: load-libs-pheno -#+BEGIN_SRC R :ravel message = FALSE - library(xcms) - library(faahKO) - library(RColorBrewer) - library(pander) - library(magrittr) - - ## Get the full path to the CDF files - cdfs <- dir(system.file("cdf", package = "faahKO"), full.names = TRUE, - recursive = TRUE) - ## Create a phenodata data.frame - pd <- data.frame(sample_name = sub(basename(cdfs), pattern = ".CDF", - replacement = "", fixed = TRUE), - sample_group = c(rep("KO", 6), rep("WT", 6)), - stringsAsFactors = FALSE) -#+END_SRC - -Subsequently we load the raw data as an =OnDiskMSnExp= object using the -=readMSData= method from the =MSnbase= package. While the =MSnbase= package was -originally developed for proteomics data processing, many of its functionality, -including raw data import and data representation, can be shared and reused in -metabolomics data analysis. Also, =MSnbase= can be used to /centroid/ profile-mode -MS data (see the corresponding vignette in the =MSnbase= package). - -#+NAME: load-with-msnbase -#+BEGIN_SRC R :ravel message = FALSE - raw_data <- readMSData(files = cdfs, pdata = new("NAnnotatedDataFrame", pd), - mode = "onDisk") -#+END_SRC - -The =OnDiskMSnExp= object contains general information about the number of -spectra, retention times, the measured total ion current etc, but does not -contain the full raw data (i.e. the m/z and intensity values from each measured -spectrum). Its memory footprint is thus rather small making it an ideal object -to represent large metabolomics experiments while still allowing to perform -simple quality controls, data inspection and exploration as well as data -sub-setting operations. The m/z and intensity values are imported from the raw -data files on demand, hence the location of the raw data files should not be -changed after initial data import. - -* Initial data inspection - -The =OnDiskMSnExp= organizes the MS data by spectrum and provides the methods -=intensity=, =mz= and =rtime= to access the raw data from the files (the measured -intensity values, the corresponding m/z and retention time values). In addition, -the =spectra= method could be used to return all data encapsulated in =Spectrum= -classes. Below we extract the retention time values from the object. - -#+NAME: data-inspection-rtime -#+BEGIN_SRC R :ravel message = FALSE - head(rtime(raw_data)) -#+END_SRC - -All data is returned as one-dimensional vectors (a numeric vector for =rtime= and -a =list= of numeric vectors for =mz= and =intensity=, each containing the values from -one spectrum), even if the experiment consists of multiple files/samples. The -=fromFile= function returns a numeric vector that provides the mapping of the -values to the originating file. Below we use the =fromFile= indices to organize -the =mz= values by file. - -#+NAME: data-inspection-mz -#+BEGIN_SRC R :ravel message = FALSE - mzs <- mz(raw_data) - - ## Split the list by file - mzs_by_file <- split(mzs, f = fromFile(raw_data)) - - length(mzs_by_file) -#+END_SRC - -As a first evaluation of the data we plot below the base peak chromatogram (BPC) -for each file in our experiment. We use the =chromatogram= method and set the -=aggregationFun= to ="max"= to return for each spectrum the maximal intensity and -hence create the BPC from the raw data. To create a total ion chromatogram we -could set =aggregationFun= to =sum=. - -#+NAME: data-inspection-bpc -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 12, fig.height = 6 - ## Get the base peak chromatograms. This reads data from the files. - bpis <- chromatogram(raw_data, aggregationFun = "max") - ## Define colors for the two groups - group_colors <- brewer.pal(3, "Set1")[1:2] - names(group_colors) <- c("KO", "WT") - - ## Plot all chromatograms. - plot(bpis, col = group_colors[raw_data$sample_group]) - -#+END_SRC - -The =chromatogram= method returned a =Chromatograms= object that organizes -individual =Chromatogram= objects (which in fact contain the chromatographic data) -in a two-dimensional array: columns represent samples and rows (optionally) m/z -and/or retention time ranges. Below we extract the chromatogram of the first -sample and access its retention time and intensity values. - -#+NAME: data-inspection-chromatogram -#+BEGIN_SRC R :ravel message = FALSE - bpi_1 <- bpis[1, 1] - head(rtime(bpi_1)) - head(intensity(bpi_1)) -#+END_SRC - -The =chromatogram= method supports also extraction of chromatographic data from a -m/z-rt slice of the MS data. In the next section we will use this method to -create an extracted ion chromatogram (EIC) for a selected peak. - -Note that =chromatogram= reads the raw data from each file to calculate the -chromatogram. The =bpi= and =tic= methods on the other hand do not read any data -from the raw files but use the respective information that was provided in the -header definition of the input files (which might be different from the actual -data). - -Below we create boxplots representing the distribution of total ion currents per -file. Such plots can be very useful to spot problematic or failing MS runs. - -#+NAME: data-inspection-tic-boxplot -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Distribution of total ion currents per file." - ## Get the total ion current by file - tc <- split(tic(raw_data), f = fromFile(raw_data)) - boxplot(tc, col = group_colors[raw_data$sample_group], - ylab = "intensity", main = "Total ion current") -#+END_SRC - -* Chromatographic peak detection - -Next we perform the chromatographic peak detection using the /centWave/ algorithm -\cite{Tautenhahn:2008fx}. Before running the peak detection it is however -strongly suggested to visually inspect e.g. the extracted ion chromatogram of -internal standards or known compounds to evaluate and adapt the peak detection -settings since the default settings will not be appropriate for most LCMS -experiments. The two most critical parameters for /centWave/ are the =peakwidth= -(expected range of chromatographic peak widths) and =ppm= (maximum expected -deviation of m/z values of centroids corresponding to one chromatographic peak; -this is usually much larger than the ppm specified by the manufacturer) -parameters. -To evaluate the typical chromatographic peak width we plot the EIC for one peak. - -#+NAME: peak-detection-plot-eic -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 5, fig.cap = "Extracted ion chromatogram for one peak." - ## Define the rt and m/z range of the peak area - rtr <- c(2700, 2900) - mzr <- c(334.9, 335.1) - ## extract the chromatogram - chr_raw <- chromatogram(raw_data, mz = mzr, rt = rtr) - plot(chr_raw, col = group_colors[chr_raw$sample_group]) -#+END_SRC - -Note that =Chromatogram= objects extracted by the =chromatogram= method contain an -=NA= value if in a certain scan (i.e. for a specific retention time) no signal was -measured in the respective mz range. This is reflected by the lines not being -drawn as continuous lines in the plot above. - -The peak above has a width of about 50 seconds. The =peakwidth= parameter should -be set to accommodate the expected widths of peak in the data set. We set it to -=20,80= for the present example data set. - -For the =ppm= parameter we extract the full MS data (intensity, retention time and -m/z values) corresponding to the above peak. To this end we first filter the raw -object by retention time, then by m/z and finally plot the object with =type = -"XIC"= to produce the plot below. We use the /pipe/ (=%>%=) command better -illustrate the corresponding workflow. - -#+NAME: peak-detection-plot-ms-data -#+BEGIN_SRC R :ravel message = FALSE, warning = FALSE, fig.aligh = "center", fig.width = 14, fig.height = 14, fig.cap = "Visualization of the raw MS data for one peak. For each plot: upper panel: chromatogram plotting the intensity values against the retention time, lower panel m/z against retention time plot. The individual data points are colored according to the intensity." - raw_data %>% - filterRt(rt = rtr) %>% - filterMz(mz = mzr) %>% - plot(type = "XIC") -#+END_SRC - -In the present data there is actually no variation in the m/z values. Usually -one would see the m/z values (lower panel) scatter around the /real/ m/z value of -the compound. The first step of the /centWave/ algorithm defines so called regions -of interest (ROI) based on the difference of m/z values from consecutive -scans. In detail, m/z values from consecutive scans are included into a ROI if -the difference between the m/z and the mean m/z of the ROI is smaller than the -user defined =ppm= parameter. A reasonable choice for the =ppm= could thus be the -maximal m/z difference of data points from neighboring scans/spectra that are -part of the chromatographic peak. It is suggested to inspect the ranges of m/z -values for many compounds (either internal standards or compounds known to be -present in the sample) and define the =ppm= parameter for /centWave/ according to -these. - -Below we perform the chromatographic peak detection using the =findChromPeaks= -method. The submitted /parameter/ object defines which algorithm will be used and -allows to define the settings for this algorithm. Note that we set the argument -=noise= to =1000= to slightly speed up the analysis by considering only signals with -a value larger than 1000 in the peak detection step. - -#+NAME: peak-detection-centwave -#+BEGIN_SRC R :ravel message = FALSE, results = "hide" - cwp <- CentWaveParam(peakwidth = c(30, 80), noise = 1000) - xdata <- findChromPeaks(raw_data, param = cwp) -#+END_SRC - -The results are returned as an =XCMSnExp= object which extends the =OnDiskMSnExp= -object by storing also LC/GC-MS preprocessing results. This means also that all -methods to sub-set and filter the data or to access the (raw) data are inherited -from the =OnDiskMSnExp= object. The results from the chromatographic peak -detection can be accessed with the =chromPeaks= method. - -#+NAME: peak-detection-chromPeaks -#+BEGIN_SRC R :ravel message = FALSE - head(chromPeaks(xdata)) -#+END_SRC - -The returned =matrix= provides the m/z and retention time range for each -identified chromatographic peak as well as the integrated signal intensity -("into") and the maximal peak intensitity ("maxo"). Columns "sample" contains -the index of the sample in the object/experiment in which the peak was -identified. - -Below we use the data from this table to calculate some per-file summaries. - -#+NAME: peak-detection-peaks-per-sample -#+BEGIN_SRC R :ravel message = FALSE, results = "asis" - summary_fun <- function(z) { - c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) - } - T <- lapply(split.data.frame(chromPeaks(xdata), - f = chromPeaks(xdata)[, "sample"]), - FUN = summary_fun) - T <- do.call(rbind, T) - rownames(T) <- basename(fileNames(xdata)) - pandoc.table(T, - caption = paste0("Summary statistics on identified chromatographic", - " peaks. Shown are number of identified peaks per", - " sample and widths/duration of chromatographic ", - "peaks.")) -#+END_SRC - -We can also plot the location of the identified chromatographic peaks in the -m/z - retention time space for one file using the =plotChromPeaks= function. Below -we plot the chromatographic peaks for the 3rd sample. - -#+NAME: peak-detection-chrom-peaks-plot -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Identified chromatographic peaks in the m/z by retention time space for one sample." - plotChromPeaks(xdata, file = 3) -#+END_SRC - -To get a global overview of the peak detection we can plot the frequency of -identified peaks per file along the retention time axis. This allows to identify -time periods along the MS run in which a higher number of peaks was identified -and evaluate whether this is consistent across files. - -#+NAME: peak-detection-chrom-peak-image -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Frequency of identified chromatographic peaks along the retention time axis. The frequency is color coded with higher frequency being represented by yellow-white. Each line shows the peak frequency for one file." - plotChromPeakImage(xdata) -#+END_SRC - -Next we highlight the identified chromatographic peaks for the example peak -from before. Evaluating such plots on a list of peaks corresponding to known -peaks or internal standards helps to ensure that peak detection settings were -appropriate and correctly identified the expected peaks. - -#+NAME: peak-detection-highlight-chrom-peaks-plot -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. The rectangles indicate the identified chromatographic peaks per sample." - plot(chr_raw, col = group_colors[chr_raw$sample_group], lwd = 2) - highlightChromPeaks(xdata, border = group_colors[chr_raw$sample_group], - lty = 3, rt = rtr, mz = mzr) -#+END_SRC - -Note that we can also specifically extract identified chromatographic peaks for -a selected region by providing the respective m/z and retention time ranges with -the =mz= and =rt= arguments in the =chromPeaks= method. - -#+NAME: peak-detection-chrom-peak-table-selected -#+BEGIN_SRC R :ravel message = FALSE, results = "asis" - pander(chromPeaks(xdata, mz = mzr, rt = rtr), - caption = paste("Identified chromatographic peaks in a selected ", - "m/z and retention time range.")) -#+END_SRC - -Finally we plot also the distribution of peak intensity per sample. This allows -to investigate whether systematic differences in peak signals between samples -are present. - -#+NAME: peak-detection-chrom-peak-intensity-boxplot -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak intensity distribution per sample." - ## Extract a list of per-sample peak intensities (in log2 scale) - ints <- split(log2(chromPeaks(xdata)[, "into"]), - f = chromPeaks(xdata)[, "sample"]) - boxplot(ints, varwidth = TRUE, col = group_colors[xdata$sample_group], - ylab = expression(log[2]~intensity), main = "Peak intensities") - grid(nx = NA, ny = NULL) -#+END_SRC - -* Alignment - -The time at which analytes elute in the chromatography can vary between samples -(and even compounds). Such a difference was already observable in the extracted -ion chromatogram plot shown as an example in the previous section. The alignment -step, also referred to as retention time correction, aims at adjusting this by -shifting signals along the retention time axis to align the signals between -different samples within an experiment. - -A plethora of alignment algorithms exist (see \cite{Smith:2013gr}), with some of -them being implemented also in =xcms=. The method to perform the -alignment/retention time correction in =xcms= is =adjustRtime= which uses different -alignment algorithms depending on the provided parameter class. In the example -below we use the /obiwarp/ method \cite{Prince:2006jj} to align the samples. We -use a =binSize = 0.6= which creates warping functions in mz bins of 0.6. Also here -it is advisable to modify the settings for each experiment and evaluate if -retention time correction did align internal controls or known compounds -properly. - -#+NAME: alignment-obiwarp -#+BEGIN_SRC R :ravel message = FALSE, results = "hide" - xdata <- adjustRtime(xdata, param = ObiwarpParam(binSize = 0.6)) -#+END_SRC - -=adjustRtime=, besides calculating adjusted retention times for each spectrum, -does also adjust the reported retention times of the identified chromatographic -peaks. - -To extract the adjusted retention times we can use the =adjustedRtime= method, or -simply the =rtime= method that, if present, returns by default adjusted retention -times from an =XCMSnExp= object. - -#+NAME: alignment-rtime -#+BEGIN_SRC R :ravel message = FALSE - ## Extract adjusted retention times - head(adjustedRtime(xdata)) - - ## Or simply use the rtime method - head(rtime(xdata)) -#+END_SRC - -/Raw/ retention times can be extracted from an =XCMSnExp= containing -aligned data with =rtime(xdata, adjusted = FALSE)=. - -To evaluate the impact of the alignment we plot the BPC on the adjusted data. In -addition we plot the differences of the adjusted- to the raw retention times per -sample using the =plotAdjustedRtime= function. - -#+NAME: alignment-obiwarp-plot -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Obiwarp aligned data. Base peak chromatogram after alignment (top) and difference between adjusted and raw retention times along the retention time axis (bottom)." - ## Get the base peak chromatograms. - bpis_adj <- chromatogram(xdata, aggregationFun = "max") - par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) - plot(bpis_adj, col = group_colors[bpis_adj$sample_group]) - ## Plot also the difference of adjusted to raw retention time. - plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) -#+END_SRC - -Too large differences between adjusted and raw retention times could indicate -poorly performing samples or alignment. - -Alternatively we could use the /peak groups/ alignment method that adjusts the -retention time by aligning previously identified /hook peaks/ (chromatographic -peaks present in most/all samples). Ideally, these hook peaks should span most -part of the retention time range. Below we first restore the raw retention times -(also of the identified peaks) using the =dropAdjustedRtime= methods. Note that a -=drop*= method is available for each preprocessing step allowing to remove the -respective results from the =XCMSnExp= object. - -#+NAME: alignment-drop -#+BEGIN_SRC R :ravel message = FALSE - ## Does the object have adjusted retention times? - hasAdjustedRtime(xdata) - - ## Drop the alignment results. - xdata <- dropAdjustedRtime(xdata) - - ## Does the object have adjusted retention times? - hasAdjustedRtime(xdata) -#+END_SRC - -*Note*: =XCMSnExp= objects hold the raw along with the adjusted retention times and -subsetting will in most cases drop the adjusted retention times. Sometimes it -might thus be useful to *replace* the raw retention times with the adjusted -retention times. This can be done with the =applyAdjustedRtime=. - -As noted above the /peak groups/ method requires peak groups (features) present in -most samples to perform the alignment. We thus have to perform a first -correspondence run to identify such peaks (details about the algorithm used are -presented in the next section). We use here again default settings, but it is -strongly advised to adapt the parameters for each data set. The definition of -the sample groups (i.e. assignment of individual samples to the sample groups in -the experiment) is mandatory for the =PeakDensityParam=. If there are no sample -groups in the experiment =sampleGroups= should be set to a single value for each -file (e.g. =rep(1, length(fileNames(xdata))=). - -#+NAME: alignment-peak-groups -#+BEGIN_SRC R :ravel message = FALSE - ## Correspondence: group peaks across samples. - pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.8) - xdata <- groupChromPeaks(xdata, param = pdp) - - ## Now the retention time correction. - pgp <- PeakGroupsParam(minFraction = 0.85) - - ## Get the peak groups that would be used for alignment. - xdata <- adjustRtime(xdata, param = pgp) - -#+END_SRC - -Note also that we could use the =adjustedRtimePeakGroups= method on the object -before alignment to evaluate on which features (peak groups) the alignment would -be performed. This can be useful to test different settings for the peak groups -algorithm. Also, it is possible to manually select or define certain peak groups -(i.e. their retention times per sample) and provide this matrix to the -=PeakGroupsParam= class with the =peakGroupsMatrix= argument. - -Below plot the difference between raw and adjusted retention times -using the =plotAdjustedRtime= function, which, if the /peak groups/ method is used -for alignment, also highlights the peak groups used in the adjustment. - -#+NAME: alignment-peak-groups-plot -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak groups aligned data." - ## Plot the difference of adjusted to raw retention time. - plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group], - peakGroupsCol = "grey", peakGroupsPch = 1) -#+END_SRC - -At last we evaluate the impact of the alignment on the test peak. - -#+NAME: alignment-peak-groups-example-peak -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example extracted ion chromatogram before (top) and after alignment (bottom)." - par(mfrow = c(2, 1)) - ## Plot the raw data - plot(chr_raw, col = group_colors[chr_raw$sample_group]) - - ## Extract the chromatogram from the adjusted object - chr_adj <- chromatogram(xdata, rt = rtr, mz = mzr) - plot(chr_adj, col = group_colors[chr_raw$sample_group]) -#+END_SRC - -* Correspondence - -The final step in the metabolomics preprocessing is the correspondence that -matches detected chromatographic peaks between samples (and depending on the -settings, also within samples if they are adjacent). The method to perform the -correspondence in =xcms= is =groupChromPeaks=. We will use the /peak density/ method -\cite{Smith:2006ic} to group chromatographic peaks. The algorithm combines -chromatographic peaks depending on the density of peaks along the retention time -axis within small slices along the mz dimension. To illustrate this we plot -below the chromatogram for an mz slice with multiple chromatographic peaks -within each sample. We use below a value of 0.4 for the =minFraction= parameter -hence only chromatographic peaks present in at least 40% of the samples per -sample group are grouped into a feature. The sample group assignment is -specified with the =sampleGroups= argument. - -#+NAME: correspondence-example-slice -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example for peak density correspondence. Upper panel: chromatogram for an mz slice with multiple chromatographic peaks. Middle and lower panel: identified chromatographic peaks at their retention time (x-axis) and index within samples of the experiments (y-axis) for different values of the bw parameter. The black line represents the peak density estimate. Grouping of peaks (based on the provided settings) is indicated by grey rectangles." - ## Define the mz slice. - mzr <- c(305.05, 305.15) - - ## Extract and plot the chromatograms - chr_mzr <- chromatogram(xdata, mz = mzr, rt = c(2500, 4000)) - par(mfrow = c(3, 1), mar = c(1, 4, 1, 0.5)) - cols <- group_colors[chr_mzr$sample_group] - plot(chr_mzr, col = cols, xaxt = "n", xlab = "") - ## Highlight the detected peaks in that region. - highlightChromPeaks(xdata, mz = mzr, col = cols, type = "point", pch = 16) - ## Define the parameters for the peak density method - pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.4, bw = 30) - par(mar = c(4, 4, 1, 0.5)) - plotChromPeakDensity(xdata, mz = mzr, col = cols, param = pdp, - pch = 16, xlim = c(2500, 4000)) - ## Use a different bw - pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.4, bw = 20) - plotChromPeakDensity(xdata, mz = mzr, col = cols, param = pdp, - pch = 16, xlim = c(2500, 4000)) -#+END_SRC - -The upper panel in the plot above shows the extracted ion chromatogram for each -sample with the detected peaks highlighted. The middle and lower plot shows the -retention time for each detected peak within the different samples. The black -solid line represents the density distribution of detected peaks along the -retention times. Peaks combined into /features/ (peak groups) are indicated with -grey rectangles. Different values for the =bw= parameter of the =PeakDensityParam= -were used: =bw = 30= in the middle and =bw = 20= in the lower panel. With the -default value for the parameter =bw= the two neighboring chromatographic peaks -would be grouped into the same feature, while with a =bw= of 20 they would be -grouped into separate features. This grouping depends on the parameters for the -density function and other parameters passed to the algorithm with the -=PeakDensityParam=. - -#+NAME: correspondence -#+BEGIN_SRC R :ravel message = FALSE - ## Perform the correspondence - pdp <- PeakDensityParam(sampleGroups = xdata$sample_group, - minFraction = 0.4, bw = 20) - xdata <- groupChromPeaks(xdata, param = pdp) -#+END_SRC - -The results from the correspondence can be extracted using the -=featureDefinitions= method, that returns a =DataFrame= with the definition of the -features (i.e. the mz and rt ranges and, in column =peakidx=, the index of the -chromatographic peaks in the =chromPeaks= matrix for each feature). - -#+NAME: correspondence-featureDefs -#+BEGIN_SRC R :ravel message = FALSE - ## Extract the feature definitions - featureDefinitions(xdata) - -#+END_SRC - -The =featureValues= method returns a =matrix= with rows being features and columns -samples. The content of this matrix can be defined using the =value= -argument. Setting =value = "into"= returns a matrix with the integrated signal of -the peaks corresponding to a feature in a sample. Any column name of the -=chromPeaks= matrix can be passed to the argument =value=. Below we extract the -integrated peak intensity per feature/sample. - -#+NAME: correspondence-feature-values -#+BEGIN_SRC R :ravel message = FALSE - ## Extract the into column for each feature. - head(featureValues(xdata, value = "into")) - -#+END_SRC - -This feature matrix contains =NA= for samples in which no chromatographic peak was -detected in the feature's m/z-rt region. While in many cases there might indeed -be no peak signal in the respective region, it might also be that there is -signal, but the peak detection algorithm failed to detect a chromatographic -peak. =xcms= provides the =fillChromPeaks= method to /fill in/ intensity data for such -missing values from the original files. The /filled in/ peaks are added to the -=chromPeaks= matrix and are flagged with an =1= in the ="is_filled"= column. Below we -perform such a filling-in of missing peaks. - -#+NAME: fill-chrom-peaks -#+BEGIN_SRC R :ravel message = FALSE - ## Filling missing peaks using default settings. Alternatively we could - ## pass a FillChromPeaksParam object to the method. - xdata <- fillChromPeaks(xdata) - - head(featureValues(xdata)) - -#+END_SRC - -For features without detected peaks in a sample, the method extracts all -intensities in the mz-rt region of the feature, integrates the signal and adds a -/filled-in/ peak to the =chromPeaks= matrix. No peak is added if no signal is -measured/available for the mz-rt region of the feature. For these, even after -filling in missing peak data, a =NA= is reported in the =featureValues= matrix. - -Below we compare the number of missing values before and after filling in -missing values. We can use the parameter =filled= of the =featureValues= method to -define whether or not filled-in peak values should be returned too. - -#+NAME: fill-chrom-peaks-compare -#+BEGIN_SRC R :results silent :ravel message = FALSE - ## Missing values before filling in peaks - apply(featureValues(xdata, filled = FALSE), MARGIN = 2, - FUN = function(z) sum(is.na(z))) - - ## Missing values after filling in peaks - apply(featureValues(xdata), MARGIN = 2, - FUN = function(z) sum(is.na(z))) - -#+END_SRC - -At last we perform a principal component analysis to evaluate the grouping of -the samples in this experiment. Note that we did not perform any data -normalization hence the grouping might (and will) also be influenced by -technical biases. - -#+NAME: final-pca -#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "PCA for the faahKO data set, un-normalized intensities." - ## Extract the features and log2 transform them - ft_ints <- log2(featureValues(xdata, value = "into")) - - ## Perform the PCA omitting all features with an NA in any of the - ## samples. Also, the intensities are mean centered. - pc <- prcomp(t(na.omit(ft_ints)), center = TRUE) - - ## Plot the PCA - cols <- group_colors[xdata$sample_group] - pcSummary <- summary(pc) - plot(pc$x[, 1], pc$x[,2], pch = 21, main = "", - xlab = paste0("PC1: ", format(pcSummary$importance[2, 1] * 100, - digits = 3), " % variance"), - ylab = paste0("PC2: ", format(pcSummary$importance[2, 2] * 100, - digits = 3), " % variance"), - col = "darkgrey", bg = cols, cex = 2) - grid() - text(pc$x[, 1], pc$x[,2], labels = xdata$sample_name, col = "darkgrey", - pos = 3, cex = 2) - -#+END_SRC - -We can see the expected separation between the KO and WT samples on PC2. On PC1 -samples separate based on their ID, samples with an ID <= 18 from samples with -an ID > 18. This separation might be caused by a technical bias -(e.g. measurements performed on different days/weeks) or due to biological -properties of the mice analyzed (sex, age, litter mates etc). - -* Further data processing and analysis - -Normalizing features' signal intensities is required, but at present not (yet) -supported in =xcms= (some methods might be added in near future). Also, for the -identification of e.g. features with significant different -intensities/abundances it is suggested to use functionality provided in other R -packages, such as Bioconductor's excellent =limma= package. To enable support also -for other packages that rely on the /old/ =xcmsSet= result object, it is possible to -coerce the new =XCMSnExp= object to an =xcmsSet= object using =xset <- as(x, -"xcmsSet")=, with =x= being an =XCMSnExp= object. - -* Additional details and notes - -For a detailed description of the new data objects and changes/improvements -compared to the original user interface see the /new_functionality/ vignette. - -** Evaluating the process history - -=XCMSnExp= objects allow to capture all performed pre-processing steps along with -the used parameter class within the =@processHistory= slot. Storing also the -parameter class ensures the highest possible degree of analysis documentation -and in future might enable to /replay/ analyses or parts of it. The list of all -performed preprocessings can be extracted using the =processHistory= method. - -#+NAME: processhistory -#+BEGIN_SRC R :ravel message = FALSE - processHistory(xdata) -#+END_SRC - -It is also possible to extract specific processing steps by specifying its -type. Available /types/ can be listed with the =processHistoryTypes= function. Below -we extract the parameter class for the alignment/retention time adjustment step. - -#+NAME: processhistory-select -#+BEGIN_SRC R :ravel message = FALSE - ph <- processHistory(xdata, type = "Retention time correction") - - ph -#+END_SRC - -And we can also extract the parameter class used in this preprocessing step. - -#+NAME: processhistory-param -#+BEGIN_SRC R :ravel message = FALSE - ## Access the parameter - processParam(ph[[1]]) - -#+END_SRC - -** Subsetting and filtering - -=XCMSnEx= objects can be subsetted/filtered using the =[= method, or one of the many -=filter*= methods. All these methods aim to ensure that the data in the -returned object is consistent. This means for example that if the object is -subsetted by selecting specific spectra (by using the =[= method) all identified -chromatographic peaks are removed. Correspondence results (i.e. identified -features) are removed if the object is subsetted to contain only data from -selected files (using the =filterFile= method). This is because the correspondence -results depend on the files on which the analysis was performed - running a -correspondence on a subset of the files would lead to different results. - -As an exception, it is possible to force keeping adjusted retention times in the -subsetted object setting the =keepAdjustedRtime= argument to =TRUE= in any of the -subsetting methods. - -Below we subset our results object the data for the files 2 and 4. - -#+NAME: subset-filterFile -#+BEGIN_SRC R :ravel message = FALSE - subs <- filterFile(xdata, file = c(2, 4)) - - ## Do we have identified chromatographic peaks? - hasChromPeaks(subs) -#+END_SRC - -Peak detection is performed separately on each file, thus the subsetted object -contains all identified chromatographic peaks from the two files. However, we -used a retention time adjustment (alignment) that was based on available -features. All features have however been removed and also the adjusted retention -times (since the alignment based on features that were identified on -chromatographic peaks on all files). - -#+NAME: subset-filterFile-2 -#+BEGIN_SRC R :ravel message = FALSE - ## Do we still have features? - hasFeatures(subs) - - ## Do we still have adjusted retention times? - hasAdjustedRtime(subs) -#+END_SRC - -We can however use the =keepAdjustedRtime= argument to force keeping the adjusted -retention times. - -#+NAME: subset-filterFile-3 -#+BEGIN_SRC R :ravel message = FALSE - subs <- filterFile(xdata, keepAdjustedRtime = TRUE) - - hasAdjustedRtime(subs) -#+END_SRC - -The =filterRt= method can be used to subset the object to spectra within a certain -retention time range. - -#+NAME: subset-filterRt -#+BEGIN_SRC R :ravel message = FALSE - subs <- filterRt(xdata, rt = c(3000, 3500)) - - range(rtime(subs)) -#+END_SRC - -Filtering by retention time does not change/affect adjusted retention times -(also, if adjusted retention times are present, the filtering is performed *on* -the adjusted retention times). - -#+NAME: subset-filterRt-2 -#+BEGIN_SRC R :ravel message = FALSE - hasAdjustedRtime(subs) -#+END_SRC - -Also, we have all identified chromatographic peaks within the specified -retention time range: - -#+NAME: subset-filterRt-3 -#+BEGIN_SRC R :ravel message = FALSE - hasChromPeaks(subs) - - range(chromPeaks(subs)[, "rt"]) -#+END_SRC - -The most natural way to subset any object in R is with =[=. Using =[= on an =XCMSnExp= -object subsets it keeping only the selected spectra. The index =i= used in =[= has -thus to be an integer between 1 and the total number of spectra (across all -files). Below we subset =xdata= using both =[= and =filterFile= to keep all spectra -from one file. - -#+NAME: subset-bracket -#+BEGIN_SRC R :ravel message = FALSE, warning = FALSE - ## Extract all data from the 3rd file. - one_file <- filterFile(xdata, file = 3) - - one_file_2 <- xdata[fromFile(xdata) == 3] - - ## Is the content the same? - all.equal(spectra(one_file), spectra(one_file_2)) -#+END_SRC - -While the spectra-content is the same in both objects, =one_file= contains also -the identified chromatographic peaks while =one_file_2= does not. Thus, in most -situations subsetting using one of the filter functions is preferred over the -use of =[=. - -#+NAME: subset-bracket-peaks -#+BEGIN_SRC R :ravel message = FALSE - ## Subsetting with filterFile preserves chromatographic peaks - head(chromPeaks(one_file)) - - ## Subsetting with [ not - head(chromPeaks(one_file_2)) -#+END_SRC - -Note however that also =[= does support the =keepAdjustedRtime= argument. Below we -subset the object to spectra 20:30. - -#+NAME: subset-bracket-keepAdjustedRtime -#+BEGIN_SRC R :ravel message = FALSE, warnings = FALSE - subs <- xdata[20:30, keepAdjustedRtime = TRUE] - - hasAdjustedRtime(subs) - - ## Access adjusted retention times: - rtime(subs) - - ## Access raw retention times: - rtime(subs, adjusted = FALSE) -#+END_SRC - -As with =MSnExp= and =OnDiskMSnExp= objects, =[[= can be used to extract a single -spectrum object from an =XCMSnExp= object. The retention time of the spectrum -corresponds to the adjusted retention time if present. - -#+NAME: subset-double-bracket -#+BEGIN_SRC R :ravel message = FALSE - ## Extract a single spectrum - xdata[[14]] -#+END_SRC - -At last we can also use the =split= method that allows to split an =XCMSnExp= based -on a provided factor =f=. Below we split =xdata= per file. Using =keepAdjustedRtime -= TRUE= ensures that adjusted retention times are not removed. - -#+NAME: subset-split -#+BEGIN_SRC R :ravel message = FALSE - x_list <- split(xdata, f = fromFile(xdata), keepAdjustedRtime = TRUE) - - lengths(x_list) - - lapply(x_list, hasAdjustedRtime) -#+END_SRC - -Note however that there is also a dedicated =splitByFile= method instead for that -operation, that internally uses =filterFile= and hence does e.g. not remove -identified chromatographic peaks. The method does not yet support the -=keepAdjustedRtime= parameter and thus removes by default adjusted retention -times. - -#+NAME: subset-split-by-file -#+BEGIN_SRC R :ravel message = FALSE - xdata_by_file <- splitByFile(xdata, f = factor(1:length(fileNames(xdata)))) - - lapply(xdata_by_file, hasChromPeaks) -#+END_SRC - -** Parallel processing - -Most methods in =xcms= support parallel processing. Parallel processing is handled -and configured by the =BiocParallel= Bioconductor package and can be globally -defined for an R session. - -Unix-based systems (Linux, macOS) support =multicore=-based parallel -processing. To configure it globally we =register= the parameter class. Note also -that =bpstart= is used below to initialize the parallel processes. - -#+NAME: multicore -#+BEGIN_SRC R :ravel message = FALSE, eval = FALSE - register(bpstart(MulticoreParam(2))) -#+END_SRC - -Windows supports only socket-based parallel processing: - -#+NAME: snow -#+BEGIN_SRC R :ravel message = FALSE, eval = FALSE - register(bpstart(SnowParam(2))) -#+END_SRC - -Note that =multicore=-based parallel processing might be buggy or failing on -macOS. If so, the =DoparParam= could be used instead (requiring the =foreach= -package). - -For other options and details see the vignettes from the =BiocParallel= package. - -* Details on chromatographic peak detection methods :noexport: - -** /matchedFilter/ - -The /matched filter/ method was originally described in - -** /centWave/ - - -* References -* TODOs :noexport: - -** DONE General data import and data exploration section. - CLOSED: [2017-10-12 Thu 07:26] - - State "DONE" from "TODO" [2017-10-12 Thu 07:26] -** DONE Feature detection section. - CLOSED: [2017-10-12 Thu 07:26] - - State "DONE" from "TODO" [2017-10-12 Thu 07:26] -** DONE Sample alignment section. - CLOSED: [2017-10-12 Thu 07:26] - - State "DONE" from "TODO" [2017-10-12 Thu 07:26] -** DONE Retention time correction section. - CLOSED: [2017-10-12 Thu 07:26] - - State "DONE" from "TODO" [2017-10-12 Thu 07:26] -** TODO Describe methods more in detail in the appropriate section diff --git a/vignettes/xcmsMSn.Rmd b/vignettes/xcmsMSn.Rmd deleted file mode 100644 index c0df2c3f1..000000000 --- a/vignettes/xcmsMSn.Rmd +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: "Processing Tandem-MS and MSn data with xcms" -author: -- name: S. Neumann -- name: K. Kutzera -package: xcms -output: - BiocStyle::html_document: - toc_float: true -vignette: > - %\VignetteIndexEntry{Processing Tandem-MS and MSn data with xcms} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteKeywords{Mass Spectrometry, MS, Metabolomics, Bioinformatics} - %\VignetteEncoding{UTF-8} - %\VignetteDepends{xcms,msdata,BiocStyle} ---- - -```{r style, echo = FALSE, results = 'asis'} -BiocStyle::markdown() -``` - -# Introduction - -```{r echo = FALSE, results = "hide", message = FALSE} -library(BiocStyle) -``` - -This document describes how to use `xcms` for processing of Tandem-MS and MS$^{n}$ -data from e.g. triple-quad, QTOF, ion trap or orbitrap mass spectrometers. This -uses still the *old* user interface. - -```{r load-libs, message = FALSE} -library(xcms) -library(msdata) - -## Disable parallel processing -register(SerialParam()) -``` - -# Raw data file preparation - -The `xcms` package reads full-scan LC/MS data and associated Tandem-MS and MS$^{n}$ -spectra from mzData and mzXML files (CDF does not support MS$^{n}$). - -For the purposes of demonstration, we will use a some sample files included in -the `msdata` package. The (truncated) raw data files are contained in the *data* -directory. To access the mzData files, we first locate the *data* directory in -the `msdata` package. - -```{r raw-files} -mzdatapath <- system.file("iontrap", package = "msdata") -list.files(mzdatapath, recursive = TRUE) -``` - -# Accessing, combining and visualising MSn spectra - -The MS$^{n}$ spectra are read into xcms just like plain LC-MS files. Inclusion of -the additional scans has to be explicitely enabled. The xcmsRaw summary shows -the additional content: - -```{r} -library(xcms) -mzdatafiles <- list.files(mzdatapath, pattern = "extracted.mzData", - recursive = TRUE, full.names = TRUE) -xraw <- xcmsRaw(mzdatafiles[1], includeMSn=TRUE) -xraw - -``` - -There is also a new peak picker, which actually returns all parent ions as -annotated in the mzData / mzXML files. A warning will be issued if this is not -present, either because of a poor converter or if `xraw` is read from a CDF -file: - -```{r} -peaks <- findPeaks(xraw, method="MS1") -``` - -By giving larger windows, subsets of spectra can be returned, and these can be -combined to obtain so called compound spectra containing e.g. MS$^{2}$ to -MS$^{5}$. - -# xcmsFragments - -A xcmsFragments-Object contains a Table with informations about all Peaks from -MS1 to MS$^{n}$ of one Experiment, including mz-value, retentiontime, MSlevel, and -parentpeak. The data can be used for visualization or (later) computing -alignments between different Experiments. The object is created as empty and can -be filled with `collect()`. This method requires a peaktable or a xcmsSet for -the MS1peaks and the original xcmsRaw for the msn-Data. - -```{r} -xs <- xcmsSet(mzdatafiles, method = "MS1") -xfrag <- xcmsFragments(xs) -xfrag - -``` - -plotTree prints out a simplyfied tree of the peaks in a `xcmsFragments`. The -user can define borders for the retentionTime and for the MassRange, he can also -print the tree for a single MS1-Peak. PlotTree uses the package `RgraphViz` and -plots the tree text-based if requested or if the package cannot be found. - -Below we simply print the textual representation of the fragment tree. - -```{r} -plotTree(xfrag, xcmsFragmentPeakID = 6, textOnly = TRUE) -``` - diff --git a/xcms.Rproj b/xcms.Rproj new file mode 100644 index 000000000..21a4da087 --- /dev/null +++ b/xcms.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source