Skip to content

Commit

Permalink
Merge pull request #99 from Meredith-Lab/cran-release
Browse files Browse the repository at this point in the history
CRAN release prep
  • Loading branch information
Aariq authored Feb 15, 2024
2 parents 237bb4f + d6cc9b1 commit d550389
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 68 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ README.Rmd
^pkgdown$
^doc$
^Meta$
^cran-comments\.md$
14 changes: 8 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ Authors@R: c(
person("S. Marshall", "Ledford", role = "ctb"),
person("Tamás", "Stirling", role = "ctb")
)
Description: Use this package to calculate estimated relative volatility
index values for organic compounds based on functional group
contributions. Calculation uses the SIMPOL.1 method (Prankow and Asher,
2008) or modified SIMPOL.1 method as in Meredith et al. (2023).
Description: Calculate estimated relative volatility index values for
organic compounds based on functional group contributions. Calculation
uses the SIMPOL.1 method (Pankow and Asher, 2008)
<doi:10.5194/acp-8-2773-2008> or modified SIMPOL.1 method as in
Meredith et al. (2023) <doi:10.3389/fmicb.2023.1267234>.
License: MIT + file LICENSE
URL: https://meredith-lab.github.io/volcalc/
BugReports: https://github.com/Meredith-Lab/volcalc/issues
Expand All @@ -27,8 +28,8 @@ Imports:
httr2,
KEGGREST,
magrittr,
rlang,
purrr,
rlang,
stringr,
tibble,
tidyr,
Expand All @@ -39,9 +40,10 @@ Suggests:
rmarkdown,
testthat (>= 3.0.0),
withr
VignetteBuilder:
knitr
biocViews:
Config/testthat/edition: 3
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
VignetteBuilder: knitr
7 changes: 5 additions & 2 deletions R/calc_vol.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#'
#' @export
#' @examples
#' \dontrun{
#' mol_paths <- mol_example()
#' calc_vol(mol_paths)
#'
Expand All @@ -48,7 +49,7 @@
#'
#' # Return intermediate calculations
#' calc_vol(mol_paths, return_calc_steps = TRUE)
#'
#' }
calc_vol <-
function(input,
from = c("mol_path", "smiles"),
Expand Down Expand Up @@ -123,7 +124,9 @@ calc_vol <-

#return:
vol_df %>%
dplyr::select(dplyr::all_of(c({{ from }}, "formula", "name", "rvi", "category", cols_fx, cols_calc)))
dplyr::select(dplyr::all_of(c(
{{ from }}, "formula", "name", "rvi", "category", cols_fx, cols_calc)
))

}

29 changes: 20 additions & 9 deletions R/get_fx_groups.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
#' @export
get_fx_groups <- function(compound_sdf) {

# For now at least, this code only works with SDFset objects that contain single molecules.
# For now at least, this code only works with SDFset objects that contain
# single molecules.
# TODO: make this function work with SDFset objects with multiple molecules?
if (length(compound_sdf) != 1) {
stop("SDFset objects must contain a single molecule only")
Expand All @@ -46,8 +47,11 @@ get_fx_groups <- function(compound_sdf) {
rowname <- n <- NULL

#convert counts to integer
groups <- groups %>% dplyr::mutate(dplyr::across(dplyr::everything(), as.integer))
rings <- data.frame(t(ChemmineR::rings(compound_sdf, type = "count", arom = TRUE, inner = TRUE)))
groups <-
groups %>%
dplyr::mutate(dplyr::across(dplyr::everything(), as.integer))
rings <-
data.frame(t(ChemmineR::rings(compound_sdf, type = "count", arom = TRUE, inner = TRUE)))
atoms <- atomcount2tibble(ChemmineR::atomcount(compound_sdf))
carbon_bond_data <- data.frame(ChemmineR::conMA(compound_sdf)[[1]]) %>%
dplyr::select(dplyr::contains("C_")) %>%
Expand Down Expand Up @@ -86,7 +90,9 @@ get_fx_groups <- function(compound_sdf) {

amide_primary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H2]"
amide_secondary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])]"
amide_tertiary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]"
amide_tertiary_pattern <-
"[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]"

# amide_total_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]"

carbonylperoxynitrate_pattern <- "*C(=O)OO[N+1](=O)[O-1]"
Expand All @@ -95,12 +101,17 @@ get_fx_groups <- function(compound_sdf) {
carbonylperoxyacid_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][$([OX2H]),$([OX1-])]"
nitroester_pattern <- "C(=O)(OC)C~[NX3](-,=[OX1])-,=[OX1]"
# This captures OH groups on a ring that also has a nitro group (para, ortho, or meta). Need to correct aromatic hydroxyl count later.
nitrophenol_pattern <- "[OX2H][$(c1ccccc1[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1cccc(c1)[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1ccc(cc1)[$([NX3](=O)=O),$([NX3+](=O)[O-])])]"
phosphoric_acid_pattern <- "[$(P(=[OX1])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)]),$([P+]([OX1-])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)])]"
phosphoric_ester_pattern <- "[$(P(=[OX1])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)]),$([P+]([OX1-])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)])]"
sulfate_pattern <- "[$([#16X4](=[OX1])(=[OX1])([OX2H,OX1H0-])[OX2][#6]),$([#16X4+2]([OX1-])([OX1-])([OX2H,OX1H0-])[OX2][#6])]"
nitrophenol_pattern <-
"[OX2H][$(c1ccccc1[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1cccc(c1)[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1ccc(cc1)[$([NX3](=O)=O),$([NX3+](=O)[O-])])]"
phosphoric_acid_pattern <-
"[$(P(=[OX1])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)]),$([P+]([OX1-])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)])]"
phosphoric_ester_pattern <-
"[$(P(=[OX1])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)]),$([P+]([OX1-])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)])]"
sulfate_pattern <-
"[$([#16X4](=[OX1])(=[OX1])([OX2H,OX1H0-])[OX2][#6]),$([#16X4+2]([OX1-])([OX1-])([OX2H,OX1H0-])[OX2][#6])]"
#sulfonate groups; sulfonate ions, and conjugate acid, sulfonic acids
sulfonate_pattern <- "[#16X4](=[OX1])(=[OX1])([#6])[*$([O-1]),*$([OH1]),*$([OX2H0])]"
sulfonate_pattern <-
"[#16X4](=[OX1])(=[OX1])([#6])[*$([O-1]),*$([OH1]),*$([OX2H0])]"
thiol_pattern <- "[#16X2H]"
carbothioester_pattern <- "S([#6])[CX3](=O)[#6]"

Expand Down
20 changes: 13 additions & 7 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ knitr::opts_chunk$set(

<!-- badges: start -->

[![R-CMD-check](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml) [![latest-DOI](https://zenodo.org/badge/425022983.svg)](https://zenodo.org/badge/latestdoi/425022983) [![manuscript-DOI](https://img.shields.io/badge/DOI-10.3389/fmicb.2023.1267234-32a859.svg)](https://doi.org/10.3389/fmicb.2023.1267234) [![Project Status: Active -- The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Codecov test coverage](https://codecov.io/gh/Meredith-Lab/volcalc/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Meredith-Lab/volcalc?branch=master) [![volcalc status badge](https://cct-datascience.r-universe.dev/badges/volcalc)](https://cct-datascience.r-universe.dev/volcalc)
[![R-CMD-check](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml) [![latest-DOI](https://zenodo.org/badge/425022983.svg)](https://zenodo.org/badge/latestdoi/425022983) [![manuscript-DOI](https://img.shields.io/badge/DOI-10.3389/fmicb.2023.1267234-32a859.svg)](https://doi.org/10.3389/fmicb.2023.1267234) [![Project Status: Active -- The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Codecov test coverage](https://codecov.io/gh/Meredith-Lab/volcalc/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Meredith-Lab/volcalc?branch=master) [![volcalc status badge](https://cct-datascience.r-universe.dev/badges/volcalc)](https://cct-datascience.r-universe.dev/volcalc) [![CRAN status](https://www.r-pkg.org/badges/version/volcalc)](https://CRAN.R-project.org/package=volcalc)

<!-- badges: end -->

Expand All @@ -26,12 +26,18 @@ knitr::opts_chunk$set(
The `volcalc` package allows you to automate calculating estimates of volatility for chemical compounds.

`volcalc` supports "group contribution" methods for estimating volatility that rely on molecular properties such as molecular weight, numbers of certain atoms, and counts of certain functional groups.
Currently, the only methods implemented are SIMPOL.1 (Pankow & Asher 2008) and a modified version used in Meredith et al. (2023).
Currently, the only methods implemented are SIMPOL.1 ([Pankow & Asher 2008](https://doi.org/10.5194/acp-8-2773-2008)) and a modified version used in [Meredith et al. (2023)](https://doi.org/10.3389/fmicb.2023.1267234).

`volcalc` works with either .mol files or [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) strings as input, and supports downloading .mol files directly from [KEGG](https://www.kegg.jp/).

## Installation

Install from CRAN with

``` r
install.packages("volcalc")
```

You can install the development version of `volcalc` from GitHub with

``` r
Expand Down Expand Up @@ -113,9 +119,9 @@ citation("volcalc")

### References

Pankow, J.F., Asher, W.E., 2008.
SIMPOL.1: a simple group contribution method for predicting vapor pressures and enthalpies of vaporization of multifunctional organic compounds.
Atmos.
Chem.
Phys.
Pankow, J.F., Asher, W.E., 2008. SIMPOL.1: a simple group contribution
method for predicting vapor pressures and enthalpies of vaporization of
multifunctional organic compounds. Atmos. Chem. Phys.
<https://doi.org/10.5194/acp-8-2773-2008>

Meredith, L.K., Ledford, S.M., Riemer, K., Geffre, P., Graves, K., Honeker, L.K., LeBauer, D., Tfaily, M.M., Krechmer, J., 2023. Automating methods for estimating metabolite volatility. Frontiers in Microbiology 14. <https://doi.org/10.3389/fmicb.2023.1267234>
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.re
coverage](https://codecov.io/gh/Meredith-Lab/volcalc/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Meredith-Lab/volcalc?branch=master)
[![volcalc status
badge](https://cct-datascience.r-universe.dev/badges/volcalc)](https://cct-datascience.r-universe.dev/volcalc)
[![CRAN
status](https://www.r-pkg.org/badges/version/volcalc)](https://CRAN.R-project.org/package=volcalc)

<!-- badges: end -->

Expand All @@ -27,7 +29,9 @@ volatility for chemical compounds.
volatility that rely on molecular properties such as molecular weight,
numbers of certain atoms, and counts of certain functional groups.
Currently, the only methods implemented are SIMPOL.1 ([Pankow & Asher
2008](https://doi.org/10.5194/acp-8-2773-2008)) and a modified version used in [Meredith et al. (2023)](https://doi.org/10.3389/fmicb.2023.1267234).
2008](https://doi.org/10.5194/acp-8-2773-2008)) and a modified version
used in [Meredith et
al. (2023)](https://doi.org/10.3389/fmicb.2023.1267234).

`volcalc` works with either .mol files or
[SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system)
Expand All @@ -36,6 +40,12 @@ strings as input, and supports downloading .mol files directly from

## Installation

Install from CRAN with

``` r
install.packages("volcalc")
```

You can install the development version of `volcalc` from GitHub with

``` r
Expand Down Expand Up @@ -171,5 +181,7 @@ method for predicting vapor pressures and enthalpies of vaporization of
multifunctional organic compounds. Atmos. Chem. Phys.
<https://doi.org/10.5194/acp-8-2773-2008>

Meredith, L.K., Ledford, S.M., Riemer, K., Geffre, P., Graves, K., Honeker, L.K., LeBauer, D., Tfaily, M.M., Krechmer, J., 2023. Automating methods for estimating metabolite volatility. Frontiers in Microbiology 14. <https://doi.org/10.3389/fmicb.2023.1267234>

Meredith, L.K., Ledford, S.M., Riemer, K., Geffre, P., Graves, K.,
Honeker, L.K., LeBauer, D., Tfaily, M.M., Krechmer, J., 2023. Automating
methods for estimating metabolite volatility. Frontiers in Microbiology
14. <https://doi.org/10.3389/fmicb.2023.1267234>
13 changes: 13 additions & 0 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## R CMD check results

0 errors | 0 warnings | 1 note

* This is a new release.

The note on check is:

> Package has a FOSS license but eventually depends on the following
> package which may restrict use:
> ChemmineOB

ChemmineOB is an R package with the [Artistic-2.0 license](https://github.com/girke-lab/ChemmineOB/blob/master/LICENSE), which does appear to be FOSS. Other R packages
3 changes: 2 additions & 1 deletion man/calc_vol.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/volcalc-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 22 additions & 10 deletions tests/testthat/test-calc_vol.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
test_that("volatility estimate is correct for example compound for entire workflow", {
test_that("volatility estimate is correct", {
ex_vol_df <- calc_vol("data/C16181.mol")
expect_equal(round(ex_vol_df$rvi, 6), 6.975349)
})

test_that("returns correct columns depending on return arguments", {
just_vol <- calc_vol("data/C16181.mol")
with_fx <- calc_vol("data/C16181.mol", return_fx_groups = TRUE)
with_fx_steps <- calc_vol("data/C16181.mol", return_fx_groups = TRUE, return_calc_steps = TRUE)
expect_setequal(colnames(just_vol), c("mol_path", "formula", "name", "rvi", "category"))
with_fx_steps <-
calc_vol("data/C16181.mol",
return_fx_groups = TRUE,
return_calc_steps = TRUE)
expect_setequal(colnames(just_vol),
c("mol_path", "formula", "name", "rvi", "category"))
# just some examples here
expect_contains(colnames(with_fx), c(colnames(just_vol), "carbons", "carbothioesters", "fluorines"))
expect_contains(colnames(with_fx_steps), c(colnames(with_fx), "molecular_weight", "log_alpha", "log10_P"))
expect_contains(colnames(with_fx),
c(colnames(just_vol), "carbons", "carbothioesters", "fluorines"))
expect_contains(colnames(with_fx_steps),
c(colnames(with_fx), "molecular_weight", "log_alpha", "log10_P"))
})

test_that("calc_vol() works with multiple inputs", {
Expand All @@ -21,11 +27,15 @@ test_that("calc_vol() works with multiple inputs", {
})

test_that("smiles and .mol give same results", {
paths <- c("data/C16181.mol", "data/map00361/C00011.mol", "data/map00361/C00042.mol")
smiles <- c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O")
paths <-
c("data/C16181.mol",
"data/map00361/C00011.mol",
"data/map00361/C00042.mol")
smiles <-
c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O")
expect_equal(
calc_vol(smiles, from = "smiles") %>% dplyr::select(-name, -smiles),
calc_vol(paths) %>% dplyr::select(-name, -mol_path)
calc_vol(smiles, from = "smiles") %>% dplyr::select(-name,-smiles),
calc_vol(paths) %>% dplyr::select(-name,-mol_path)
)
})

Expand All @@ -36,7 +46,9 @@ test_that("errors with invalid SMILES", {
test_that("meredith and original method give different results", {
#thiol and sulfonate groups, respectively
# paths <- c(test_path("data/C00409.mol"), test_path("data/C03349.mol"))
smiles <- c("Methanethiol" = "SC", "Methyl methanesulfonate" = "COS(=O)(=O)C")
smiles <-
c("Methanethiol" = "SC",
"Methyl methanesulfonate" = "COS(=O)(=O)C")
meredith <- calc_vol(smiles, from = "smiles", method = "meredith")
simpol <- calc_vol(smiles, from = "smiles", method = "simpol1")
expect_true(all(meredith$rvi < simpol$rvi))
Expand Down
3 changes: 2 additions & 1 deletion tests/testthat/test-get_fx_groups.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ test_that("SMARTS strings are correct", {
expected <- test_compounds %>% dplyr::select(smiles, dplyr::all_of(common_cols))
actual <- test_fx_groups %>% dplyr::select(smiles, dplyr::all_of(common_cols))

# compare but ignore NAs in expected, by just overwriting them with values in actual using rows_patch()
# compare but ignore NAs in expected, by just overwriting them with values in
# actual using rows_patch()
expect_equal(
actual,
dplyr::rows_patch(expected, actual)
Expand Down
Loading

0 comments on commit d550389

Please sign in to comment.