remove dependencies #42

Merged: 14 commits, Mar 13, 2024
7 changes: 1 addition & 6 deletions DESCRIPTION
@@ -39,25 +39,20 @@ License: MIT + file LICENSE
URL: https://bedapub.github.io/designit/, https://github.com/BEDApub/designit/
BugReports: https://github.com/BEDApub/designit/issues
Depends:
R (>= 3.5.0)
R (>= 4.1.0)
Imports:
magrittr,
rlang (>= 0.4.0),
dplyr (>= 1.0.0),
purrr,
ggplot2,
scales,
tibble,
tidyselect,
tidyr,
assertthat,
stringr,
R6,
ineq,
data.table,
glue,
forcats,
checkmate,
stats
Suggests:
testthat,
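The R version bump from 3.5.0 to 4.1.0 goes together with dropping magrittr below: the native pipe `|>` only exists from R 4.1.0 onward. A minimal illustrative sketch of the substitution (not taken from the package sources):

```r
x <- c(2, 4, 6, NA)

# magrittr style, requires the magrittr package (or a re-export of %>%):
# x %>% mean(na.rm = TRUE)

# base-R equivalent from 4.1.0 on, no extra dependency:
x |> mean(na.rm = TRUE)
#> [1] 4
```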
2 changes: 0 additions & 2 deletions NAMESPACE
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(BatchContainer)
export(BatchContainerDimension)
export(L1_norm)
@@ -34,6 +33,5 @@ export(shuffle_with_subgroup_formation)
export(sum_scores)
export(worst_score)
import(R6, except = getNamespaceExports("R6"))
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
importFrom(stats,na.omit)
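Since NAMESPACE is generated by roxygen2, these two lines disappear once the roxygen block that re-exported the magrittr pipe is deleted and the documentation is regenerated. A hypothetical sketch of such a block (the standard `usethis::use_pipe()` boilerplate; the file actually removed in this PR may differ):

```r
# R/utils-pipe.R (hypothetical): deleting this block and re-running
# roxygen2::roxygenise() removes export("%>%") and importFrom(magrittr, "%>%").

#' Pipe operator
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL
```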
4 changes: 2 additions & 2 deletions R/all_equal_df.R
@@ -25,9 +25,9 @@ all_equal_df <- function(df1, df2) {
df2 <- df2[colnames(df1)]

# convert factors to characters
df1 <- df1 %>%
df1 <- df1 |>
dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character))
df2 <- df2 %>%
df2 <- df2 |>
dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character))

# order by all columns
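The `%>%` to `|>` swaps in this PR are mechanical because every call pipes into the first argument. One general caveat, illustrative only and not an issue in this file: magrittr's `.` placeholder has no `|>` counterpart until R 4.2's `_`, which additionally must be passed to a named argument.

```r
library(dplyr)

df1 <- data.frame(f = factor(c("a", "b")), n = 1:2)

# First-argument piping behaves identically under %>% and |>:
df1 |> mutate(across(where(is.factor), as.character))

# Placeholder piping is where the two pipes differ:
# df1 %>% lm(n ~ f, data = .)   # magrittr, dot placeholder
# df1 |>  lm(n ~ f, data = _)   # base R >= 4.2, underscore placeholder (named argument only)
```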
8 changes: 4 additions & 4 deletions R/assignment.R
@@ -152,9 +152,9 @@ assign_from_table <- function(batch_container, samples) {
)
location_columns <- batch_container$dimension_names
sample_columns <- setdiff(colnames(samples), batch_container$dimension_names)
only_samples <- samples[sample_columns] %>%
only_samples <- samples[sample_columns] |>
# remove all-NA rows, i.e. unassigned locations
dplyr::filter(!dplyr::if_all(tidyselect::everything(), is.na))
dplyr::filter(!dplyr::if_all(dplyr::everything(), is.na))
if (is.null(batch_container$samples)) {
batch_container$samples <- only_samples
} else {
@@ -171,8 +171,8 @@ assign_from_table <- function(batch_container, samples) {
)) == 0,
msg = "sample sheed has locations not available in the batch container"
)
samples_with_id <- batch_container$get_locations() %>%
dplyr::left_join(samples, by = location_columns) %>%
samples_with_id <- batch_container$get_locations() |>
dplyr::left_join(samples, by = location_columns) |>
dplyr::left_join(batch_container$samples, by = sample_columns)

batch_container$move_samples(location_assignment = samples_with_id$.sample_id)
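Replacing `tidyselect::everything()` with `dplyr::everything()` works because dplyr re-exports the tidyselect selection helpers, so the selection semantics are unchanged and the direct Import on tidyselect can go while dplyr stays. A small illustrative sketch of the all-NA row filter used above (toy data, not from the package):

```r
library(dplyr)

samples <- data.frame(id = c(1, NA, 3), name = c("a", NA, "c"))

# dplyr::everything() is the same tidyselect helper, re-exported by dplyr:
samples |> filter(!if_all(everything(), is.na))
# keeps rows 1 and 3; the all-NA row (an unassigned location) is dropped
```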
44 changes: 22 additions & 22 deletions R/batch_container.R
@@ -14,7 +14,7 @@ validate_samples <- function(samples) {
msg = "Samples should have at least one row"
)

assertthat::assert_that(nrow(dplyr::filter(samples, dplyr::if_all(tidyselect::everything(), is.na))) == 0,
assertthat::assert_that(nrow(dplyr::filter(samples, dplyr::if_all(dplyr::everything(), is.na))) == 0,
msg = "Samples contain all-NA rows"
)
}
@@ -68,10 +68,10 @@ locations_table_from_dimensions <- function(dimensions, exclude) {
msg = "duplicated dimension names"
)

ldf <- dimensions %>%
purrr::map(~ .x$values) %>%
expand.grid() %>%
dplyr::arrange(dplyr::across(dplyr::everything())) %>%
ldf <- dimensions |>
purrr::map(~ .x$values) |>
expand.grid() |>
dplyr::arrange(dplyr::across(dplyr::everything())) |>
tibble::as_tibble()

if (!is.null(exclude)) {
@@ -84,7 +84,7 @@ locations_table_from_dimensions <- function(dimensions, exclude) {
msg = "Columns of exclude should match dimensions"
)

exclude <- exclude[, names(dimensions), drop = FALSE] %>%
exclude <- exclude[, names(dimensions), drop = FALSE] |>
dplyr::mutate(
dplyr::across(where(is.numeric), as.integer),
dplyr::across(where(is.factor), as.character)
@@ -232,7 +232,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
private$samples_dt_cache <- cbind(
self$get_locations(),
private$samples_table[private$assignment_vector, ]
) %>%
) |>
data.table::as.data.table()
}
res <- data.table::copy(private$samples_dt_cache)
@@ -383,7 +383,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
}
v
}
) %>%
) |>
purrr::flatten_dbl()
assertthat::assert_that(length(res) >= length(scoring))
assertthat::assert_that(
@@ -403,7 +403,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
# autogenerated documentation (via roxygen2)
bc <- BatchContainer$new(private$locations_df)
if (!is.null(self$samples)) {
bc$samples <- self$samples %>%
bc$samples <- self$samples |>
dplyr::select(-.sample_id)
}
if (!is.null(self$assignment)) {
@@ -438,8 +438,8 @@ BatchContainer <- R6::R6Class("BatchContainer",
.trim = FALSE
))
cat(" Dimensions: ")
self$dimension_names %>%
stringr::str_c(collapse = ", ") %>%
self$dimension_names |>
stringr::str_c(collapse = ", ") |>
cat()
cat("\n")
invisible(self)
@@ -477,27 +477,27 @@ BatchContainer <- R6::R6Class("BatchContainer",
rlang::is_integerish(index),
msg = "index should be an integer"
)
d <- self$trace %>%
dplyr::filter(.data$optimization_index %in% index) %>%
dplyr::select(.data$optimization_index, .data$scores) %>%
tidyr::unnest(.data$scores) %>%
d <- self$trace |>
dplyr::filter(.data$optimization_index %in% index) |>
dplyr::select(.data$optimization_index, .data$scores) |>
tidyr::unnest(.data$scores) |>
tidyr::pivot_longer(c(-.data$optimization_index, -.data$step),
names_to = "score",
values_to = "value") %>%
values_to = "value") |>
dplyr::mutate(aggregated = FALSE)
if (include_aggregated) {
d_agg <- self$trace %>%
dplyr::filter(.data$optimization_index %in% index) %>%
dplyr::select(.data$optimization_index, .data$aggregated_scores) %>%
d_agg <- self$trace |>
dplyr::filter(.data$optimization_index %in% index) |>
dplyr::select(.data$optimization_index, .data$aggregated_scores) |>
tidyr::unnest(.data$aggregated_scores)

if ("step" %in% colnames(d_agg)) {
# if no aggregated scores are provided (aggregated_scores=NULL),
# there will be no step column after unnesting
d_agg <- d_agg %>%
d_agg <- d_agg |>
tidyr::pivot_longer(c(-.data$optimization_index, -.data$step),
names_to = "score",
values_to = "value") %>%
values_to = "value") |>
dplyr::mutate(
aggregated = TRUE,
score = paste0("agg.", .data$score)
@@ -518,7 +518,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
#' @param ... not used.
#' @return a [ggplot2::ggplot()] object
plot_trace = function(index = NULL, include_aggregated = FALSE, ...) {
d <- self$scores_table(index, include_aggregated) %>%
d <- self$scores_table(index, include_aggregated) |>
dplyr::mutate(
agg_title = dplyr::if_else(.data$aggregated, "aggregated", "score")
)
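All the chains in this file end in plain function calls (`tibble::as_tibble()`, `data.table::as.data.table()`, `purrr::flatten_dbl()`, ...), which is exactly the case the base pipe supports, including namespace-qualified calls. A small illustrative sketch of the cache-style conversion used above (column names are made up):

```r
locations <- expand.grid(row = 1:2, column = c("A", "B"))

# Piping into a namespace-qualified call works the same with |> as with %>%:
dt <- cbind(locations, .sample_id = 1:4) |>
  data.table::as.data.table()

class(dt)
#> [1] "data.table" "data.frame"
```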
2 changes: 1 addition & 1 deletion R/formula_utils.R
@@ -10,7 +10,7 @@
#' @examples
generate_terms <- function(.tbl, ...) {
if (!tibble::is_tibble(.tbl)) .tbl <- tibble::as_tibble(.tbl)
.tbl <- dplyr::select(.tbl, tidyselect::everything(), ...)
.tbl <- dplyr::select(.tbl, dplyr::everything(), ...)
n <- nrow(.tbl)
m <- ncol(.tbl)
form <- paste("~ 1 + .")
4 changes: 2 additions & 2 deletions R/optimize.R
@@ -257,7 +257,7 @@ optimize_design <- function(batch_container, samples = NULL,
iteration <- 1
using_attributes <- FALSE # keeps track if attributes had been used in 1st iteration, since they must be provided consistently

shuffle_params <- shuffle_proposal_func(batch_container, iteration) %>%
shuffle_params <- shuffle_proposal_func(batch_container, iteration) |>
extract_shuffle_params(attributes_expected = FALSE)

# Always perform first shuffling before scoring the bc; works also if attributes are added at this stage
@@ -392,7 +392,7 @@ optimize_design <- function(batch_container, samples = NULL,

if (iteration <= max_iter) {
# only call shuffle_proposal_func in case we have more iterations
shuffle_params <- shuffle_proposal_func(batch_container, iteration) %>%
shuffle_params <- shuffle_proposal_func(batch_container, iteration) |>
extract_shuffle_params(attributes_expected = using_attributes)
}
}
12 changes: 8 additions & 4 deletions R/osat.R
@@ -58,7 +58,9 @@ osat_score <- function(bc, batch_vars, feature_vars, expected_dt = NULL, quiet =
special_col_names <- c(".n_batch", ".batch_freq", ".n_samples")
special_col_names_str <- stringr::str_c(special_col_names, collapse = ", ")
assertthat::assert_that(length(intersect(special_col_names, colnames(df))) == 0,
msg = glue::glue("special names ({special_col_names_str}) cannot be used as column names")
msg = stringr::str_glue(
"special names ({special_col_names_str}) cannot be used as column names"
)
)
if (is.null(expected_dt)) {
batch_df <- df[, .(.n_batch = .N), by = batch_vars]
@@ -86,12 +88,14 @@ osat_score <- function(bc, batch_vars, feature_vars, expected_dt = NULL, quiet =
data.table::setkeyv(expected_dt, c(batch_vars, feature_vars))
} else {
assertthat::assert_that(is.data.frame(expected_dt) && nrow(expected_dt) > 0)
expected_colnames <- c(feature_vars, batch_vars) %>%
c(".n_expected") %>%
expected_colnames <- c(feature_vars, batch_vars) |>
c(".n_expected") |>
sort()
expected_colnames_str <- stringr::str_c(expected_colnames, collapse = ", ")
assertthat::assert_that(all(sort(colnames(expected_dt)) == expected_colnames),
msg = glue::glue("expecting column names in expected_dt: {expected_colnames_str}")
msg = stringr::str_glue(
"expecting column names in expected_dt: {expected_colnames_str}"
)
)
}
sample_count_df <- na.omit(df[, .N, by = c(feature_vars, batch_vars)])
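`stringr::str_glue()` is a thin wrapper around glue with the same `{}` interpolation syntax, so the generated messages stay identical while the direct Import on glue is dropped (stringr was already in Imports). Illustrative sketch, not from the package tests:

```r
library(stringr)

special_col_names_str <- ".n_batch, .batch_freq, .n_samples"

str_glue("special names ({special_col_names_str}) cannot be used as column names")
#> special names (.n_batch, .batch_freq, .n_samples) cannot be used as column names
```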
14 changes: 7 additions & 7 deletions R/permute_subgroups.R
@@ -35,7 +35,7 @@ form_homogeneous_subgroups <- function(batch_container, allocate_var, keep_toget
# The allocation variable must have factor levels in a given order!
# This is important later on for sample swapping as it has to be known which factor level corresponds to 'group 1' etc
if (!inherits(allocate_fac, "factor")) {
allocate_fac <- forcats::as_factor(allocate_fac)
allocate_fac <- factor(allocate_fac, levels = unique(allocate_fac))
}

assertthat::assert_that(length(keep_together_vars) > 0, msg = "Function can only help if vector of 'keep_together_vars' is specified.")
@@ -104,12 +104,12 @@ form_homogeneous_subgroups <- function(batch_container, allocate_var, keep_toget
}

# Group sample list by relevant variables
grouped_samples <- dplyr::group_by(samples, dplyr::across(tidyselect::all_of(use_vars)))
grouped_samples <- dplyr::group_by(samples, dplyr::across(dplyr::all_of(use_vars)))

# Determine sizes of the subgroups and store in list; name elements by levels of the involved grouping variables
subgroup_sizes <- purrr::map(dplyr::group_size(grouped_samples), ~ best_group_sizes(.x, n_min, n_max, n_ideal, prefer_big_groups))
names(subgroup_sizes) <- dplyr::group_keys(grouped_samples) %>%
tidyr::unite(col = "keys", sep = "/") %>%
names(subgroup_sizes) <- dplyr::group_keys(grouped_samples) |>
tidyr::unite(col = "keys", sep = "/") |>
dplyr::pull(1)

assertthat::assert_that(!strict || (min(unlist(subgroup_sizes)) >= n_min && max(unlist(subgroup_sizes)) <= n_max),
@@ -294,7 +294,7 @@ shuffle_with_subgroup_formation <- function(subgroup_object, subgroup_allocation

# Index vector to assign group allocation to sample dataframe in its original order.
# Split up according to group structure to allow easy permutation on the subgroup level
sample_vec <- order(dplyr::group_indices(subgroup_object$Grouped_Samples)) %>%
sample_vec <- order(dplyr::group_indices(subgroup_object$Grouped_Samples)) |>
split(f = group_vec)

# Order of the allocation variable in the batch container may be arbitrary and not strictly sorted;
@@ -330,7 +330,7 @@ shuffle_with_subgroup_formation <- function(subgroup_object, subgroup_allocation

# Create a random permutation within (!) all the groups
# This means that only 'equivalent' items are swapped around and get assigned to new subgroups every time
rand_index <- purrr::map(sample_vec, my_sample) %>% unlist(use.names = F)
rand_index <- purrr::map(sample_vec, my_sample) |> unlist(use.names = F)

# Check if keep_separate_vars constraints are fulfilled within the randomized subgroups
# Otherwise, create new permutations and increment number of tolerated violations every 1000 unsuccessful attempts
@@ -350,7 +350,7 @@ shuffle_with_subgroup_formation <- function(subgroup_object, subgroup_allocation
"Increasing number of tolerated violations to ", sep_fails_tolerance
)
}
rand_index <- purrr::map(sample_vec, my_sample) %>% unlist(use.names = F)
rand_index <- purrr::map(sample_vec, my_sample) |> unlist(use.names = F)
}
}

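The `forcats::as_factor()` replacement needs a little care: for character input forcats orders levels by first appearance, while base `factor()` sorts them alphabetically. Passing `levels = unique(allocate_fac)` reproduces the appearance order, which the comment in the hunk above says the sample-swapping code relies on. Illustrative check (character input assumed):

```r
x <- c("plate2", "plate1", "plate2")

levels(factor(x))                       # "plate1" "plate2"  (alphabetical)
levels(factor(x, levels = unique(x)))   # "plate2" "plate1"  (first appearance)
# forcats::as_factor(x) also gives first-appearance order, so the replacement
# preserves the level ordering required for subgroup swapping.
```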
32 changes: 16 additions & 16 deletions R/plot.R
@@ -11,8 +11,8 @@
plot_design <- function(.tbl, ..., .color, .alpha = NULL) {
# generate vars
vars <- rlang::enquos(...)
combinations <- .tbl %>%
dplyr::mutate(combinations = interaction(!!!vars, lex.order = T)) %>%
combinations <- .tbl |>
dplyr::mutate(combinations = interaction(!!!vars, lex.order = T)) |>
dplyr::pull(combinations)
g <- ggplot2::ggplot(.tbl) +
ggplot2::aes(
@@ -22,9 +22,9 @@ plot_design <- function(.tbl, ..., .color, .alpha = NULL) {
ggplot2::geom_histogram(stat = "count") +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
if (!rlang::quo_is_null(rlang::enquo(.alpha))) {
alpha_levels <- .tbl %>%
dplyr::pull({{ .alpha }}) %>%
unique() %>%
alpha_levels <- .tbl |>
dplyr::pull({{ .alpha }}) |>
unique() |>
length()
alpha_range <- c(1 / min(5, alpha_levels), 1)
g <- g +
@@ -100,11 +100,11 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
Pattern <- NULL

if (add_excluded) {
assertthat::assert_that(checkmate::test_r6(.tbl, "BatchContainer"))
assertthat::assert_that(R6::is.R6(.tbl) && inherits(.tbl, "BatchContainer"))
excluded <- .tbl$exclude
}

if (checkmate::test_r6(.tbl, "BatchContainer")) {
if (R6::is.R6(.tbl) && inherits(.tbl, "BatchContainer")) {
.tbl <- .tbl$get_samples()
} else {
assertthat::assert_that(is.data.frame(.tbl))
@@ -124,20 +124,20 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
warning("Please install ggpattern to use patterns in the plot")
} else {
assertthat::assert_that(assertthat::has_name(.tbl, rlang::as_name(rlang::enquo(.pattern))))
.tbl <- .tbl %>%
dplyr::mutate(Pattern = forcats::as_factor({{ .pattern }}))
.tbl <- .tbl |>
dplyr::mutate(Pattern = as.factor({{ .pattern }}))
}
}
# If there is no plate,
if (rlang::quo_is_null(rlang::enquo(plate)) ||
!assertthat::has_name(.tbl, rlang::as_name(rlang::enquo(plate)))) {
# check if row + column is unique
assertthat::assert_that(
(.tbl %>% dplyr::count({{ column }}, {{ row }}) %>% nrow()) == nrow(.tbl),
(.tbl |> dplyr::count({{ column }}, {{ row }}) |> nrow()) == nrow(.tbl),
msg = "Non-unique row + column combination found. Please provide a plate variable."
)
# make a fake plate variable
.tbl <- .tbl %>%
.tbl <- .tbl |>
dplyr::mutate(plate = 1)
plate <- rlang::sym("plate")
}
@@ -151,11 +151,11 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
.tbl <- dplyr::bind_rows(.tbl, excluded)
}

.tbl <- .tbl %>%
.tbl <- .tbl |>
dplyr::mutate(
plate = forcats::as_factor({{ plate }}),
column = forcats::as_factor({{ column }}),
row = forcats::as_factor({{ row }})
plate = as.factor({{ plate }}),
column = as.factor({{ column }}),
row = as.factor({{ row }})
)

# make plot
@@ -169,7 +169,7 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
# scale alpha
.alpha <- rlang::enquo(.alpha)
if (!rlang::quo_is_null(.alpha)) {
alpha_var <- .tbl %>%
alpha_var <- .tbl |>
dplyr::pull(!!.alpha)
alpha_levels <- unique(alpha_var)
if (is.numeric(alpha_var) && length(alpha_levels > 7)) {
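`checkmate::test_r6(x, "BatchContainer")` is replaced here by an equivalent base-level check: `R6::is.R6()` confirms the object is an R6 instance and `inherits()` checks the class name. A toy sketch with a stand-in class (the real BatchContainer is more involved):

```r
library(R6)

# Stand-in class for illustration only:
BatchContainer <- R6Class("BatchContainer", public = list(dummy = NULL))
bc <- BatchContainer$new()

R6::is.R6(bc) && inherits(bc, "BatchContainer")
#> [1] TRUE
```

One nuance to keep in mind with the factor changes in this file: `as.factor()` sorts levels alphabetically, whereas `forcats::as_factor()` uses order of appearance, so axis, facet, and legend ordering can differ for unsorted character columns.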