remove dependencies #42

Merged: 14 commits, Mar 13, 2024
7 changes: 1 addition & 6 deletions DESCRIPTION
@@ -39,25 +39,20 @@ License: MIT + file LICENSE
URL: https://bedapub.github.io/designit/, https://github.com/BEDApub/designit/
BugReports: https://github.com/BEDApub/designit/issues
Depends:
R (>= 3.5.0)
R (>= 4.1.0)
Imports:
magrittr,
rlang (>= 0.4.0),
dplyr (>= 1.0.0),
purrr,
ggplot2,
scales,
tibble,
tidyselect,
tidyr,
assertthat,
stringr,
R6,
ineq,
data.table,
glue,
forcats,
checkmate,
stats
Suggests:
testthat,
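The R version bump from 3.5.0 to 4.1.0 goes together with dropping magrittr below: the native pipe `|>` only exists from R 4.1.0 onward. A minimal illustrative sketch of the substitution (not taken from the package sources):

```r
x <- c(2, 4, 6, NA)

# magrittr style, requires the magrittr package (or a re-export of %>%):
# x %>% mean(na.rm = TRUE)

# base-R equivalent from 4.1.0 on, no extra dependency:
x |> mean(na.rm = TRUE)
#> [1] 4
```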
2 changes: 0 additions & 2 deletions NAMESPACE
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(BatchContainer)
export(BatchContainerDimension)
export(L1_norm)
@@ -34,6 +33,5 @@ export(shuffle_with_subgroup_formation)
export(sum_scores)
export(worst_score)
import(R6, except = getNamespaceExports("R6"))
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
importFrom(stats,na.omit)
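Since NAMESPACE is generated by roxygen2, these two lines disappear once the roxygen block that re-exported the magrittr pipe is deleted and the documentation is regenerated. A hypothetical sketch of such a block (the standard `usethis::use_pipe()` boilerplate; the file actually removed in this PR may differ):

```r
# R/utils-pipe.R (hypothetical): deleting this block and re-running
# roxygen2::roxygenise() removes export("%>%") and importFrom(magrittr, "%>%").

#' Pipe operator
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL
```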
4 changes: 2 additions & 2 deletions R/all_equal_df.R
@@ -25,9 +25,9 @@ all_equal_df <- function(df1, df2) {
df2 <- df2[colnames(df1)]

# convert factors to characters
df1 <- df1 %>%
df1 <- df1 |>
dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character))
df2 <- df2 %>%
df2 <- df2 |>
dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character))

# order by all columns
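The `%>%` to `|>` swaps in this PR are mechanical because every call pipes into the first argument. One general caveat, illustrative only and not an issue in this file: magrittr's `.` placeholder has no `|>` counterpart until R 4.2's `_`, which additionally must be passed to a named argument.

```r
library(dplyr)

df1 <- data.frame(f = factor(c("a", "b")), n = 1:2)

# First-argument piping behaves identically under %>% and |>:
df1 |> mutate(across(where(is.factor), as.character))

# Placeholder piping is where the two pipes differ:
# df1 %>% lm(n ~ f, data = .)   # magrittr, dot placeholder
# df1 |>  lm(n ~ f, data = _)   # base R >= 4.2, underscore placeholder (named argument only)
```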
8 changes: 4 additions & 4 deletions R/assignment.R
@@ -152,9 +152,9 @@ assign_from_table <- function(batch_container, samples) {
)
location_columns <- batch_container$dimension_names
sample_columns <- setdiff(colnames(samples), batch_container$dimension_names)
only_samples <- samples[sample_columns] %>%
only_samples <- samples[sample_columns] |>
# remove all-NA rows, i.e. unassigned locations
dplyr::filter(!dplyr::if_all(tidyselect::everything(), is.na))
dplyr::filter(!dplyr::if_all(dplyr::everything(), is.na))
if (is.null(batch_container$samples)) {
batch_container$samples <- only_samples
} else {
@@ -171,8 +171,8 @@ assign_from_table <- function(batch_container, samples) {
)) == 0,
msg = "sample sheed has locations not available in the batch container"
)
samples_with_id <- batch_container$get_locations() %>%
dplyr::left_join(samples, by = location_columns) %>%
samples_with_id <- batch_container$get_locations() |>
dplyr::left_join(samples, by = location_columns) |>
dplyr::left_join(batch_container$samples, by = sample_columns)

batch_container$move_samples(location_assignment = samples_with_id$.sample_id)
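Replacing `tidyselect::everything()` with `dplyr::everything()` works because dplyr re-exports the tidyselect selection helpers, so the selection semantics are unchanged and the direct Import on tidyselect can go while dplyr stays. A small illustrative sketch of the all-NA row filter used above (toy data, not from the package):

```r
library(dplyr)

samples <- data.frame(id = c(1, NA, 3), name = c("a", NA, "c"))

# dplyr::everything() is the same tidyselect helper, re-exported by dplyr:
samples |> filter(!if_all(everything(), is.na))
# keeps rows 1 and 3; the all-NA row (an unassigned location) is dropped
```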
44 changes: 22 additions & 22 deletions R/batch_container.R
@@ -14,7 +14,7 @@ validate_samples <- function(samples) {
msg = "Samples should have at least one row"
)

assertthat::assert_that(nrow(dplyr::filter(samples, dplyr::if_all(tidyselect::everything(), is.na))) == 0,
assertthat::assert_that(nrow(dplyr::filter(samples, dplyr::if_all(dplyr::everything(), is.na))) == 0,
msg = "Samples contain all-NA rows"
)
}
@@ -68,10 +68,10 @@ locations_table_from_dimensions <- function(dimensions, exclude) {
msg = "duplicated dimension names"
)

ldf <- dimensions %>%
purrr::map(~ .x$values) %>%
expand.grid() %>%
dplyr::arrange(dplyr::across(dplyr::everything())) %>%
ldf <- dimensions |>
purrr::map(~ .x$values) |>
expand.grid() |>
dplyr::arrange(dplyr::across(dplyr::everything())) |>
tibble::as_tibble()

if (!is.null(exclude)) {
@@ -84,7 +84,7 @@ locations_table_from_dimensions <- function(dimensions, exclude) {
msg = "Columns of exclude should match dimensions"
)

exclude <- exclude[, names(dimensions), drop = FALSE] %>%
exclude <- exclude[, names(dimensions), drop = FALSE] |>
dplyr::mutate(
dplyr::across(where(is.numeric), as.integer),
dplyr::across(where(is.factor), as.character)
@@ -232,7 +232,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
private$samples_dt_cache <- cbind(
self$get_locations(),
private$samples_table[private$assignment_vector, ]
) %>%
) |>
data.table::as.data.table()
}
res <- data.table::copy(private$samples_dt_cache)
@@ -383,7 +383,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
}
v
}
) %>%
) |>
purrr::flatten_dbl()
assertthat::assert_that(length(res) >= length(scoring))
assertthat::assert_that(
@@ -403,7 +403,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
# autogenerated documentation (via roxygen2)
bc <- BatchContainer$new(private$locations_df)
if (!is.null(self$samples)) {
bc$samples <- self$samples %>%
bc$samples <- self$samples |>
dplyr::select(-.sample_id)
}
if (!is.null(self$assignment)) {
@@ -438,8 +438,8 @@ BatchContainer <- R6::R6Class("BatchContainer",
.trim = FALSE
))
cat(" Dimensions: ")
self$dimension_names %>%
stringr::str_c(collapse = ", ") %>%
self$dimension_names |>
stringr::str_c(collapse = ", ") |>
cat()
cat("\n")
invisible(self)
@@ -477,27 +477,27 @@ BatchContainer <- R6::R6Class("BatchContainer",
rlang::is_integerish(index),
msg = "index should be an integer"
)
d <- self$trace %>%
dplyr::filter(.data$optimization_index %in% index) %>%
dplyr::select(.data$optimization_index, .data$scores) %>%
tidyr::unnest(.data$scores) %>%
d <- self$trace |>
dplyr::filter(.data$optimization_index %in% index) |>
dplyr::select(.data$optimization_index, .data$scores) |>
tidyr::unnest(.data$scores) |>
tidyr::pivot_longer(c(-.data$optimization_index, -.data$step),
names_to = "score",
values_to = "value") %>%
values_to = "value") |>
dplyr::mutate(aggregated = FALSE)
if (include_aggregated) {
d_agg <- self$trace %>%
dplyr::filter(.data$optimization_index %in% index) %>%
dplyr::select(.data$optimization_index, .data$aggregated_scores) %>%
d_agg <- self$trace |>
dplyr::filter(.data$optimization_index %in% index) |>
dplyr::select(.data$optimization_index, .data$aggregated_scores) |>
tidyr::unnest(.data$aggregated_scores)

if ("step" %in% colnames(d_agg)) {
# if no aggregated scores are provided (aggregated_scores=NULL),
# there will be no step column after unnesting
d_agg <- d_agg %>%
d_agg <- d_agg |>
tidyr::pivot_longer(c(-.data$optimization_index, -.data$step),
names_to = "score",
values_to = "value") %>%
values_to = "value") |>
dplyr::mutate(
aggregated = TRUE,
score = paste0("agg.", .data$score)
@@ -518,7 +518,7 @@ BatchContainer <- R6::R6Class("BatchContainer",
#' @param ... not used.
#' @return a [ggplot2::ggplot()] object
plot_trace = function(index = NULL, include_aggregated = FALSE, ...) {
d <- self$scores_table(index, include_aggregated) %>%
d <- self$scores_table(index, include_aggregated) |>
dplyr::mutate(
agg_title = dplyr::if_else(.data$aggregated, "aggregated", "score")
)
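All the chains in this file end in plain function calls (`tibble::as_tibble()`, `data.table::as.data.table()`, `purrr::flatten_dbl()`, ...), which is exactly the case the base pipe supports, including namespace-qualified calls. A small illustrative sketch of the cache-style conversion used above (column names are made up):

```r
locations <- expand.grid(row = 1:2, column = c("A", "B"))

# Piping into a namespace-qualified call works the same with |> as with %>%:
dt <- cbind(locations, .sample_id = 1:4) |>
  data.table::as.data.table()

class(dt)
#> [1] "data.table" "data.frame"
```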
2 changes: 1 addition & 1 deletion R/formula_utils.R
@@ -10,7 +10,7 @@
#' @examples
generate_terms <- function(.tbl, ...) {
if (!tibble::is_tibble(.tbl)) .tbl <- tibble::as_tibble(.tbl)
.tbl <- dplyr::select(.tbl, tidyselect::everything(), ...)
.tbl <- dplyr::select(.tbl, dplyr::everything(), ...)
n <- nrow(.tbl)
m <- ncol(.tbl)
form <- paste("~ 1 + .")
4 changes: 2 additions & 2 deletions R/optimize.R
@@ -257,7 +257,7 @@ optimize_design <- function(batch_container, samples = NULL,
iteration <- 1
using_attributes <- FALSE # keeps track if attributes had been used in 1st iteration, since they must be provided consistently

shuffle_params <- shuffle_proposal_func(batch_container, iteration) %>%
shuffle_params <- shuffle_proposal_func(batch_container, iteration) |>
extract_shuffle_params(attributes_expected = FALSE)

# Always perform first shuffling before scoring the bc; works also if attributes are added at this stage
@@ -392,7 +392,7 @@ optimize_design <- function(batch_container, samples = NULL,

if (iteration <= max_iter) {
# only call shuffle_proposal_func in case we have more iterations
shuffle_params <- shuffle_proposal_func(batch_container, iteration) %>%
shuffle_params <- shuffle_proposal_func(batch_container, iteration) |>
extract_shuffle_params(attributes_expected = using_attributes)
}
}
12 changes: 8 additions & 4 deletions R/osat.R
@@ -58,7 +58,9 @@ osat_score <- function(bc, batch_vars, feature_vars, expected_dt = NULL, quiet =
special_col_names <- c(".n_batch", ".batch_freq", ".n_samples")
special_col_names_str <- stringr::str_c(special_col_names, collapse = ", ")
assertthat::assert_that(length(intersect(special_col_names, colnames(df))) == 0,
msg = glue::glue("special names ({special_col_names_str}) cannot be used as column names")
msg = stringr::str_glue(
"special names ({special_col_names_str}) cannot be used as column names"
)
)
if (is.null(expected_dt)) {
batch_df <- df[, .(.n_batch = .N), by = batch_vars]
@@ -86,12 +88,14 @@ osat_score <- function(bc, batch_vars, feature_vars, expected_dt = NULL, quiet =
data.table::setkeyv(expected_dt, c(batch_vars, feature_vars))
} else {
assertthat::assert_that(is.data.frame(expected_dt) && nrow(expected_dt) > 0)
expected_colnames <- c(feature_vars, batch_vars) %>%
c(".n_expected") %>%
expected_colnames <- c(feature_vars, batch_vars) |>
c(".n_expected") |>
sort()
expected_colnames_str <- stringr::str_c(expected_colnames, collapse = ", ")
assertthat::assert_that(all(sort(colnames(expected_dt)) == expected_colnames),
msg = glue::glue("expecting column names in expected_dt: {expected_colnames_str}")
msg = stringr::str_glue(
"expecting column names in expected_dt: {expected_colnames_str}"
)
)
}
sample_count_df <- na.omit(df[, .N, by = c(feature_vars, batch_vars)])
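`stringr::str_glue()` is a thin wrapper around glue with the same `{}` interpolation syntax, so the generated messages stay identical while the direct Import on glue is dropped (stringr was already in Imports). Illustrative sketch, not from the package tests:

```r
library(stringr)

special_col_names_str <- ".n_batch, .batch_freq, .n_samples"

str_glue("special names ({special_col_names_str}) cannot be used as column names")
#> special names (.n_batch, .batch_freq, .n_samples) cannot be used as column names
```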
14 changes: 7 additions & 7 deletions R/permute_subgroups.R
@@ -35,7 +35,7 @@ form_homogeneous_subgroups <- function(batch_container, allocate_var, keep_toget
# The allocation variable must have factor levels in a given order!
# This is important later on for sample swapping as it has to be known which factor level corresponds to 'group 1' etc
if (!inherits(allocate_fac, "factor")) {
allocate_fac <- forcats::as_factor(allocate_fac)
allocate_fac <- factor(allocate_fac, levels = unique(allocate_fac))
}

assertthat::assert_that(length(keep_together_vars) > 0, msg = "Function can only help if vector of 'keep_together_vars' is specified.")
@@ -104,12 +104,12 @@ form_homogeneous_subgroups <- function(batch_container, allocate_var, keep_toget
}

# Group sample list by relevant variables
grouped_samples <- dplyr::group_by(samples, dplyr::across(tidyselect::all_of(use_vars)))
grouped_samples <- dplyr::group_by(samples, dplyr::across(dplyr::all_of(use_vars)))

# Determine sizes of the subgroups and store in list; name elements by levels of the involved grouping variables
subgroup_sizes <- purrr::map(dplyr::group_size(grouped_samples), ~ best_group_sizes(.x, n_min, n_max, n_ideal, prefer_big_groups))
names(subgroup_sizes) <- dplyr::group_keys(grouped_samples) %>%
tidyr::unite(col = "keys", sep = "/") %>%
names(subgroup_sizes) <- dplyr::group_keys(grouped_samples) |>
tidyr::unite(col = "keys", sep = "/") |>
dplyr::pull(1)

assertthat::assert_that(!strict || (min(unlist(subgroup_sizes)) >= n_min && max(unlist(subgroup_sizes)) <= n_max),
@@ -294,7 +294,7 @@ shuffle_with_subgroup_formation <- function(subgroup_object, subgroup_allocation

# Index vector to assign group allocation to sample dataframe in its original order.
# Split up according to group structure to allow easy permutation on the subgroup level
sample_vec <- order(dplyr::group_indices(subgroup_object$Grouped_Samples)) %>%
sample_vec <- order(dplyr::group_indices(subgroup_object$Grouped_Samples)) |>
split(f = group_vec)

# Order of the allocation variable in the batch container may be arbitrary and not strictly sorted;
@@ -330,7 +330,7 @@ shuffle_with_subgroup_formation <- function(subgroup_object, subgroup_allocation

# Create a random permutation within (!) all the groups
# This means that only 'equivalent' items are swapped around and get assigned to new subgroups every time
rand_index <- purrr::map(sample_vec, my_sample) %>% unlist(use.names = F)
rand_index <- purrr::map(sample_vec, my_sample) |> unlist(use.names = F)

# Check if keep_separate_vars constraints are fulfilled within the randomized subgroups
# Otherwise, create new permutations and increment number of tolerated violations every 1000 unsuccessful attempts
@@ -350,7 +350,7 @@ shuffle_with_subgroup_formation <- function(subgroup_object, subgroup_allocation
"Increasing number of tolerated violations to ", sep_fails_tolerance
)
}
rand_index <- purrr::map(sample_vec, my_sample) %>% unlist(use.names = F)
rand_index <- purrr::map(sample_vec, my_sample) |> unlist(use.names = F)
}
}

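The `forcats::as_factor()` replacement needs a little care: for character input forcats orders levels by first appearance, while base `factor()` sorts them alphabetically. Passing `levels = unique(allocate_fac)` reproduces the appearance order, which the comment in the hunk above says the sample-swapping code relies on. Illustrative check (character input assumed):

```r
x <- c("plate2", "plate1", "plate2")

levels(factor(x))                       # "plate1" "plate2"  (alphabetical)
levels(factor(x, levels = unique(x)))   # "plate2" "plate1"  (first appearance)
# forcats::as_factor(x) also gives first-appearance order, so the replacement
# preserves the level ordering required for subgroup swapping.
```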
32 changes: 16 additions & 16 deletions R/plot.R
@@ -11,8 +11,8 @@
plot_design <- function(.tbl, ..., .color, .alpha = NULL) {
# generate vars
vars <- rlang::enquos(...)
combinations <- .tbl %>%
dplyr::mutate(combinations = interaction(!!!vars, lex.order = T)) %>%
combinations <- .tbl |>
dplyr::mutate(combinations = interaction(!!!vars, lex.order = T)) |>
dplyr::pull(combinations)
g <- ggplot2::ggplot(.tbl) +
ggplot2::aes(
@@ -22,9 +22,9 @@ plot_design <- function(.tbl, ..., .color, .alpha = NULL) {
ggplot2::geom_histogram(stat = "count") +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
if (!rlang::quo_is_null(rlang::enquo(.alpha))) {
alpha_levels <- .tbl %>%
dplyr::pull({{ .alpha }}) %>%
unique() %>%
alpha_levels <- .tbl |>
dplyr::pull({{ .alpha }}) |>
unique() |>
length()
alpha_range <- c(1 / min(5, alpha_levels), 1)
g <- g +
@@ -100,11 +100,11 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
Pattern <- NULL

if (add_excluded) {
assertthat::assert_that(checkmate::test_r6(.tbl, "BatchContainer"))
assertthat::assert_that(R6::is.R6(.tbl) && inherits(.tbl, "BatchContainer"))
excluded <- .tbl$exclude
}

if (checkmate::test_r6(.tbl, "BatchContainer")) {
if (R6::is.R6(.tbl) && inherits(.tbl, "BatchContainer")) {
.tbl <- .tbl$get_samples()
} else {
assertthat::assert_that(is.data.frame(.tbl))
@@ -124,20 +124,20 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
warning("Please install ggpattern to use patterns in the plot")
} else {
assertthat::assert_that(assertthat::has_name(.tbl, rlang::as_name(rlang::enquo(.pattern))))
.tbl <- .tbl %>%
dplyr::mutate(Pattern = forcats::as_factor({{ .pattern }}))
.tbl <- .tbl |>
dplyr::mutate(Pattern = as.factor({{ .pattern }}))
}
}
# If there is no plate,
if (rlang::quo_is_null(rlang::enquo(plate)) ||
!assertthat::has_name(.tbl, rlang::as_name(rlang::enquo(plate)))) {
# check if row + column is unique
assertthat::assert_that(
(.tbl %>% dplyr::count({{ column }}, {{ row }}) %>% nrow()) == nrow(.tbl),
(.tbl |> dplyr::count({{ column }}, {{ row }}) |> nrow()) == nrow(.tbl),
msg = "Non-unique row + column combination found. Please provide a plate variable."
)
# make a fake plate variable
.tbl <- .tbl %>%
.tbl <- .tbl |>
dplyr::mutate(plate = 1)
plate <- rlang::sym("plate")
}
@@ -151,11 +151,11 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
.tbl <- dplyr::bind_rows(.tbl, excluded)
}

.tbl <- .tbl %>%
.tbl <- .tbl |>
dplyr::mutate(
plate = forcats::as_factor({{ plate }}),
column = forcats::as_factor({{ column }}),
row = forcats::as_factor({{ row }})
plate = as.factor({{ plate }}),
column = as.factor({{ column }}),
row = as.factor({{ row }})
)

# make plot
@@ -169,7 +169,7 @@ plot_plate <- function(.tbl, plate = plate, row = row, column = column,
# scale alpha
.alpha <- rlang::enquo(.alpha)
if (!rlang::quo_is_null(.alpha)) {
alpha_var <- .tbl %>%
alpha_var <- .tbl |>
dplyr::pull(!!.alpha)
alpha_levels <- unique(alpha_var)
if (is.numeric(alpha_var) && length(alpha_levels > 7)) {
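`checkmate::test_r6(x, "BatchContainer")` is replaced here by an equivalent base-level check: `R6::is.R6()` confirms the object is an R6 instance and `inherits()` checks the class name. A toy sketch with a stand-in class (the real BatchContainer is more involved):

```r
library(R6)

# Stand-in class for illustration only:
BatchContainer <- R6Class("BatchContainer", public = list(dummy = NULL))
bc <- BatchContainer$new()

R6::is.R6(bc) && inherits(bc, "BatchContainer")
#> [1] TRUE
```

One nuance to keep in mind with the factor changes in this file: `as.factor()` sorts levels alphabetically, whereas `forcats::as_factor()` uses order of appearance, so axis, facet, and legend ordering can differ for unsorted character columns.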