Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use mutate(.keep = "none") in transmute() #483

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

* `print.dtplyr_step()` gains `n`, `max_extra_cols`, and `max_footer_lines` args (#464)

* `transmute()` preserves row count and avoids unnecessary copies (#470)

# dtplyr 1.3.1

* Fix for failing R CMD check.
Expand Down
43 changes: 6 additions & 37 deletions R/step-subset-transmute.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,10 @@
#' dt <- lazy_dt(dplyr::starwars)
#' dt %>% transmute(name, sh = paste0(species, "/", homeworld))
transmute.dtplyr_step <- function(.data, ...) {
dots <- capture_new_vars(.data, ...)
dots_list <- process_new_vars(.data, dots)
dots <- dots_list$dots

groups <- group_vars(.data)
if (!is_empty(groups)) {
# TODO could check if there is actually anything mutated, e.g. to avoid
# DT[, .(x = x)]
is_group_var <- names(dots) %in% groups
group_dots <- dots[is_group_var]

.data <- mutate(ungroup(.data), !!!group_dots)
.data <- group_by(.data, !!!syms(groups))

dots <- dots[!is_group_var]
}

if (is_empty(dots)) {
# grouping variables have been removed from `dots` so `select()` would
# produce a message "Adding grouping vars".
# As `dplyr::transmute()` doesn't generate a message when adding group vars
# we can also leave it away here
return(select(.data, !!!group_vars(.data)))
}

if (!dots_list$use_braces) {
j <- call2(".", !!!dots)
} else {
j <- mutate_with_braces(dots)$expr
}
vars <- union(group_vars(.data), names(dots))
out <- step_subset_j(.data, vars = vars, j = j)
if (dots_list$need_removal_step) {
out <- select(out, -tidyselect::all_of(dots_list$vars_removed))
}

out
out <- mutate(.data, ..., .keep = "none")
cols_expr <- names(capture_new_vars(.data, ...))
cols_group <- group_vars(.data)
cols_group <- setdiff(cols_group, cols_expr)
cols_retain <- c(cols_group, cols_expr)
select(out, all_of(cols_retain))
Copy link
Collaborator

@eutwt eutwt Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some columns could have been removed in mutate/transmute with colnm = NULL, e.g.

tibble(x = 1, y = 2) %>% 
  lazy_dt() %>% 
  transmute(x, z = 3, z = NULL)

So, maybe intersect() with the colnames of the mutate before select?

cols_retain <- intersect(c(cols_group, cols_expr), out$vars)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or, could use any_of()

}
7 changes: 7 additions & 0 deletions tests/testthat/_snaps/step-call.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
Output
setnames(copy(DT), c("a", "b", "c"), toupper)

# can compute distinct computed variables

Code
dt %>% distinct(z = x + y) %>% show_query()
Output
unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)])

# errors are raised

Code
Expand Down
5 changes: 2 additions & 3 deletions tests/testthat/test-step-call.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,8 @@ test_that("keeps all variables if requested", {
test_that("can compute distinct computed variables", {
dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt")

expect_equal(
dt %>% distinct(z = x + y) %>% show_query(),
expr(unique(dt[, .(z = x + y)]))
expect_snapshot(
dt %>% distinct(z = x + y) %>% show_query()
)

expect_equal(
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@ test_that("generates single calls as expect", {
dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(),
expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)])
)

expect_equal(
dt %>% transmute(x2 = x * 2) %>% show_query(),
expr(DT[, .(x2 = x * 2)])
)
})

test_that("mutate generates compound expression if needed", {
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-subset-summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@ test_that("simple calls generate expected translations", {
dt %>% summarise(x = mean(x)) %>% show_query(),
expr(DT[, .(x = mean(x))])
)

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
)
})

test_that("can use with across", {
Expand Down
247 changes: 12 additions & 235 deletions tests/testthat/test-step-subset-transmute.R
Original file line number Diff line number Diff line change
@@ -1,249 +1,26 @@
test_that("simple calls generate expected translations", {
test_that("works", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
dt %>% transmute(x) %>% collect(),
dt %>% mutate(x, .keep = "none") %>% collect()
)
})

test_that("transmute generates compound expression if needed", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")
test_that("empty dots preserves groups", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>%
group_by(y)

expect_equal(
dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(),
expr(DT[, {
x2 <- x * 2
x4 <- x2 * 2
.(x2, x4)
}])
)
})

test_that("allows multiple assignment to the same variable", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

# when nested
expect_equal(
dt %>% transmute(x = x * 2, x = x * 2) %>% show_query(),
expr(DT[, {
x <- x * 2
x <- x * 2
.(x)
}])
)

# when not nested
expect_equal(
dt %>% transmute(z = 2, y = 3) %>% show_query(),
expr(DT[, .(z = 2, y = 3)])
)
})


test_that("groups are respected", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(y = 2)

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% show_query(),
expr(DT[, .(y = 2), keyby = .(x)])
)
})

test_that("grouping vars can be transmuted", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = 2)

expect_equal(dt$vars, c("x", "y"))
expect_equal(dt$groups, "x")
expect_equal(
dt %>% show_query(),
expr(copy(DT)[, `:=`(x = x + 1)][, .(y = 2), keyby = .(x)])
)

skip("transmuting grouping vars with nesting is not supported")
dt <- lazy_dt(data.table(x = 1), "DT") %>%
group_by(x) %>%
transmute(x = x + 1, y = x + 1, x = y + 1)

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% collect(),
tibble(x = 4, y = 3) %>% group_by(x)
)
})

test_that("empty transmute works", {
dt <- lazy_dt(data.frame(x = 1), "DT")
expect_equal(transmute(dt) %>% show_query(), expr(DT[, 0L]))
expect_equal(transmute(dt)$vars, character())
expect_equal(transmute(dt, !!!list()) %>% show_query(), expr(DT[, 0L]))

dt_grouped <- lazy_dt(data.frame(x = 1), "DT") %>% group_by(x)
expect_equal(transmute(dt_grouped)$vars, "x")
})

test_that("only transmuting groups works", {
dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x)
expect_equal(transmute(dt, x) %>% collect(), dt %>% collect())
expect_equal(transmute(dt, x)$vars, "x")
})

test_that("across() can access previously created variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, across(y, sqrt))
expect_equal(
collect(step),
tibble(y = sqrt(2))
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
y <- sqrt(y)
.(y)
}])
)
})

test_that("new columns take precedence over global variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
y <- 'global var'
step <- transmute(dt, y = 2, z = y + 1)
expect_equal(
collect(step),
tibble(y = 2, z = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y + 1
.(y, z)
}])
)
})

# var = NULL -------------------------------------------------------------

test_that("var = NULL when var is in original data", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}][, `:=`("x", NULL)])
)
})

test_that("var = NULL when var is in final output", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = NULL, y = 3)
expect_equal(
collect(step),
tibble(y = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- NULL
y <- 3
.(y)
}])
)
})

test_that("temp var with nested arguments", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}][, `:=`("y", NULL)])
)
})
res <- dt %>% transmute() %>% collect()

test_that("temp var with no new vars added", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, y = NULL)
expect_equal(
collect(step),
tibble()
)
expect_equal(
step$vars,
character()
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
.(y)
}][, `:=`("y", NULL)])
)
expect_equal(names(res), "y")
})

test_that("var = NULL works when data is grouped", {
dt <- lazy_dt(data.frame(x = 1, g = 1), "DT") %>% group_by(g)
test_that("preserves column order", {
dt <- lazy_dt(data.table(x = 1, y = 1), "DT")

# when var is in original data
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}, keyby = .(g)][, `:=`("x", NULL)])
)
res <- dt %>% transmute(y, x) %>% collect()

# when var is not in original data
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}, keyby = .(g)][, `:=`("y", NULL)])
)
expect_equal(names(res), c("y", "x"))
})

Loading