From ae5be2b8b44a81c0971a3b6ec00f304606f2ad65 Mon Sep 17 00:00:00 2001 From: sumny Date: Fri, 4 Dec 2020 17:36:09 +0100 Subject: [PATCH 1/5] init design --- R/Graph.R | 50 +++++++++++++++++++++++++++ tests/testthat/test_Graph.R | 69 +++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/R/Graph.R b/R/Graph.R index f97b7b318..455373d1d 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -98,6 +98,7 @@ #' * `update_ids(prefix = "", postfix = "")` \cr #' (`character`, `character`) -> `self` \cr #' Pre- or postfix [`PipeOp`]'s existing ids. Both `prefix` and `postfix` default to `""`, i.e. no changes. +#' * FIXME: replace_subgraph #' * `train(input, single_input = TRUE)` \cr #' (`any`, `logical(1)`) -> named `list`\cr #' Train [`Graph`] by traversing the [`Graph`]s' edges and calling all the [`PipeOp`]'s `$train` methods in turn. @@ -246,6 +247,55 @@ Graph = R6Class("Graph", invisible(self) }, + replace_subgraph = function(ids, substitute) { + assert_subset(ids, choices = self$ids(TRUE), empty.ok = FALSE) + substitute = as_graph(substitute, clone = TRUE) + substitute_ids = substitute$ids(TRUE) + if (length(substitute_ids) != length(ids)) { + stopf("Number of ids to replace differs from the number of PipeOps in the substitute") + } + if (length(substitute_ids) > 1L) { + topo_order = match(ids, self$ids(TRUE)) + if (!identical(topo_order, sort(topo_order, decreasing = FALSE))) { + stopf("ids must be topologically sorted.") + } + } + + old_pipeops = self$pipeops + old_edges = self$edges + + # if we fail, pipeops and edges get reset + on.exit({ + self$pipeops = old_pipeops + self$edges = old_edges + }) + + # drop the old pipeops + self$pipeops[ids] = NULL + # FIXME: do we also need to drop the respective params or is this handled automatically? + + # add all substitute pipeops + substitute_ids = substitute$ids(TRUE) + for (i in seq_along(substitute_ids)) { + self$add_pipeop(substitute$pipeops[[substitute_ids[i]]]) + } + + # add all edges connecting to or from the substitute pipeops + edges_to_add_ids = self$edges$src_id %in% ids | self$edges$dst_id %in% ids + edges_to_add = self$edges[edges_to_add_ids, ] + for (i in seq_along(ids)) { + edges_to_add$src_id[ids[i] == edges_to_add$src_id] = substitute_ids[i] + edges_to_add$dst_id[ids[i] == edges_to_add$dst_id] = substitute_ids[i] + } + self$edges = self$edges[!edges_to_add_ids, ] # subset the original edges to the ones that are not to be replaced + for (j in seq_len(nrow(edges_to_add))) { + self$add_edge(edges_to_add[j, ][["src_id"]], dst_id = edges_to_add[j, ][["dst_id"]], src_channel = edges_to_add[j, ][["src_channel"]], dst_channel = edges_to_add[j, ][["dst_channel"]]) + } + + on.exit() + invisible(self) + }, + plot = function(html = FALSE) { assert_flag(html) if (!length(self$pipeops)) { diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R index eb90d9d08..ecd5e1101 100644 --- a/tests/testthat/test_Graph.R +++ b/tests/testthat/test_Graph.R @@ -426,3 +426,72 @@ test_that("dot output", { "6 [label=\"OUTPUT", "nop_output\",fontsize=24]"), out[-c(1L, 15L)]) }) + +test_that("replace_subgraph", { + task = tsk("iris") + + # Basics + gr = Graph$new()$add_pipeop(PipeOpDebugMulti$new(2, 2)) + expect_error(gr$replace_subgraph("id_not_present", PipeOpDebugMulti$new(2, 2)), + regexp = "Assertion on 'ids' failed") + expect_error(gr$replace_subgraph("debug.multi", NULL), + regexp = "op can not be converted to PipeOp") + expect_error(gr$replace_subgraph("debug.multi", PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2")), + regexp = "Number of ids to replace differs from the number of PipeOps in the substitute") + address_old = address(gr) + gr_old = gr$clone(deep = TRUE) + output = gr$replace_subgraph("debug.multi", substitute = PipeOpDebugMulti$new(2, 2)) + expect_identical(output, gr) + expect_true(address_old == address(gr)) # in place modification + expect_deep_clone(gr_old, gr) # replacing with exactly the same pipeop is the same as a deep clone + + gr = PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2") + gr_old = gr$clone(deep = TRUE) + expect_error(gr$replace_subgraph("dm2", substitute = PipeOpDebugMulti$new(1, 2)), + regexp = "One of the following must apply") + expect_equal(gr$pipeops, gr_old$pipeops) + expect_equal(gr$edges, gr_old$edges) + expect_deep_clone(gr_old, gr) # nothing changed due to reset + + # Linear Graph + gr = po("scale") %>>% po("pca") %>>% lrn("classif.rpart") + gr_new = gr$clone(deep = TRUE) + gr_new$replace_subgraph("scale", substitute = po("scalemaxabs")) + expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "pca", "classif.rpart"))) + expect_null(gr_new$train(task)[[1L]]) + expect_prediction_classif(gr_new$predict(task)[[1L]]) + + gr_new = gr$clone(deep = TRUE) + gr_new$replace_subgraph(c("scale", "pca"), substitute = po("scalemaxabs") %>>% po("ica")) + expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "ica", "classif.rpart"))) + expect_null(gr_new$train(task)[[1L]]) + expect_prediction_classif(gr_new$predict(task)[[1L]]) + + gr_new = gr$clone(deep = TRUE) + expect_error(gr_new$replace_subgraph(c("pca", "scale"), substitute = po("scalemaxabs") %>>% po("ica")), + regexp = "ids must be topologically sorted") + + # Non linear Graph + gr = po("scale") %>>% + po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch") %>>% + lrn("classif.rpart") + gr_new = gr$clone(deep = TRUE) + gr_new$replace_subgraph("ica", substitute = po("pca", id = "pca2")) + expect_true(all(gr_new$ids(TRUE) == c("scale", "branch", "pca", "pca2", "unbranch", "classif.rpart"))) + expect_null(gr_new$train(task)[[1L]]) + state1 = gr_new$state + gr_new$param_set$values$branch.selection = "ica" + expect_null(gr_new$train(task)[[1L]]) + state2 = gr_new$state + expect_true(test_r6(state1$pca2, classes = "NO_OP")) + expect_true(test_r6(state2$pca, classes = "NO_OP")) + expect_class(state1$pca, classes = "prcomp") + expect_class(state2$pca2, classes = "prcomp") + expect_prediction_classif(gr_new$predict(task)[[1L]]) + + # ppl with multiplicities + gr = po("scale") %>>% ppl("bagging", graph = lrn("classif.rpart"), averager = po("classifavg", collect_multiplicity = TRUE)) + gr_new = gr$clone(deep = TRUE) + # FIXME: + gr_new$replace_subgraph("classif.rpart", substitute = lrn("classif.rpart")) +}) From f15df98f3a352616267db2032847afac3c092d78 Mon Sep 17 00:00:00 2001 From: sumny Date: Mon, 1 Feb 2021 00:12:57 +0100 Subject: [PATCH 2/5] reiterate --- R/Graph.R | 130 +++++++++++++++++++++++++++--------- tests/testthat/test_Graph.R | 111 +++++++++++++++--------------- 2 files changed, 157 insertions(+), 84 deletions(-) diff --git a/R/Graph.R b/R/Graph.R index 455373d1d..cb1520a78 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -70,6 +70,10 @@ #' will not be connected within the [`Graph`] at first.\cr #' Instead of supplying a [`PipeOp`] directly, an object that can naturally be converted to a [`PipeOp`] can also #' be supplied, e.g. a [`Learner`][mlr3::Learner] or a [`Filter`][mlr3filters::Filter]; see [`as_pipeop()`]. +#' * `remove_pipeop(id)` \cr +#' (`character(1)`) -> `self` \cr +#' Mutates [`Graph`] by removing the [`PipeOp`] with the matching id from the [`Graph`]. +#' Corresponding edges are also removed as well as the corresponding [`ParamSet`][paradox::ParamSet]. #' * `add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)` \cr #' (`character(1)`, `character(1)`, #' `character(1)` | `numeric(1)` | `NULL`, @@ -79,6 +83,9 @@ #' channel `dst_channel` (identified by its name or number as listed in the [`PipeOp`]'s `$input`). #' If source or destination [`PipeOp`] have only one input / output channel and `src_channel` / `dst_channel` #' are therefore unambiguous, they can be omitted (i.e. left as `NULL`). +#' * `replace_subgraph(ids, substitute)` \cr +#' (`character()`, [`Graph`] | [`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr +#' Replace a subgraph specified via ids with the substitute subgraph. #' * `plot(html)` \cr #' (`logical(1)`) -> `NULL` \cr #' Plot the [`Graph`], using either the \pkg{igraph} package (for `html = FALSE`, default) or @@ -98,7 +105,6 @@ #' * `update_ids(prefix = "", postfix = "")` \cr #' (`character`, `character`) -> `self` \cr #' Pre- or postfix [`PipeOp`]'s existing ids. Both `prefix` and `postfix` default to `""`, i.e. no changes. -#' * FIXME: replace_subgraph #' * `train(input, single_input = TRUE)` \cr #' (`any`, `logical(1)`) -> named `list`\cr #' Train [`Graph`] by traversing the [`Graph`]s' edges and calling all the [`PipeOp`]'s `$train` methods in turn. @@ -247,52 +253,112 @@ Graph = R6Class("Graph", invisible(self) }, - replace_subgraph = function(ids, substitute) { - assert_subset(ids, choices = self$ids(TRUE), empty.ok = FALSE) - substitute = as_graph(substitute, clone = TRUE) - substitute_ids = substitute$ids(TRUE) - if (length(substitute_ids) != length(ids)) { - stopf("Number of ids to replace differs from the number of PipeOps in the substitute") - } - if (length(substitute_ids) > 1L) { - topo_order = match(ids, self$ids(TRUE)) - if (!identical(topo_order, sort(topo_order, decreasing = FALSE))) { - stopf("ids must be topologically sorted.") - } + remove_pipeop = function(id) { + assert_subset(id, choices = self$ids(TRUE), empty.ok = FALSE) + self$pipeops[[id]] = NULL + self$edges = self$edges[src_id != id & dst_id != id] + + if (!is.null(private$.param_set)) { + # param_set is built on-demand; if it has not been requested before, its value may be NULL + # and we don't need to remove anything. + private$.param_set$remove_sets(id) } + invisible(self) + }, + replace_subgraph = function(ids, substitute) { + # if this fails, pipeops, edges and param_set get reset old_pipeops = self$pipeops old_edges = self$edges - - # if we fail, pipeops and edges get reset + old_ps = private$.param_set on.exit({ self$pipeops = old_pipeops self$edges = old_edges + private$.param_set = old_ps }) - # drop the old pipeops - self$pipeops[ids] = NULL - # FIXME: do we also need to drop the respective params or is this handled automatically? + assert_subset(ids, choices = self$ids(TRUE), empty.ok = FALSE) + ids = self$ids(TRUE)[match(ids, self$ids(TRUE))] # always reorder ids topologically + substitute = as_graph(substitute, clone = TRUE) - # add all substitute pipeops - substitute_ids = substitute$ids(TRUE) - for (i in seq_along(substitute_ids)) { - self$add_pipeop(substitute$pipeops[[substitute_ids[i]]]) + # check whether the input of the substitute is a vararg channel + if (any(strip_multiplicity_type(substitute$input$channel.name) == "...")) { + stopf("Using a substitute with a vararg input channel is not supported (yet).") } - # add all edges connecting to or from the substitute pipeops - edges_to_add_ids = self$edges$src_id %in% ids | self$edges$dst_id %in% ids - edges_to_add = self$edges[edges_to_add_ids, ] - for (i in seq_along(ids)) { - edges_to_add$src_id[ids[i] == edges_to_add$src_id] = substitute_ids[i] - edges_to_add$dst_id[ids[i] == edges_to_add$dst_id] = substitute_ids[i] + # check whether the last id that is to be replaced connects to a varag channel + if (nrow(self$edges)) { # this can be a data table with zero rows + type = self$edges[src_id == range(ids)[2L], dst_channel] + if (length(type)) { # can be of length 0 if this is the end of the graph + if (strip_multiplicity_type(type) == "...") { + stopf("Replacing a Subgraph that is connected to a vararg channel is not supported (yet).") + } + } } - self$edges = self$edges[!edges_to_add_ids, ] # subset the original edges to the ones that are not to be replaced - for (j in seq_len(nrow(edges_to_add))) { - self$add_edge(edges_to_add[j, ][["src_id"]], dst_id = edges_to_add[j, ][["dst_id"]], src_channel = edges_to_add[j, ][["src_channel"]], dst_channel = edges_to_add[j, ][["dst_channel"]]) + + input_orig = self$input + output_orig = self$output + + for (id in ids) { + self$remove_pipeop(id) # also handles param_set } - on.exit() + input = self$input[name != input_orig$name] + output = self$output[name != output_orig$name] + + for (pipeop in substitute$pipeops) { + self$add_pipeop(pipeop) # also handles param_set + } + if (nrow(substitute$edges)) { + self$edges = rbind(self$edges, substitute$edges) + } + + # build edges from free output channels of substitute and free input channels of self + n_input = nrow(input) + if (n_input) { + if (nrow(substitute$output) != n_input) { + stopf("PipeOps to be connected have mismatching number of inputs / outputs.") + } + for (row in seq_len(n_input)) { + if (!are_types_compatible(strip_multiplicity_type(substitute$output$train[row]), strip_multiplicity_type(input$train[row]))) { + stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)", + substitute$output$op.id[row], substitute$output$train[row], input$op.id[row], input$train[row]) + } + if (!are_types_compatible(strip_multiplicity_type(substitute$output$predict[row]), strip_multiplicity_type(input$predict[row]))) { + stopf("Output type of PipeOp %s during prediction (%s) incompatible with input type of PipeOp %s (%s)", + substitute$output$op.id[row], substitute$output$predict[row], input$op.id[row], input$predict[row]) + } + } + new_edges = cbind(substitute$output[, list(src_id = get("op.id"), src_channel = get("channel.name"))], input[, list(dst_id = get("op.id"), dst_channel = get("channel.name"))]) + self$edges = rbind(self$edges, new_edges) + } + + # build edges from free output channels of self and free input channels of substitute + n_output = nrow(output) + if (n_output) { + if (n_output != nrow(substitute$input)) { + stopf("PipeOps to be connected have mismatching number of inputs / outputs.") + } + for (row in seq_len(n_output)) { + if (!are_types_compatible(strip_multiplicity_type(output$train[row]), strip_multiplicity_type(substitute$input$train[row]))) { + stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)", + output$op.id[row], output$train[row], substitute$input$op.id[row], substitute$input$train[row]) + } + if (!are_types_compatible(strip_multiplicity_type(output$predict[row]), strip_multiplicity_type(substitute$input$predict[row]))) { + stopf("Output type of PipeOp %s during prediction (%s) incompatible with input type of PipeOp %s (%s)", + output$op.id[row], output$predict[row], substitute$input$op.id[row], substitute$input$predict[row]) + } + } + new_edges = cbind(output[, list(src_id = get("op.id"), src_channel = get("channel.name"))], substitute$input[, list(dst_id = get("op.id"), dst_channel = get("channel.name"))]) + self$edges = rbind(self$edges, new_edges) + } + + # check if valid DAG + tryCatch(self$ids(TRUE), error = function(error_condition) { + stopf("Failed to infer new Graph structure. Resetting.") + }) + + on.exit({}) invisible(self) }, diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R index ecd5e1101..ab6ba3877 100644 --- a/tests/testthat/test_Graph.R +++ b/tests/testthat/test_Graph.R @@ -4,9 +4,9 @@ test_that("linear graph", { g = Graph$new() expect_equal(g$ids(sorted = TRUE), character(0)) - # FIXME: we should "dummy" ops, so we can change properties of the ops at will + # FIXME: we should use "dummy" ops, so we can change properties of the ops at will # we should NOT use PipeOpNOP, because we want to check that $train/$predict actually does something. - # FIXME: we should packages of the graph + # FIXME: we should check packages of the graph op_ds = PipeOpSubsample$new() op_pca = PipeOpPCA$new() op_lrn = PipeOpLearner$new(mlr_learners$get("classif.rpart")) @@ -427,71 +427,78 @@ test_that("dot output", { "nop_output\",fontsize=24]"), out[-c(1L, 15L)]) }) + + test_that("replace_subgraph", { task = tsk("iris") # Basics gr = Graph$new()$add_pipeop(PipeOpDebugMulti$new(2, 2)) + address_old = address(gr) + gr_old = gr$clone(deep = TRUE) expect_error(gr$replace_subgraph("id_not_present", PipeOpDebugMulti$new(2, 2)), regexp = "Assertion on 'ids' failed") expect_error(gr$replace_subgraph("debug.multi", NULL), regexp = "op can not be converted to PipeOp") - expect_error(gr$replace_subgraph("debug.multi", PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2")), - regexp = "Number of ids to replace differs from the number of PipeOps in the substitute") - address_old = address(gr) - gr_old = gr$clone(deep = TRUE) - output = gr$replace_subgraph("debug.multi", substitute = PipeOpDebugMulti$new(2, 2)) - expect_identical(output, gr) + expect_equal(gr, gr_old) # error results in a clean reset + expect_true(address_old == address(gr)) + expect_deep_clone(gr_old, gr) + + gr$replace_subgraph("debug.multi", substitute = PipeOpDebugMulti$new(2, 2)) + expect_equal(gr_old, gr) expect_true(address_old == address(gr)) # in place modification expect_deep_clone(gr_old, gr) # replacing with exactly the same pipeop is the same as a deep clone - gr = PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2") - gr_old = gr$clone(deep = TRUE) - expect_error(gr$replace_subgraph("dm2", substitute = PipeOpDebugMulti$new(1, 2)), - regexp = "One of the following must apply") - expect_equal(gr$pipeops, gr_old$pipeops) - expect_equal(gr$edges, gr_old$edges) - expect_deep_clone(gr_old, gr) # nothing changed due to reset - # Linear Graph gr = po("scale") %>>% po("pca") %>>% lrn("classif.rpart") - gr_new = gr$clone(deep = TRUE) - gr_new$replace_subgraph("scale", substitute = po("scalemaxabs")) - expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "pca", "classif.rpart"))) - expect_null(gr_new$train(task)[[1L]]) - expect_prediction_classif(gr_new$predict(task)[[1L]]) - - gr_new = gr$clone(deep = TRUE) - gr_new$replace_subgraph(c("scale", "pca"), substitute = po("scalemaxabs") %>>% po("ica")) - expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "ica", "classif.rpart"))) - expect_null(gr_new$train(task)[[1L]]) - expect_prediction_classif(gr_new$predict(task)[[1L]]) - - gr_new = gr$clone(deep = TRUE) - expect_error(gr_new$replace_subgraph(c("pca", "scale"), substitute = po("scalemaxabs") %>>% po("ica")), - regexp = "ids must be topologically sorted") + gr_old = gr$clone(deep = TRUE) + gr$replace_subgraph("scale", substitute = po("scalemaxabs")) # replace beginning + expect_set_equal(gr$ids(), c("scalemaxabs", "pca", "classif.rpart")) + expect_true(gr$input$op.id == "scalemaxabs") + expect_true(gr$output$op.id == "classif.rpart") + expect_null(gr$train(task)[[1L]]) + expect_prediction_classif(gr$predict(task)[[1L]]) + + gr = gr_old$clone(deep = TRUE) + gr$replace_subgraph("classif.rpart", substitute = lrn("classif.featureless")) # replace end + expect_set_equal(gr$ids(), c("scale", "pca", "classif.featureless")) + expect_true(gr$input$op.id == "scale") + expect_true(gr$output$op.id == "classif.featureless") + expect_null(gr$train(task)[[1L]]) + expect_prediction_classif(gr$predict(task)[[1L]]) + + gr = gr_old$clone(deep = TRUE) + gr$replace_subgraph(c("scale", "pca", "classif.rpart"), substitute = po("scalemaxabs") %>>% po("ica") %>>% lrn("classif.featureless")) # replace whole graph + expect_set_equal(gr$ids(), c("scalemaxabs", "ica", "classif.featureless")) + expect_true(gr$input$op.id == "scalemaxabs") + expect_true(gr$output$op.id == "classif.featureless") + expect_null(gr$train(task)[[1L]]) + expect_prediction_classif(gr$predict(task)[[1L]]) + + gr = gr_old$clone(deep = TRUE) + gr$replace_subgraph(c("pca", "scale"), substitute = po("scalemaxabs") %>>% po("ica")) # replace linear subgraph + expect_set_equal(gr$ids(), c("scalemaxabs", "ica", "classif.rpart")) + expect_true(gr$input$op.id == "scalemaxabs") + expect_true(gr$output$op.id == "classif.rpart") + expect_null(gr$train(task)[[1L]]) + expect_prediction_classif(gr$predict(task)[[1L]]) # Non linear Graph - gr = po("scale") %>>% - po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch") %>>% - lrn("classif.rpart") - gr_new = gr$clone(deep = TRUE) - gr_new$replace_subgraph("ica", substitute = po("pca", id = "pca2")) - expect_true(all(gr_new$ids(TRUE) == c("scale", "branch", "pca", "pca2", "unbranch", "classif.rpart"))) - expect_null(gr_new$train(task)[[1L]]) - state1 = gr_new$state - gr_new$param_set$values$branch.selection = "ica" - expect_null(gr_new$train(task)[[1L]]) - state2 = gr_new$state - expect_true(test_r6(state1$pca2, classes = "NO_OP")) + gr = po("scale") %>>% po("branch", c("pca", "nop")) %>>% gunion(list(po("pca"), po("nop"))) %>>% po("unbranch") %>>% lrn("classif.rpart") + gr_old = gr$clone(deep = TRUE) + expect_error(gr$replace_subgraph(c("nop"), substitute = po("ica")), regexp = "connected to a vararg channel is not supported") + expect_error(gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = lrn("classif.featureless")), + regexp = "Output type of PipeOp classif.featureless during training") + gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch")) + expect_set_equal(gr$ids(TRUE), c("scale", "branch", "pca", "ica", "unbranch", "classif.rpart")) + expect_true(gr$input$op.id == "scale") + expect_true(gr$output$op.id == "classif.rpart") + expect_null(gr$train(task)[[1L]]) + state1 = gr$state + gr$param_set$values$branch.selection = "ica" + expect_null(gr$train(task)[[1L]]) + state2 = gr$state + expect_true(test_r6(state1$ica, classes = "NO_OP")) expect_true(test_r6(state2$pca, classes = "NO_OP")) - expect_class(state1$pca, classes = "prcomp") - expect_class(state2$pca2, classes = "prcomp") - expect_prediction_classif(gr_new$predict(task)[[1L]]) - - # ppl with multiplicities - gr = po("scale") %>>% ppl("bagging", graph = lrn("classif.rpart"), averager = po("classifavg", collect_multiplicity = TRUE)) - gr_new = gr$clone(deep = TRUE) - # FIXME: - gr_new$replace_subgraph("classif.rpart", substitute = lrn("classif.rpart")) + expect_prediction_classif(gr$predict(task)[[1L]]) }) From 1829aaf605a1f4bf10a5969fe5e43cc78bf3071a Mon Sep 17 00:00:00 2001 From: sumny Date: Mon, 1 Feb 2021 00:16:09 +0100 Subject: [PATCH 3/5] rerun docs --- R/Graph.R | 2 +- man/Graph.Rd | 7 +++++++ man/mlr_learners_avg.Rd | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/R/Graph.R b/R/Graph.R index cb1520a78..d1b80e6bf 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -85,7 +85,7 @@ #' are therefore unambiguous, they can be omitted (i.e. left as `NULL`). #' * `replace_subgraph(ids, substitute)` \cr #' (`character()`, [`Graph`] | [`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr -#' Replace a subgraph specified via ids with the substitute subgraph. +#' Mutates [`Graph`] by replace a subgraph specified via ids with the substitute subgraph. #' * `plot(html)` \cr #' (`logical(1)`) -> `NULL` \cr #' Plot the [`Graph`], using either the \pkg{igraph} package (for `html = FALSE`, default) or diff --git a/man/Graph.Rd b/man/Graph.Rd index 599473314..68906377d 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -82,6 +82,10 @@ Mutates \code{\link{Graph}} by adding a \code{\link{PipeOp}} to the \code{\link{ will not be connected within the \code{\link{Graph}} at first.\cr Instead of supplying a \code{\link{PipeOp}} directly, an object that can naturally be converted to a \code{\link{PipeOp}} can also be supplied, e.g. a \code{\link[mlr3:Learner]{Learner}} or a \code{\link[mlr3filters:Filter]{Filter}}; see \code{\link[=as_pipeop]{as_pipeop()}}. +\item \code{remove_pipeop(id)} \cr +(\code{character(1)}) -> \code{self} \cr +Mutates \code{\link{Graph}} by removing the \code{\link{PipeOp}} with the matching id from the \code{\link{Graph}}. +Corresponding edges are also removed as well as the corresponding \code{\link[paradox:ParamSet]{ParamSet}}. \item \code{add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)} \cr (\code{character(1)}, \code{character(1)}, \code{character(1)} | \code{numeric(1)} | \code{NULL}, @@ -91,6 +95,9 @@ Add an edge from \code{\link{PipeOp}} \code{src_id}, and its channel \code{src_c channel \code{dst_channel} (identified by its name or number as listed in the \code{\link{PipeOp}}'s \verb{$input}). If source or destination \code{\link{PipeOp}} have only one input / output channel and \code{src_channel} / \code{dst_channel} are therefore unambiguous, they can be omitted (i.e. left as \code{NULL}). +\item \code{replace_subgraph(ids, substitute)} \cr +(\code{character()}, \code{\link{Graph}} | \code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}) -> \code{self} \cr +Mutates \code{\link{Graph}} by replace a subgraph specified via ids with the substitute subgraph. \item \code{plot(html)} \cr (\code{logical(1)}) -> \code{NULL} \cr Plot the \code{\link{Graph}}, using either the \pkg{igraph} package (for \code{html = FALSE}, default) or diff --git a/man/mlr_learners_avg.Rd b/man/mlr_learners_avg.Rd index be07bbe04..a8553c79e 100644 --- a/man/mlr_learners_avg.Rd +++ b/man/mlr_learners_avg.Rd @@ -41,7 +41,7 @@ and \code{"regr.mse"}, i.e. mean squared error for regression. \item \code{optimizer} :: \code{\link[bbotk:Optimizer]{Optimizer}} | \code{character(1)}\cr \code{\link[bbotk:Optimizer]{Optimizer}} used to find optimal thresholds. If \code{character}, converts to \code{\link[bbotk:Optimizer]{Optimizer}} -via \code{\link[bbotk:opt]{opt}}. Initialized to \code{\link[bbotk:OptimizerNLoptr]{OptimizerNLoptr}}. +via \code{\link[bbotk:opt]{opt}}. Initialized to \code{\link[bbotk:mlr_optimizers_nloptr]{OptimizerNLoptr}}. Nloptr hyperparameters are initialized to \code{xtol_rel = 1e-8}, \code{algorithm = "NLOPT_LN_COBYLA"} and equal initial weights for each learner. For more fine-grained control, it is recommended to supply a instantiated \code{\link[bbotk:Optimizer]{Optimizer}}. From 93ca68c804576964ab162831d602bb9c111e38cf Mon Sep 17 00:00:00 2001 From: sumny Date: Mon, 1 Feb 2021 00:19:00 +0100 Subject: [PATCH 4/5] static code checks --- R/zzz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index 885b08e68..42012754e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -36,6 +36,6 @@ register_mlr3 = function() { } # nocov end # static code checks should not complain about commonly used data.table columns -utils::globalVariables(c("src_id", "dst_id", "name", "op.id", "response", "truth")) +utils::globalVariables(c("src_id", "dst_id", "src_channel", "dst_channel", "name", "op.id", "response", "truth")) leanify_package() From 2903dcc89194586b34ebf5adcf70e28e78803502 Mon Sep 17 00:00:00 2001 From: sumny Date: Thu, 4 Feb 2021 23:11:29 +0100 Subject: [PATCH 5/5] vararg inputs --- R/Graph.R | 43 +++++++++++++++++++------------------ man/Graph.Rd | 5 +++-- tests/testthat/test_Graph.R | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/R/Graph.R b/R/Graph.R index d1b80e6bf..83bd21043 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -26,7 +26,7 @@ #' @section Fields: #' * `pipeops` :: named `list` of [`PipeOp`] \cr #' Contains all [`PipeOp`]s in the [`Graph`], named by the [`PipeOp`]'s `$id`s. -#' * `edges` :: [`data.table`] with columns `src_id` (`character`), `src_channel` (`character`), `dst_id` (`character`), `dst_channel` (`character`)\cr +#' * `edges` :: [`data.table`] with columns `src_id` (`character`), `src_channel` (`character`), `dst_id` (`character`), `dst_channel` (`character`)\cr #' Table of connections between the [`PipeOp`]s. A [`data.table`]. `src_id` and `dst_id` are `$id`s of [`PipeOp`]s that must be present in #' the `$pipeops` list. `src_channel` and `dst_channel` must respectively be `$output` and `$input` channel names of the #' respective [`PipeOp`]s. @@ -85,7 +85,8 @@ #' are therefore unambiguous, they can be omitted (i.e. left as `NULL`). #' * `replace_subgraph(ids, substitute)` \cr #' (`character()`, [`Graph`] | [`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr -#' Mutates [`Graph`] by replace a subgraph specified via ids with the substitute subgraph. +#' Mutates [`Graph`] by replacing a subgraph specified via ids with the supplied substitute subgraph. +#' Note that the supplied ids are always reordered in topological order with respect to the [`Graph`]. #' * `plot(html)` \cr #' (`logical(1)`) -> `NULL` \cr #' Plot the [`Graph`], using either the \pkg{igraph} package (for `html = FALSE`, default) or @@ -281,20 +282,23 @@ Graph = R6Class("Graph", ids = self$ids(TRUE)[match(ids, self$ids(TRUE))] # always reorder ids topologically substitute = as_graph(substitute, clone = TRUE) + # FIXME: check that ids are actually a valid subgraph of graph + + # FIXME: # check whether the input of the substitute is a vararg channel - if (any(strip_multiplicity_type(substitute$input$channel.name) == "...")) { - stopf("Using a substitute with a vararg input channel is not supported (yet).") - } + #if (any(strip_multiplicity_type(substitute$input$channel.name) == "...")) { + # stopf("Using a substitute with a vararg input channel is not supported (yet).") + #} # check whether the last id that is to be replaced connects to a varag channel - if (nrow(self$edges)) { # this can be a data table with zero rows - type = self$edges[src_id == range(ids)[2L], dst_channel] - if (length(type)) { # can be of length 0 if this is the end of the graph - if (strip_multiplicity_type(type) == "...") { - stopf("Replacing a Subgraph that is connected to a vararg channel is not supported (yet).") - } - } - } + #if (nrow(self$edges)) { # this can be a data table with zero rows + # type = self$edges[src_id == range(ids)[2L], dst_channel] + # if (length(type)) { # can be of length 0 if this is the end of the graph + # if (strip_multiplicity_type(type) == "...") { + # stopf("Replacing a Subgraph that is connected to a vararg channel is not supported (yet).") + # } + # } + #} input_orig = self$input output_orig = self$output @@ -313,12 +317,11 @@ Graph = R6Class("Graph", self$edges = rbind(self$edges, substitute$edges) } + # FIXME: this reuses a lot of `%>>%`, we could write a general helper # build edges from free output channels of substitute and free input channels of self n_input = nrow(input) if (n_input) { - if (nrow(substitute$output) != n_input) { - stopf("PipeOps to be connected have mismatching number of inputs / outputs.") - } + # FIXME: check number of inputs / outputs for (row in seq_len(n_input)) { if (!are_types_compatible(strip_multiplicity_type(substitute$output$train[row]), strip_multiplicity_type(input$train[row]))) { stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)", @@ -336,9 +339,7 @@ Graph = R6Class("Graph", # build edges from free output channels of self and free input channels of substitute n_output = nrow(output) if (n_output) { - if (n_output != nrow(substitute$input)) { - stopf("PipeOps to be connected have mismatching number of inputs / outputs.") - } + # FIXME: check number of inputs / outputs for (row in seq_len(n_output)) { if (!are_types_compatible(strip_multiplicity_type(output$train[row]), strip_multiplicity_type(substitute$input$train[row]))) { stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)", @@ -354,9 +355,9 @@ Graph = R6Class("Graph", } # check if valid DAG - tryCatch(self$ids(TRUE), error = function(error_condition) { + invisible(tryCatch(self$ids(TRUE), error = function(error_condition) { stopf("Failed to infer new Graph structure. Resetting.") - }) + })) on.exit({}) invisible(self) diff --git a/man/Graph.Rd b/man/Graph.Rd index 68906377d..c79987a84 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -34,7 +34,7 @@ the \code{\link{PipeOp}} results along the edges as input to other \code{\link{P \itemize{ \item \code{pipeops} :: named \code{list} of \code{\link{PipeOp}} \cr Contains all \code{\link{PipeOp}}s in the \code{\link{Graph}}, named by the \code{\link{PipeOp}}'s \verb{$id}s. -\item \code{edges} :: \code{\link{data.table}} with columns \code{src_id} (\code{character}), \code{src_channel} (\code{character}), \code{dst_id} (\code{character}), \code{dst_channel} (\code{character})\cr +\item \code{edges} :: \code{\link{data.table}} with columns \code{src_id} (\code{character}), \code{src_channel} (\code{character}), \code{dst_id} (\code{character}), \code{dst_channel} (\code{character})\cr Table of connections between the \code{\link{PipeOp}}s. A \code{\link{data.table}}. \code{src_id} and \code{dst_id} are \verb{$id}s of \code{\link{PipeOp}}s that must be present in the \verb{$pipeops} list. \code{src_channel} and \code{dst_channel} must respectively be \verb{$output} and \verb{$input} channel names of the respective \code{\link{PipeOp}}s. @@ -97,7 +97,8 @@ If source or destination \code{\link{PipeOp}} have only one input / output chann are therefore unambiguous, they can be omitted (i.e. left as \code{NULL}). \item \code{replace_subgraph(ids, substitute)} \cr (\code{character()}, \code{\link{Graph}} | \code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}) -> \code{self} \cr -Mutates \code{\link{Graph}} by replace a subgraph specified via ids with the substitute subgraph. +Mutates \code{\link{Graph}} by replacing a subgraph specified via ids with the supplied substitute subgraph. +Note that the supplied ids are always reordered in topological order with respect to the \code{\link{Graph}}. \item \code{plot(html)} \cr (\code{logical(1)}) -> \code{NULL} \cr Plot the \code{\link{Graph}}, using either the \pkg{igraph} package (for \code{html = FALSE}, default) or diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R index ab6ba3877..fe4a9dac4 100644 --- a/tests/testthat/test_Graph.R +++ b/tests/testthat/test_Graph.R @@ -486,7 +486,7 @@ test_that("replace_subgraph", { # Non linear Graph gr = po("scale") %>>% po("branch", c("pca", "nop")) %>>% gunion(list(po("pca"), po("nop"))) %>>% po("unbranch") %>>% lrn("classif.rpart") gr_old = gr$clone(deep = TRUE) - expect_error(gr$replace_subgraph(c("nop"), substitute = po("ica")), regexp = "connected to a vararg channel is not supported") + #expect_error(gr$replace_subgraph(c("nop"), substitute = po("ica")), regexp = "connected to a vararg channel is not supported") # FIXME: expect_error(gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = lrn("classif.featureless")), regexp = "Output type of PipeOp classif.featureless during training") gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch"))