From ae5be2b8b44a81c0971a3b6ec00f304606f2ad65 Mon Sep 17 00:00:00 2001
From: sumny <lennart.sch@web.de>
Date: Fri, 4 Dec 2020 17:36:09 +0100
Subject: [PATCH 1/5] init design

---
 R/Graph.R                   | 50 +++++++++++++++++++++++++++
 tests/testthat/test_Graph.R | 69 +++++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)

diff --git a/R/Graph.R b/R/Graph.R
index f97b7b318..455373d1d 100644
--- a/R/Graph.R
+++ b/R/Graph.R
@@ -98,6 +98,7 @@
 #' * `update_ids(prefix = "", postfix = "")` \cr
 #'   (`character`, `character`) -> `self` \cr
 #'   Pre- or postfix [`PipeOp`]'s existing ids. Both `prefix` and `postfix` default to `""`, i.e. no changes.
+#' * FIXME: replace_subgraph
 #' * `train(input, single_input = TRUE)` \cr
 #'   (`any`, `logical(1)`) -> named `list`\cr
 #'   Train [`Graph`] by traversing the [`Graph`]s' edges and calling all the [`PipeOp`]'s `$train` methods in turn.
@@ -246,6 +247,55 @@ Graph = R6Class("Graph",
       invisible(self)
     },
 
+    replace_subgraph = function(ids, substitute) {
+      assert_subset(ids, choices = self$ids(TRUE), empty.ok = FALSE)
+      substitute = as_graph(substitute, clone = TRUE)
+      substitute_ids = substitute$ids(TRUE)
+      if (length(substitute_ids) != length(ids)) {
+        stopf("Number of ids to replace differs from the number of PipeOps in the substitute")
+      }
+      if (length(substitute_ids) > 1L) {
+        topo_order = match(ids, self$ids(TRUE))
+        if (!identical(topo_order, sort(topo_order, decreasing =  FALSE))) {
+          stopf("ids must be topologically sorted.")
+        }
+      }
+
+      old_pipeops = self$pipeops
+      old_edges = self$edges
+
+      # if we fail, pipeops and edges get reset
+      on.exit({
+        self$pipeops = old_pipeops
+        self$edges = old_edges
+      })
+
+      # drop the old pipeops
+      self$pipeops[ids] = NULL
+      # FIXME: do we also need to drop the respective params or is this handled automatically?
+
+      # add all substitute pipeops
+      substitute_ids = substitute$ids(TRUE)
+      for (i in seq_along(substitute_ids)) {
+        self$add_pipeop(substitute$pipeops[[substitute_ids[i]]])
+      }
+
+      # add all edges connecting to or from the substitute pipeops
+      edges_to_add_ids = self$edges$src_id %in% ids | self$edges$dst_id %in% ids
+      edges_to_add = self$edges[edges_to_add_ids, ]
+      for (i in seq_along(ids)) {
+        edges_to_add$src_id[ids[i] == edges_to_add$src_id] = substitute_ids[i]
+        edges_to_add$dst_id[ids[i] == edges_to_add$dst_id] = substitute_ids[i]
+      }
+      self$edges = self$edges[!edges_to_add_ids, ]  # subset the original edges to the ones that are not to be replaced
+      for (j in seq_len(nrow(edges_to_add))) {
+        self$add_edge(edges_to_add[j, ][["src_id"]], dst_id = edges_to_add[j, ][["dst_id"]], src_channel = edges_to_add[j, ][["src_channel"]], dst_channel = edges_to_add[j, ][["dst_channel"]])
+      }
+
+      on.exit()
+      invisible(self)
+    },
+
     plot = function(html = FALSE) {
       assert_flag(html)
       if (!length(self$pipeops)) {
diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R
index eb90d9d08..ecd5e1101 100644
--- a/tests/testthat/test_Graph.R
+++ b/tests/testthat/test_Graph.R
@@ -426,3 +426,72 @@ test_that("dot output", {
     "6 [label=\"OUTPUT",
     "nop_output\",fontsize=24]"), out[-c(1L, 15L)])
 })
+
+test_that("replace_subgraph", {
+  task = tsk("iris")
+
+  # Basics
+  gr = Graph$new()$add_pipeop(PipeOpDebugMulti$new(2, 2))
+  expect_error(gr$replace_subgraph("id_not_present", PipeOpDebugMulti$new(2, 2)),
+    regexp = "Assertion on 'ids' failed")
+  expect_error(gr$replace_subgraph("debug.multi", NULL),
+    regexp = "op can not be converted to PipeOp")
+  expect_error(gr$replace_subgraph("debug.multi", PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2")),
+    regexp = "Number of ids to replace differs from the number of PipeOps in the substitute")
+  address_old = address(gr)
+  gr_old = gr$clone(deep = TRUE)
+  output = gr$replace_subgraph("debug.multi", substitute = PipeOpDebugMulti$new(2, 2))
+  expect_identical(output, gr)
+  expect_true(address_old == address(gr))  # in place modification
+  expect_deep_clone(gr_old, gr)  # replacing with exactly the same pipeop is the same as a deep clone
+
+  gr = PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2")
+  gr_old = gr$clone(deep = TRUE)
+  expect_error(gr$replace_subgraph("dm2", substitute = PipeOpDebugMulti$new(1, 2)),
+    regexp = "One of the following must apply")
+  expect_equal(gr$pipeops, gr_old$pipeops)
+  expect_equal(gr$edges, gr_old$edges)
+  expect_deep_clone(gr_old, gr)  # nothing changed due to reset
+
+  # Linear Graph
+  gr = po("scale") %>>% po("pca") %>>% lrn("classif.rpart")
+  gr_new = gr$clone(deep = TRUE)
+  gr_new$replace_subgraph("scale", substitute = po("scalemaxabs"))
+  expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "pca", "classif.rpart")))
+  expect_null(gr_new$train(task)[[1L]])
+  expect_prediction_classif(gr_new$predict(task)[[1L]])
+
+  gr_new = gr$clone(deep = TRUE)
+  gr_new$replace_subgraph(c("scale", "pca"), substitute = po("scalemaxabs") %>>% po("ica"))
+  expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "ica", "classif.rpart")))
+  expect_null(gr_new$train(task)[[1L]])
+  expect_prediction_classif(gr_new$predict(task)[[1L]])
+
+  gr_new = gr$clone(deep = TRUE)
+  expect_error(gr_new$replace_subgraph(c("pca", "scale"), substitute = po("scalemaxabs") %>>% po("ica")),
+    regexp = "ids must be topologically sorted")
+
+  # Non linear Graph
+  gr = po("scale") %>>%
+    po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch") %>>%
+    lrn("classif.rpart")
+  gr_new = gr$clone(deep = TRUE)
+  gr_new$replace_subgraph("ica", substitute = po("pca", id = "pca2"))
+  expect_true(all(gr_new$ids(TRUE) == c("scale", "branch", "pca", "pca2", "unbranch", "classif.rpart")))
+  expect_null(gr_new$train(task)[[1L]])
+  state1 = gr_new$state
+  gr_new$param_set$values$branch.selection = "ica"
+  expect_null(gr_new$train(task)[[1L]])
+  state2 = gr_new$state
+  expect_true(test_r6(state1$pca2, classes = "NO_OP"))
+  expect_true(test_r6(state2$pca, classes = "NO_OP"))
+  expect_class(state1$pca, classes = "prcomp")
+  expect_class(state2$pca2, classes = "prcomp")
+  expect_prediction_classif(gr_new$predict(task)[[1L]])
+
+  # ppl with multiplicities
+  gr = po("scale") %>>% ppl("bagging", graph = lrn("classif.rpart"), averager = po("classifavg", collect_multiplicity = TRUE))
+  gr_new = gr$clone(deep = TRUE)
+  # FIXME:
+  gr_new$replace_subgraph("classif.rpart", substitute = lrn("classif.rpart"))
+})

From f15df98f3a352616267db2032847afac3c092d78 Mon Sep 17 00:00:00 2001
From: sumny <lennart.sch@web.de>
Date: Mon, 1 Feb 2021 00:12:57 +0100
Subject: [PATCH 2/5] reiterate

---
 R/Graph.R                   | 130 +++++++++++++++++++++++++++---------
 tests/testthat/test_Graph.R | 111 +++++++++++++++---------------
 2 files changed, 157 insertions(+), 84 deletions(-)

diff --git a/R/Graph.R b/R/Graph.R
index 455373d1d..cb1520a78 100644
--- a/R/Graph.R
+++ b/R/Graph.R
@@ -70,6 +70,10 @@
 #'   will not be connected within the [`Graph`] at first.\cr
 #'   Instead of supplying a [`PipeOp`] directly, an object that can naturally be converted to a [`PipeOp`] can also
 #'   be supplied, e.g. a [`Learner`][mlr3::Learner] or a [`Filter`][mlr3filters::Filter]; see [`as_pipeop()`].
+#' * `remove_pipeop(id)` \cr
+#'   (`character(1)`) -> `self` \cr
+#'   Mutates [`Graph`] by removing the [`PipeOp`] with the matching id from the [`Graph`].
+#'   Corresponding edges are also removed as well as the corresponding [`ParamSet`][paradox::ParamSet].
 #' * `add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)` \cr
 #'   (`character(1)`, `character(1)`,
 #'   `character(1)` | `numeric(1)` | `NULL`,
@@ -79,6 +83,9 @@
 #'   channel `dst_channel` (identified by its name or number as listed in the [`PipeOp`]'s `$input`).
 #'   If source or destination [`PipeOp`] have only one input / output channel and `src_channel` / `dst_channel`
 #'   are therefore unambiguous, they can be omitted (i.e. left as `NULL`).
+#' * `replace_subgraph(ids, substitute)` \cr
+#'   (`character()`, [`Graph`] | [`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr
+#'   Replace a subgraph specified via ids with the substitute subgraph.
 #' * `plot(html)` \cr
 #'   (`logical(1)`) -> `NULL` \cr
 #'   Plot the [`Graph`], using either the \pkg{igraph} package (for `html = FALSE`, default) or
@@ -98,7 +105,6 @@
 #' * `update_ids(prefix = "", postfix = "")` \cr
 #'   (`character`, `character`) -> `self` \cr
 #'   Pre- or postfix [`PipeOp`]'s existing ids. Both `prefix` and `postfix` default to `""`, i.e. no changes.
-#' * FIXME: replace_subgraph
 #' * `train(input, single_input = TRUE)` \cr
 #'   (`any`, `logical(1)`) -> named `list`\cr
 #'   Train [`Graph`] by traversing the [`Graph`]s' edges and calling all the [`PipeOp`]'s `$train` methods in turn.
@@ -247,52 +253,112 @@ Graph = R6Class("Graph",
       invisible(self)
     },
 
-    replace_subgraph = function(ids, substitute) {
-      assert_subset(ids, choices = self$ids(TRUE), empty.ok = FALSE)
-      substitute = as_graph(substitute, clone = TRUE)
-      substitute_ids = substitute$ids(TRUE)
-      if (length(substitute_ids) != length(ids)) {
-        stopf("Number of ids to replace differs from the number of PipeOps in the substitute")
-      }
-      if (length(substitute_ids) > 1L) {
-        topo_order = match(ids, self$ids(TRUE))
-        if (!identical(topo_order, sort(topo_order, decreasing =  FALSE))) {
-          stopf("ids must be topologically sorted.")
-        }
+    remove_pipeop = function(id) {
+      assert_subset(id, choices = self$ids(TRUE), empty.ok = FALSE)
+      self$pipeops[[id]] = NULL
+      self$edges = self$edges[src_id != id & dst_id != id]
+
+      if (!is.null(private$.param_set)) {
+        # param_set is built on-demand; if it has not been requested before, its value may be NULL
+        # and we don't need to remove anything.
+        private$.param_set$remove_sets(id)
       }
+      invisible(self)
+    },
 
+    replace_subgraph = function(ids, substitute) {
+      # if this fails, pipeops, edges and param_set get reset
       old_pipeops = self$pipeops
       old_edges = self$edges
-
-      # if we fail, pipeops and edges get reset
+      old_ps = private$.param_set
       on.exit({
         self$pipeops = old_pipeops
         self$edges = old_edges
+        private$.param_set = old_ps
       })
 
-      # drop the old pipeops
-      self$pipeops[ids] = NULL
-      # FIXME: do we also need to drop the respective params or is this handled automatically?
+      assert_subset(ids, choices = self$ids(TRUE), empty.ok = FALSE)
+      ids = self$ids(TRUE)[match(ids, self$ids(TRUE))]  # always reorder ids topologically
+      substitute = as_graph(substitute, clone = TRUE)
 
-      # add all substitute pipeops
-      substitute_ids = substitute$ids(TRUE)
-      for (i in seq_along(substitute_ids)) {
-        self$add_pipeop(substitute$pipeops[[substitute_ids[i]]])
+      # check whether the input of the substitute is a vararg channel
+      if (any(strip_multiplicity_type(substitute$input$channel.name) == "...")) {
+        stopf("Using a substitute with a vararg input channel is not supported (yet).")
       }
 
-      # add all edges connecting to or from the substitute pipeops
-      edges_to_add_ids = self$edges$src_id %in% ids | self$edges$dst_id %in% ids
-      edges_to_add = self$edges[edges_to_add_ids, ]
-      for (i in seq_along(ids)) {
-        edges_to_add$src_id[ids[i] == edges_to_add$src_id] = substitute_ids[i]
-        edges_to_add$dst_id[ids[i] == edges_to_add$dst_id] = substitute_ids[i]
+      # check whether the last id that is to be replaced connects to a varag channel
+      if (nrow(self$edges)) {  # this can be a data table with zero rows
+        type = self$edges[src_id == range(ids)[2L], dst_channel]
+        if (length(type)) {  # can be of length 0 if this is the end of the graph
+          if (strip_multiplicity_type(type) == "...") {
+            stopf("Replacing a Subgraph that is connected to a vararg channel is not supported (yet).")
+          }
+        }
       }
-      self$edges = self$edges[!edges_to_add_ids, ]  # subset the original edges to the ones that are not to be replaced
-      for (j in seq_len(nrow(edges_to_add))) {
-        self$add_edge(edges_to_add[j, ][["src_id"]], dst_id = edges_to_add[j, ][["dst_id"]], src_channel = edges_to_add[j, ][["src_channel"]], dst_channel = edges_to_add[j, ][["dst_channel"]])
+
+      input_orig = self$input
+      output_orig = self$output
+
+      for (id in ids) {
+        self$remove_pipeop(id)  # also handles param_set
       }
 
-      on.exit()
+      input = self$input[name != input_orig$name]
+      output = self$output[name != output_orig$name]
+
+      for (pipeop in substitute$pipeops) {
+        self$add_pipeop(pipeop)  # also handles param_set
+      }
+      if (nrow(substitute$edges)) {
+        self$edges = rbind(self$edges, substitute$edges)
+      }
+
+      # build edges from free output channels of substitute and free input channels of self
+      n_input = nrow(input)
+      if (n_input) {
+        if (nrow(substitute$output) != n_input) {
+          stopf("PipeOps to be connected have mismatching number of inputs / outputs.")
+        }
+        for (row in seq_len(n_input)) {
+          if (!are_types_compatible(strip_multiplicity_type(substitute$output$train[row]), strip_multiplicity_type(input$train[row]))) {
+            stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)",
+              substitute$output$op.id[row], substitute$output$train[row], input$op.id[row], input$train[row])
+          }
+          if (!are_types_compatible(strip_multiplicity_type(substitute$output$predict[row]), strip_multiplicity_type(input$predict[row]))) {
+            stopf("Output type of PipeOp %s during prediction (%s) incompatible with input type of PipeOp %s (%s)",
+              substitute$output$op.id[row], substitute$output$predict[row], input$op.id[row], input$predict[row])
+          }
+        }
+        new_edges = cbind(substitute$output[, list(src_id = get("op.id"), src_channel = get("channel.name"))], input[, list(dst_id = get("op.id"), dst_channel = get("channel.name"))])
+        self$edges = rbind(self$edges, new_edges)
+      }
+
+      # build edges from free output channels of self and free input channels of substitute
+      n_output = nrow(output)
+      if (n_output) {
+        if (n_output != nrow(substitute$input)) {
+          stopf("PipeOps to be connected have mismatching number of inputs / outputs.")
+        }
+        for (row in seq_len(n_output)) {
+          if (!are_types_compatible(strip_multiplicity_type(output$train[row]), strip_multiplicity_type(substitute$input$train[row]))) {
+            stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)",
+              output$op.id[row], output$train[row], substitute$input$op.id[row], substitute$input$train[row])
+          }
+          if (!are_types_compatible(strip_multiplicity_type(output$predict[row]), strip_multiplicity_type(substitute$input$predict[row]))) {
+            stopf("Output type of PipeOp %s during prediction (%s) incompatible with input type of PipeOp %s (%s)",
+              output$op.id[row], output$predict[row], substitute$input$op.id[row], substitute$input$predict[row])
+          }
+        }
+        new_edges = cbind(output[, list(src_id = get("op.id"), src_channel = get("channel.name"))], substitute$input[, list(dst_id = get("op.id"), dst_channel = get("channel.name"))])
+        self$edges = rbind(self$edges, new_edges)
+      }
+
+      # check if valid DAG
+      tryCatch(self$ids(TRUE), error = function(error_condition) {
+        stopf("Failed to infer new Graph structure. Resetting.")
+      })
+
+      on.exit({})
       invisible(self)
     },
 
diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R
index ecd5e1101..ab6ba3877 100644
--- a/tests/testthat/test_Graph.R
+++ b/tests/testthat/test_Graph.R
@@ -4,9 +4,9 @@ test_that("linear graph", {
   g = Graph$new()
   expect_equal(g$ids(sorted = TRUE), character(0))
 
-  # FIXME: we should  "dummy" ops, so we can change properties of the ops at will
+  # FIXME: we should use "dummy" ops, so we can change properties of the ops at will
   # we should NOT use PipeOpNOP, because we want to check that $train/$predict actually does something.
-  # FIXME: we should packages of the graph
+  # FIXME: we should check packages of the graph
   op_ds = PipeOpSubsample$new()
   op_pca = PipeOpPCA$new()
   op_lrn = PipeOpLearner$new(mlr_learners$get("classif.rpart"))
@@ -427,71 +427,78 @@ test_that("dot output", {
     "nop_output\",fontsize=24]"), out[-c(1L, 15L)])
 })
 
+
+
 test_that("replace_subgraph", {
   task = tsk("iris")
 
   # Basics
   gr = Graph$new()$add_pipeop(PipeOpDebugMulti$new(2, 2))
+  address_old = address(gr)
+  gr_old = gr$clone(deep = TRUE)
   expect_error(gr$replace_subgraph("id_not_present", PipeOpDebugMulti$new(2, 2)),
     regexp = "Assertion on 'ids' failed")
   expect_error(gr$replace_subgraph("debug.multi", NULL),
     regexp = "op can not be converted to PipeOp")
-  expect_error(gr$replace_subgraph("debug.multi", PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2")),
-    regexp = "Number of ids to replace differs from the number of PipeOps in the substitute")
-  address_old = address(gr)
-  gr_old = gr$clone(deep = TRUE)
-  output = gr$replace_subgraph("debug.multi", substitute = PipeOpDebugMulti$new(2, 2))
-  expect_identical(output, gr)
+  expect_equal(gr, gr_old)  # error results in a clean reset
+  expect_true(address_old == address(gr))
+  expect_deep_clone(gr_old, gr)
+
+  gr$replace_subgraph("debug.multi", substitute = PipeOpDebugMulti$new(2, 2))
+  expect_equal(gr_old, gr)
   expect_true(address_old == address(gr))  # in place modification
   expect_deep_clone(gr_old, gr)  # replacing with exactly the same pipeop is the same as a deep clone
 
-  gr = PipeOpDebugMulti$new(2, 2, id = "dm1") %>>% PipeOpDebugMulti$new(2, 2, id = "dm2")
-  gr_old = gr$clone(deep = TRUE)
-  expect_error(gr$replace_subgraph("dm2", substitute = PipeOpDebugMulti$new(1, 2)),
-    regexp = "One of the following must apply")
-  expect_equal(gr$pipeops, gr_old$pipeops)
-  expect_equal(gr$edges, gr_old$edges)
-  expect_deep_clone(gr_old, gr)  # nothing changed due to reset
-
   # Linear Graph
   gr = po("scale") %>>% po("pca") %>>% lrn("classif.rpart")
-  gr_new = gr$clone(deep = TRUE)
-  gr_new$replace_subgraph("scale", substitute = po("scalemaxabs"))
-  expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "pca", "classif.rpart")))
-  expect_null(gr_new$train(task)[[1L]])
-  expect_prediction_classif(gr_new$predict(task)[[1L]])
-
-  gr_new = gr$clone(deep = TRUE)
-  gr_new$replace_subgraph(c("scale", "pca"), substitute = po("scalemaxabs") %>>% po("ica"))
-  expect_true(all(gr_new$ids(TRUE) == c("scalemaxabs", "ica", "classif.rpart")))
-  expect_null(gr_new$train(task)[[1L]])
-  expect_prediction_classif(gr_new$predict(task)[[1L]])
-
-  gr_new = gr$clone(deep = TRUE)
-  expect_error(gr_new$replace_subgraph(c("pca", "scale"), substitute = po("scalemaxabs") %>>% po("ica")),
-    regexp = "ids must be topologically sorted")
+  gr_old = gr$clone(deep = TRUE)
+  gr$replace_subgraph("scale", substitute = po("scalemaxabs"))  # replace beginning
+  expect_set_equal(gr$ids(), c("scalemaxabs", "pca", "classif.rpart"))
+  expect_true(gr$input$op.id == "scalemaxabs")
+  expect_true(gr$output$op.id == "classif.rpart")
+  expect_null(gr$train(task)[[1L]])
+  expect_prediction_classif(gr$predict(task)[[1L]])
+
+  gr = gr_old$clone(deep = TRUE)
+  gr$replace_subgraph("classif.rpart", substitute = lrn("classif.featureless"))  # replace end
+  expect_set_equal(gr$ids(), c("scale", "pca", "classif.featureless"))
+  expect_true(gr$input$op.id == "scale")
+  expect_true(gr$output$op.id == "classif.featureless")
+  expect_null(gr$train(task)[[1L]])
+  expect_prediction_classif(gr$predict(task)[[1L]])
+
+  gr = gr_old$clone(deep = TRUE)
+  gr$replace_subgraph(c("scale", "pca", "classif.rpart"), substitute = po("scalemaxabs") %>>% po("ica") %>>% lrn("classif.featureless"))  # replace whole graph
+  expect_set_equal(gr$ids(), c("scalemaxabs", "ica", "classif.featureless"))
+  expect_true(gr$input$op.id == "scalemaxabs")
+  expect_true(gr$output$op.id == "classif.featureless")
+  expect_null(gr$train(task)[[1L]])
+  expect_prediction_classif(gr$predict(task)[[1L]])
+
+  gr = gr_old$clone(deep = TRUE)
+  gr$replace_subgraph(c("pca", "scale"), substitute = po("scalemaxabs") %>>% po("ica"))  # replace linear subgraph
+  expect_set_equal(gr$ids(), c("scalemaxabs", "ica", "classif.rpart"))
+  expect_true(gr$input$op.id == "scalemaxabs")
+  expect_true(gr$output$op.id == "classif.rpart")
+  expect_null(gr$train(task)[[1L]])
+  expect_prediction_classif(gr$predict(task)[[1L]])
 
   # Non linear Graph
-  gr = po("scale") %>>%
-    po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch") %>>%
-    lrn("classif.rpart")
-  gr_new = gr$clone(deep = TRUE)
-  gr_new$replace_subgraph("ica", substitute = po("pca", id = "pca2"))
-  expect_true(all(gr_new$ids(TRUE) == c("scale", "branch", "pca", "pca2", "unbranch", "classif.rpart")))
-  expect_null(gr_new$train(task)[[1L]])
-  state1 = gr_new$state
-  gr_new$param_set$values$branch.selection = "ica"
-  expect_null(gr_new$train(task)[[1L]])
-  state2 = gr_new$state
-  expect_true(test_r6(state1$pca2, classes = "NO_OP"))
+  gr = po("scale") %>>% po("branch", c("pca", "nop")) %>>% gunion(list(po("pca"), po("nop"))) %>>% po("unbranch") %>>% lrn("classif.rpart")
+  gr_old = gr$clone(deep = TRUE)
+  expect_error(gr$replace_subgraph(c("nop"), substitute = po("ica")), regexp = "connected to a vararg channel is not supported")
+  expect_error(gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = lrn("classif.featureless")),
+    regexp = "Output type of PipeOp classif.featureless during training")
+  gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch"))
+  expect_set_equal(gr$ids(TRUE), c("scale", "branch", "pca", "ica", "unbranch", "classif.rpart"))
+  expect_true(gr$input$op.id == "scale")
+  expect_true(gr$output$op.id == "classif.rpart")
+  expect_null(gr$train(task)[[1L]])
+  state1 = gr$state
+  gr$param_set$values$branch.selection = "ica"
+  expect_null(gr$train(task)[[1L]])
+  state2 = gr$state
+  expect_true(test_r6(state1$ica, classes = "NO_OP"))
   expect_true(test_r6(state2$pca, classes = "NO_OP"))
-  expect_class(state1$pca, classes = "prcomp")
-  expect_class(state2$pca2, classes = "prcomp")
-  expect_prediction_classif(gr_new$predict(task)[[1L]])
-
-  # ppl with multiplicities
-  gr = po("scale") %>>% ppl("bagging", graph = lrn("classif.rpart"), averager = po("classifavg", collect_multiplicity = TRUE))
-  gr_new = gr$clone(deep = TRUE)
-  # FIXME:
-  gr_new$replace_subgraph("classif.rpart", substitute = lrn("classif.rpart"))
+  expect_prediction_classif(gr$predict(task)[[1L]])
 })

From 1829aaf605a1f4bf10a5969fe5e43cc78bf3071a Mon Sep 17 00:00:00 2001
From: sumny <lennart.sch@web.de>
Date: Mon, 1 Feb 2021 00:16:09 +0100
Subject: [PATCH 3/5] rerun docs

---
 R/Graph.R               | 2 +-
 man/Graph.Rd            | 7 +++++++
 man/mlr_learners_avg.Rd | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/Graph.R b/R/Graph.R
index cb1520a78..d1b80e6bf 100644
--- a/R/Graph.R
+++ b/R/Graph.R
@@ -85,7 +85,7 @@
 #'   are therefore unambiguous, they can be omitted (i.e. left as `NULL`).
 #' * `replace_subgraph(ids, substitute)` \cr
 #'   (`character()`, [`Graph`] | [`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr
-#'   Replace a subgraph specified via ids with the substitute subgraph.
+#'   Mutates [`Graph`] by replace a subgraph specified via ids with the substitute subgraph.
 #' * `plot(html)` \cr
 #'   (`logical(1)`) -> `NULL` \cr
 #'   Plot the [`Graph`], using either the \pkg{igraph} package (for `html = FALSE`, default) or
diff --git a/man/Graph.Rd b/man/Graph.Rd
index 599473314..68906377d 100644
--- a/man/Graph.Rd
+++ b/man/Graph.Rd
@@ -82,6 +82,10 @@ Mutates \code{\link{Graph}} by adding a \code{\link{PipeOp}} to the \code{\link{
 will not be connected within the \code{\link{Graph}} at first.\cr
 Instead of supplying a \code{\link{PipeOp}} directly, an object that can naturally be converted to a \code{\link{PipeOp}} can also
 be supplied, e.g. a \code{\link[mlr3:Learner]{Learner}} or a \code{\link[mlr3filters:Filter]{Filter}}; see \code{\link[=as_pipeop]{as_pipeop()}}.
+\item \code{remove_pipeop(id)} \cr
+(\code{character(1)}) -> \code{self} \cr
+Mutates \code{\link{Graph}} by removing the \code{\link{PipeOp}} with the matching id from the \code{\link{Graph}}.
+Corresponding edges are also removed as well as the corresponding \code{\link[paradox:ParamSet]{ParamSet}}.
 \item \code{add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)} \cr
 (\code{character(1)}, \code{character(1)},
 \code{character(1)} | \code{numeric(1)} | \code{NULL},
@@ -91,6 +95,9 @@ Add an edge from \code{\link{PipeOp}} \code{src_id}, and its channel \code{src_c
 channel \code{dst_channel} (identified by its name or number as listed in the \code{\link{PipeOp}}'s \verb{$input}).
 If source or destination \code{\link{PipeOp}} have only one input / output channel and \code{src_channel} / \code{dst_channel}
 are therefore unambiguous, they can be omitted (i.e. left as \code{NULL}).
+\item \code{replace_subgraph(ids, substitute)} \cr
+(\code{character()}, \code{\link{Graph}} | \code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}) -> \code{self} \cr
+Mutates \code{\link{Graph}} by replace a subgraph specified via ids with the substitute subgraph.
 \item \code{plot(html)} \cr
 (\code{logical(1)}) -> \code{NULL} \cr
 Plot the \code{\link{Graph}}, using either the \pkg{igraph} package (for \code{html = FALSE}, default) or
diff --git a/man/mlr_learners_avg.Rd b/man/mlr_learners_avg.Rd
index be07bbe04..a8553c79e 100644
--- a/man/mlr_learners_avg.Rd
+++ b/man/mlr_learners_avg.Rd
@@ -41,7 +41,7 @@ and \code{"regr.mse"}, i.e. mean squared error for regression.
 \item \code{optimizer} :: \code{\link[bbotk:Optimizer]{Optimizer}} | \code{character(1)}\cr
 \code{\link[bbotk:Optimizer]{Optimizer}} used to find optimal thresholds.
 If \code{character}, converts to \code{\link[bbotk:Optimizer]{Optimizer}}
-via \code{\link[bbotk:opt]{opt}}. Initialized to \code{\link[bbotk:OptimizerNLoptr]{OptimizerNLoptr}}.
+via \code{\link[bbotk:opt]{opt}}. Initialized to \code{\link[bbotk:mlr_optimizers_nloptr]{OptimizerNLoptr}}.
 Nloptr hyperparameters are initialized to \code{xtol_rel = 1e-8}, \code{algorithm = "NLOPT_LN_COBYLA"}
 and equal initial weights for each learner.
 For more fine-grained control, it is recommended to supply a instantiated \code{\link[bbotk:Optimizer]{Optimizer}}.

From 93ca68c804576964ab162831d602bb9c111e38cf Mon Sep 17 00:00:00 2001
From: sumny <lennart.sch@web.de>
Date: Mon, 1 Feb 2021 00:19:00 +0100
Subject: [PATCH 4/5] static code checks

---
 R/zzz.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/zzz.R b/R/zzz.R
index 885b08e68..42012754e 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -36,6 +36,6 @@ register_mlr3 = function() {
 } # nocov end
 
 # static code checks should not complain about commonly used data.table columns
-utils::globalVariables(c("src_id", "dst_id", "name", "op.id", "response", "truth"))
+utils::globalVariables(c("src_id", "dst_id", "src_channel", "dst_channel", "name", "op.id", "response", "truth"))
 
 leanify_package()

From 2903dcc89194586b34ebf5adcf70e28e78803502 Mon Sep 17 00:00:00 2001
From: sumny <lennart.sch@web.de>
Date: Thu, 4 Feb 2021 23:11:29 +0100
Subject: [PATCH 5/5] vararg inputs

---
 R/Graph.R                   | 43 +++++++++++++++++++------------------
 man/Graph.Rd                |  5 +++--
 tests/testthat/test_Graph.R |  2 +-
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/R/Graph.R b/R/Graph.R
index d1b80e6bf..83bd21043 100644
--- a/R/Graph.R
+++ b/R/Graph.R
@@ -26,7 +26,7 @@
 #' @section Fields:
 #' * `pipeops` :: named `list` of [`PipeOp`] \cr
 #'   Contains all [`PipeOp`]s in the [`Graph`], named by the [`PipeOp`]'s `$id`s.
-#' * `edges` :: [`data.table`]  with columns `src_id` (`character`), `src_channel` (`character`), `dst_id` (`character`), `dst_channel` (`character`)\cr
+#' * `edges` :: [`data.table`] with columns `src_id` (`character`), `src_channel` (`character`), `dst_id` (`character`), `dst_channel` (`character`)\cr
 #'   Table of connections between the [`PipeOp`]s. A [`data.table`]. `src_id` and `dst_id` are `$id`s of [`PipeOp`]s that must be present in
 #'   the `$pipeops` list. `src_channel` and `dst_channel` must respectively be `$output` and `$input` channel names of the
 #'   respective [`PipeOp`]s.
@@ -85,7 +85,8 @@
 #'   are therefore unambiguous, they can be omitted (i.e. left as `NULL`).
 #' * `replace_subgraph(ids, substitute)` \cr
 #'   (`character()`, [`Graph`] | [`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr
-#'   Mutates [`Graph`] by replace a subgraph specified via ids with the substitute subgraph.
+#'   Mutates [`Graph`] by replacing a subgraph specified via ids with the supplied substitute subgraph.
+#'   Note that the supplied ids are always reordered in topological order with respect to the [`Graph`].
 #' * `plot(html)` \cr
 #'   (`logical(1)`) -> `NULL` \cr
 #'   Plot the [`Graph`], using either the \pkg{igraph} package (for `html = FALSE`, default) or
@@ -281,20 +282,23 @@ Graph = R6Class("Graph",
       ids = self$ids(TRUE)[match(ids, self$ids(TRUE))]  # always reorder ids topologically
       substitute = as_graph(substitute, clone = TRUE)
 
+      # FIXME: check that ids are actually a valid subgraph of graph
+
+      # FIXME:
       # check whether the input of the substitute is a vararg channel
-      if (any(strip_multiplicity_type(substitute$input$channel.name) == "...")) {
-        stopf("Using a substitute with a vararg input channel is not supported (yet).")
-      }
+      #if (any(strip_multiplicity_type(substitute$input$channel.name) == "...")) {
+      #  stopf("Using a substitute with a vararg input channel is not supported (yet).")
+      #}
 
       # check whether the last id that is to be replaced connects to a varag channel
-      if (nrow(self$edges)) {  # this can be a data table with zero rows
-        type = self$edges[src_id == range(ids)[2L], dst_channel]
-        if (length(type)) {  # can be of length 0 if this is the end of the graph
-          if (strip_multiplicity_type(type) == "...") {
-            stopf("Replacing a Subgraph that is connected to a vararg channel is not supported (yet).")
-          }
-        }
-      }
+      #if (nrow(self$edges)) {  # this can be a data table with zero rows
+      #  type = self$edges[src_id == range(ids)[2L], dst_channel]
+      #  if (length(type)) {  # can be of length 0 if this is the end of the graph
+      #    if (strip_multiplicity_type(type) == "...") {
+      #      stopf("Replacing a Subgraph that is connected to a vararg channel is not supported (yet).")
+      #    }
+      #  }
+      #}
 
       input_orig = self$input
       output_orig = self$output
@@ -313,12 +317,11 @@ Graph = R6Class("Graph",
         self$edges = rbind(self$edges, substitute$edges)
       }
 
+      # FIXME: this reuses a lot of `%>>%`, we could write a general helper
       # build edges from free output channels of substitute and free input channels of self
       n_input = nrow(input)
       if (n_input) {
-        if (nrow(substitute$output) != n_input) {
-          stopf("PipeOps to be connected have mismatching number of inputs / outputs.")
-        }
+        # FIXME: check number of inputs / outputs
         for (row in seq_len(n_input)) {
           if (!are_types_compatible(strip_multiplicity_type(substitute$output$train[row]), strip_multiplicity_type(input$train[row]))) {
             stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)",
@@ -336,9 +339,7 @@ Graph = R6Class("Graph",
       # build edges from free output channels of self and free input channels of substitute
       n_output = nrow(output)
       if (n_output) {
-        if (n_output != nrow(substitute$input)) {
-          stopf("PipeOps to be connected have mismatching number of inputs / outputs.")
-        }
+        # FIXME: check number of inputs / outputs
         for (row in seq_len(n_output)) {
           if (!are_types_compatible(strip_multiplicity_type(output$train[row]), strip_multiplicity_type(substitute$input$train[row]))) {
             stopf("Output type of PipeOp %s during training (%s) incompatible with input type of PipeOp %s (%s)",
@@ -354,9 +355,9 @@ Graph = R6Class("Graph",
       }
 
       # check if valid DAG
-      tryCatch(self$ids(TRUE), error = function(error_condition) {
+      invisible(tryCatch(self$ids(TRUE), error = function(error_condition) {
         stopf("Failed to infer new Graph structure. Resetting.")
-      })
+      }))
 
       on.exit({})
       invisible(self)
diff --git a/man/Graph.Rd b/man/Graph.Rd
index 68906377d..c79987a84 100644
--- a/man/Graph.Rd
+++ b/man/Graph.Rd
@@ -34,7 +34,7 @@ the \code{\link{PipeOp}} results along the edges as input to other \code{\link{P
 \itemize{
 \item \code{pipeops} :: named \code{list} of \code{\link{PipeOp}} \cr
 Contains all \code{\link{PipeOp}}s in the \code{\link{Graph}}, named by the \code{\link{PipeOp}}'s \verb{$id}s.
-\item \code{edges} :: \code{\link{data.table}}  with columns \code{src_id} (\code{character}), \code{src_channel} (\code{character}), \code{dst_id} (\code{character}), \code{dst_channel} (\code{character})\cr
+\item \code{edges} :: \code{\link{data.table}} with columns \code{src_id} (\code{character}), \code{src_channel} (\code{character}), \code{dst_id} (\code{character}), \code{dst_channel} (\code{character})\cr
 Table of connections between the \code{\link{PipeOp}}s. A \code{\link{data.table}}. \code{src_id} and \code{dst_id} are \verb{$id}s of \code{\link{PipeOp}}s that must be present in
 the \verb{$pipeops} list. \code{src_channel} and \code{dst_channel} must respectively be \verb{$output} and \verb{$input} channel names of the
 respective \code{\link{PipeOp}}s.
@@ -97,7 +97,8 @@ If source or destination \code{\link{PipeOp}} have only one input / output chann
 are therefore unambiguous, they can be omitted (i.e. left as \code{NULL}).
 \item \code{replace_subgraph(ids, substitute)} \cr
 (\code{character()}, \code{\link{Graph}} | \code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}) -> \code{self} \cr
-Mutates \code{\link{Graph}} by replace a subgraph specified via ids with the substitute subgraph.
+Mutates \code{\link{Graph}} by replacing a subgraph specified via ids with the supplied substitute subgraph.
+Note that the supplied ids are always reordered in topological order with respect to the \code{\link{Graph}}.
 \item \code{plot(html)} \cr
 (\code{logical(1)}) -> \code{NULL} \cr
 Plot the \code{\link{Graph}}, using either the \pkg{igraph} package (for \code{html = FALSE}, default) or
diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R
index ab6ba3877..fe4a9dac4 100644
--- a/tests/testthat/test_Graph.R
+++ b/tests/testthat/test_Graph.R
@@ -486,7 +486,7 @@ test_that("replace_subgraph", {
   # Non linear Graph
   gr = po("scale") %>>% po("branch", c("pca", "nop")) %>>% gunion(list(po("pca"), po("nop"))) %>>% po("unbranch") %>>% lrn("classif.rpart")
   gr_old = gr$clone(deep = TRUE)
-  expect_error(gr$replace_subgraph(c("nop"), substitute = po("ica")), regexp = "connected to a vararg channel is not supported")
+  #expect_error(gr$replace_subgraph(c("nop"), substitute = po("ica")), regexp = "connected to a vararg channel is not supported")  # FIXME:
   expect_error(gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = lrn("classif.featureless")),
     regexp = "Output type of PipeOp classif.featureless during training")
   gr$replace_subgraph(c("branch", "pca", "nop", "unbranch"), substitute = po("branch", c("pca", "ica")) %>>% gunion(list(po("pca"), po("ica"))) %>>% po("unbranch"))