feat: add n features to data table view of archive (#91)

* feat: add n features to data table view of archive
* chore: update news
* test: extract_inner_fselect_archives
mlr-org · Dec 15, 2023 · 76451ec · 76451ec
1 parent 8c13d7c
commit 76451ec
Show file tree

Hide file tree

Showing 4 changed files with 25 additions and 23 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # mlr3fselect (development version)
 
+* feat: Add number of features to `as.data.table.ArchiveFSelect()`.
 * feat: Features can be always included with the `always_include` column role.
 * fix: Add `$phash()` method to `AutoFSelector`.
 * fix: Include `FSelector` in hash of  `AutoFSelector`.

diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R
@@ -157,6 +157,7 @@ as.data.table.ArchiveFSelect = function(x, ..., exclude_columns = "uhash", measu
 
   # add feature vector
   tab[, "features" := lapply(transpose(.SD), function(col) x$cols_x[col]), .SDcols = x$cols_x]
+  tab[, "n_features" := map(get("features"), length)]
 
   if (x$benchmark_result$n_resample_results) {
     # add extra measures

diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R
@@ -61,45 +61,45 @@ test_that("ArchiveFSelect as.data.table function works", {
 
   # default
   tab = as.data.table(instance$archive)
-  expect_data_table(tab, nrows = 4, ncols = 16)
+  expect_data_table(tab, nrows = 4, ncols = 17)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result"))
+    "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result"))
 
   # extra measure
   tab = as.data.table(instance$archive, measures = msr("classif.acc"))
-  expect_data_table(tab, nrows = 4, ncols = 17)
+  expect_data_table(tab, nrows = 4, ncols = 18)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "classif.acc", "runtime_learners", "timestamp", "batch_nr",  "warnings", "errors", "features", "resample_result"))
+    "classif.acc", "runtime_learners", "timestamp", "batch_nr",  "warnings", "errors", "features", "n_features", "resample_result"))
 
   # extra measures
   tab = as.data.table(instance$archive, measures = msrs(c("classif.acc", "classif.mcc")))
-  expect_data_table(tab, nrows = 4, ncols = 18)
+  expect_data_table(tab, nrows = 4, ncols = 19)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "classif.acc", "classif.mcc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result"))
+    "classif.acc", "classif.mcc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result"))
 
   # exclude column
   tab = as.data.table(instance$archive, exclude_columns = "timestamp")
-  expect_data_table(tab, nrows = 4, ncols = 16)
+  expect_data_table(tab, nrows = 4, ncols = 17)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "runtime_learners", "batch_nr", "uhash", "warnings", "errors", "features", "resample_result"))
+    "runtime_learners", "batch_nr", "uhash", "warnings", "errors", "features", "n_features", "resample_result"))
 
   # exclude columns
   tab = as.data.table(instance$archive, exclude_columns = c("timestamp", "uhash"))
-  expect_data_table(tab, nrows = 4, ncols = 15)
+  expect_data_table(tab, nrows = 4, ncols = 16)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "runtime_learners", "batch_nr",  "warnings", "errors", "features", "resample_result"))
+    "runtime_learners", "batch_nr",  "warnings", "errors", "features", "n_features", "resample_result"))
 
   # no exclude
   tab = as.data.table(instance$archive, exclude_columns = NULL)
-  expect_data_table(tab, nrows = 4, ncols = 17)
+  expect_data_table(tab, nrows = 4, ncols = 18)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "runtime_learners", "timestamp", "batch_nr",  "uhash", "warnings", "errors", "features", "resample_result"))
+    "runtime_learners", "timestamp", "batch_nr",  "uhash", "warnings", "errors", "features", "n_features", "resample_result"))
 
   # no unnest
   tab = as.data.table(instance$archive, unnest = NULL)
-  expect_data_table(tab, nrows = 4, ncols = 16)
+  expect_data_table(tab, nrows = 4, ncols = 17)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "runtime_learners", "timestamp", "batch_nr",  "warnings", "errors", "features", "resample_result"))
+    "runtime_learners", "timestamp", "batch_nr",  "warnings", "errors", "features", "n_features", "resample_result"))
 
   # without benchmark result
   instance = FSelectInstanceSingleCrit$new(
@@ -113,9 +113,9 @@ test_that("ArchiveFSelect as.data.table function works", {
   fselector$optimize(instance)
 
   tab = as.data.table(instance$archive)
-  expect_data_table(tab, nrows = 4, ncols = 15)
+  expect_data_table(tab, nrows = 4, ncols = 16)
   expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
-    "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features"))
+    "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features"))
 
   # empty archive
   instance = FSelectInstanceSingleCrit$new(

diff --git a/tests/testthat/test_extract_inner_fselect_archives.R b/tests/testthat/test_extract_inner_fselect_archives.R
@@ -4,7 +4,7 @@ test_that("extract_inner_fselect_archives function works with resample and cv",
 
   irr = extract_inner_fselect_archives(rr)
   expect_data_table(irr, nrows = 8)
-  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
+  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
 })
 
 test_that("extract_inner_fselect_archives function works with resample and repeated cv", {
@@ -13,7 +13,7 @@ test_that("extract_inner_fselect_archives function works with resample and repea
 
   irr = extract_inner_fselect_archives(rr)
   expect_data_table(irr, nrows = 24)
-  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
+  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
 })
 
 test_that("extract_inner_fselect_archives function works with benchmark and cv", {
@@ -25,7 +25,7 @@ test_that("extract_inner_fselect_archives function works with benchmark and cv",
 
   ibmr = extract_inner_fselect_archives(bmr)
   expect_data_table(ibmr, nrows = 16)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), c(1, 2))
 })
 
@@ -38,7 +38,7 @@ test_that("extract_inner_fselect_archives function works with benchmark and repe
 
   ibmr = extract_inner_fselect_archives(bmr)
   expect_data_table(ibmr, nrows = 48)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), c(1, 2))
 })
 
@@ -51,7 +51,7 @@ test_that("extract_inner_fselect_archives function works with multiple tasks", {
 
   ibmr = extract_inner_fselect_archives(bmr)
   expect_data_table(ibmr, nrows = 32)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), c(1, 2, 3, 4))
 })
 
@@ -92,7 +92,7 @@ test_that("extract_inner_fselect_archives function works with mixed store instan
   bmr = benchmark(grid, store_models = TRUE)
 
   ibmr = extract_inner_fselect_archives(bmr)
-  expect_data_table(ibmr, ncols = 17)
+  expect_data_table(ibmr, ncols = 18)
   expect_equal(unique(ibmr$experiment), 2)
 })
 
@@ -105,6 +105,6 @@ test_that("extract_inner_fselect_archives function works with autofselector and
 
   ibmr = extract_inner_fselect_archives(bmr)
   expect_data_table(ibmr, nrows = 8)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), 1)
 })