Skip to content

Commit

Permalink
feat: add n features to data table view of archive (#91)
Browse files Browse the repository at this point in the history
* feat: add n features to data table view of archive

* chore: update news

* test: extract_inner_fselect_archives
  • Loading branch information
be-marc authored Dec 15, 2023
1 parent 8c13d7c commit 76451ec
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 23 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# mlr3fselect (development version)

* feat: Add the number of selected features as an `n_features` column to `as.data.table.ArchiveFSelect()`.
* feat: Features can always be included with the `always_include` column role.
* fix: Add `$phash()` method to `AutoFSelector`.
* fix: Include `FSelector` in hash of `AutoFSelector`.
Expand Down
1 change: 1 addition & 0 deletions R/ArchiveFSelect.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ as.data.table.ArchiveFSelect = function(x, ..., exclude_columns = "uhash", measu

# add feature vector
tab[, "features" := lapply(transpose(.SD), function(col) x$cols_x[col]), .SDcols = x$cols_x]
tab[, "n_features" := map(get("features"), length)]

if (x$benchmark_result$n_resample_results) {
# add extra measures
Expand Down
32 changes: 16 additions & 16 deletions tests/testthat/test_ArchiveFSelect.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,45 +61,45 @@ test_that("ArchiveFSelect as.data.table function works", {

# default
tab = as.data.table(instance$archive)
expect_data_table(tab, nrows = 4, ncols = 16)
expect_data_table(tab, nrows = 4, ncols = 17)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result"))
"runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result"))

# extra measure
tab = as.data.table(instance$archive, measures = msr("classif.acc"))
expect_data_table(tab, nrows = 4, ncols = 17)
expect_data_table(tab, nrows = 4, ncols = 18)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"classif.acc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result"))
"classif.acc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result"))

# extra measures
tab = as.data.table(instance$archive, measures = msrs(c("classif.acc", "classif.mcc")))
expect_data_table(tab, nrows = 4, ncols = 18)
expect_data_table(tab, nrows = 4, ncols = 19)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"classif.acc", "classif.mcc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result"))
"classif.acc", "classif.mcc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result"))

# exclude column
tab = as.data.table(instance$archive, exclude_columns = "timestamp")
expect_data_table(tab, nrows = 4, ncols = 16)
expect_data_table(tab, nrows = 4, ncols = 17)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"runtime_learners", "batch_nr", "uhash", "warnings", "errors", "features", "resample_result"))
"runtime_learners", "batch_nr", "uhash", "warnings", "errors", "features", "n_features", "resample_result"))

# exclude columns
tab = as.data.table(instance$archive, exclude_columns = c("timestamp", "uhash"))
expect_data_table(tab, nrows = 4, ncols = 15)
expect_data_table(tab, nrows = 4, ncols = 16)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"runtime_learners", "batch_nr", "warnings", "errors", "features", "resample_result"))
"runtime_learners", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result"))

# no exclude
tab = as.data.table(instance$archive, exclude_columns = NULL)
expect_data_table(tab, nrows = 4, ncols = 17)
expect_data_table(tab, nrows = 4, ncols = 18)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"runtime_learners", "timestamp", "batch_nr", "uhash", "warnings", "errors", "features", "resample_result"))
"runtime_learners", "timestamp", "batch_nr", "uhash", "warnings", "errors", "features", "n_features", "resample_result"))

# no unnest
tab = as.data.table(instance$archive, unnest = NULL)
expect_data_table(tab, nrows = 4, ncols = 16)
expect_data_table(tab, nrows = 4, ncols = 17)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result"))
"runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result"))

# without benchmark result
instance = FSelectInstanceSingleCrit$new(
Expand All @@ -113,9 +113,9 @@ test_that("ArchiveFSelect as.data.table function works", {
fselector$optimize(instance)

tab = as.data.table(instance$archive)
expect_data_table(tab, nrows = 4, ncols = 15)
expect_data_table(tab, nrows = 4, ncols = 16)
expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce",
"runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features"))
"runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features"))

# empty archive
instance = FSelectInstanceSingleCrit$new(
Expand Down
14 changes: 7 additions & 7 deletions tests/testthat/test_extract_inner_fselect_archives.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ test_that("extract_inner_fselect_archives function works with resample and cv",

irr = extract_inner_fselect_archives(rr)
expect_data_table(irr, nrows = 8)
expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
})

test_that("extract_inner_fselect_archives function works with resample and repeated cv", {
Expand All @@ -13,7 +13,7 @@ test_that("extract_inner_fselect_archives function works with resample and repea

irr = extract_inner_fselect_archives(rr)
expect_data_table(irr, nrows = 24)
expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
})

test_that("extract_inner_fselect_archives function works with benchmark and cv", {
Expand All @@ -25,7 +25,7 @@ test_that("extract_inner_fselect_archives function works with benchmark and cv",

ibmr = extract_inner_fselect_archives(bmr)
expect_data_table(ibmr, nrows = 16)
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_equal(unique(ibmr$experiment), c(1, 2))
})

Expand All @@ -38,7 +38,7 @@ test_that("extract_inner_fselect_archives function works with benchmark and repe

ibmr = extract_inner_fselect_archives(bmr)
expect_data_table(ibmr, nrows = 48)
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_equal(unique(ibmr$experiment), c(1, 2))
})

Expand All @@ -51,7 +51,7 @@ test_that("extract_inner_fselect_archives function works with multiple tasks", {

ibmr = extract_inner_fselect_archives(bmr)
expect_data_table(ibmr, nrows = 32)
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_equal(unique(ibmr$experiment), c(1, 2, 3, 4))
})

Expand Down Expand Up @@ -92,7 +92,7 @@ test_that("extract_inner_fselect_archives function works with mixed store instan
bmr = benchmark(grid, store_models = TRUE)

ibmr = extract_inner_fselect_archives(bmr)
expect_data_table(ibmr, ncols = 17)
expect_data_table(ibmr, ncols = 18)
expect_equal(unique(ibmr$experiment), 2)
})

Expand All @@ -105,6 +105,6 @@ test_that("extract_inner_fselect_archives function works with autofselector and

ibmr = extract_inner_fselect_archives(bmr)
expect_data_table(ibmr, nrows = 8)
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id"))
expect_equal(unique(ibmr$experiment), 1)
})

0 comments on commit 76451ec

Please sign in to comment.