diff --git a/DESCRIPTION b/DESCRIPTION
index 85253382c..a4309bd20 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -98,6 +98,7 @@ Roxygen: list(markdown = TRUE, r6 = FALSE)
 RoxygenNote: 7.3.2
 VignetteBuilder: knitr
 Collate:
+    'FilterEnsemble.R'
     'Graph.R'
     'GraphLearner.R'
     'mlr_pipeops.R'
diff --git a/R/FilterEnsemble.R b/R/FilterEnsemble.R
new file mode 100644
index 000000000..9a9076790
--- /dev/null
+++ b/R/FilterEnsemble.R
@@ -0,0 +1,115 @@
+
+
+#' @title Filter Ensemble
+#'
+#' @usage NULL
+#' @name mlr_filters_ensemble
+#' @format [`R6Class`][R6::R6Class] object inheriting from [`Filter`][mlr3filters::Filter].
+#'
+#' @description
+#' Implements the filter ensemble proposed in `r cite_bib("binder_2020")`.
+
+FilterEnsemble = R6Class("FilterEnsemble", inherit = mlr3filters::Filter,
+  public = list(
+    # Wrap a non-empty list of `Filter` objects, keyed by their ids.
+    initialize = function(filters) {
+      private$.wrapped = assert_list(filters, types = "Filter", min.len = 1)
+      fnames = map_chr(private$.wrapped, "id")
+      names(private$.wrapped) = fnames
+      # Filters whose task_types is a scalar NA are left out of the intersection.
+      types_list = map(discard(private$.wrapped, function(x) test_scalar_na(x$task_types)), "task_types")
+      if (length(types_list)) {
+        task_types = Reduce(intersect, types_list)
+      } else {
+        task_types = NA_character_
+      }
+      # Keep the ensemble's own parameters in the private field: `.calculate()`
+      # reads `weights` and `rank_transform` back from `private$.own_param_set`.
+      private$.own_param_set = ps(
+        weights = p_uty(custom_check = crate(function(x) {
+          check_numeric(x, len = length(fnames)) %check&&%
+            (check_names(names(x), type = "unnamed") %check||%
+            check_names(names(x), type = "unique", permutation.of = fnames))
+        }, fnames),
+        tags = "required"
+        ),
+        rank_transform = p_lgl(init = FALSE, tags = "required")
+      )
+
+      super$initialize(
+        id = paste(fnames, collapse = "."),
+        task_types = task_types,
+        task_properties = unique(unlist(map(private$.wrapped, "task_properties"))),
+        param_set = ParamSetCollection$new(c(list(private$.own_param_set), map(private$.wrapped, "param_set"))),
+        feature_types = Reduce(intersect, map(private$.wrapped, "feature_types")),
+        packages = unique(unlist(map(private$.wrapped, "packages"))),
+        label = "meta",
+        man = "mlr3pipelines::mlr_filters_ensemble"
+      )
+    },
+    # Wrap the weights search space in a tune token.
+    get_weights_tunetoken = function(normalize_weights = "uniform") {
+      assert_choice(normalize_weights, c("uniform", "naive", "no"))
+      paradox::to_tune(self$get_weights_search_space(normalize_weights = normalize_weights))
+    },
+    # Set the `weights` parameter to the tune token; returns `self` invisibly.
+    set_weights_to_tune = function(normalize_weights = "uniform") {
+      assert_choice(normalize_weights, c("uniform", "naive", "no"))
+      self$param_set$set_values(.values = list(weights = self$get_weights_tunetoken(normalize_weights = normalize_weights)))
+      invisible(self)
+    },
+    # Build a search space with one p_dbl(0, 1) per wrapped filter, whose trafo
+    # collects them into a single vector stored under `weights_param_name`.
+    get_weights_search_space = function(weights_param_name = "weights", normalize_weights = "uniform", prefix = "w") {
+      assert_string(prefix)
+      assert_string(weights_param_name)
+      assert_choice(normalize_weights, c("uniform", "naive", "no"))
+      fnames = names(private$.wrapped)
+      innames = if (prefix == "") fnames else paste0(prefix, ".", fnames)
+      domains = rep(list(p_dbl(0, 1)), length(fnames))
+      names(domains) = innames
+
+      domains$.extra_trafo = crate(function(x) {
+        w = unlist(x[innames], use.names = FALSE)
+        names(w) = fnames
+        x[innames] = NULL
+
+        if (normalize_weights == "uniform") {
+          # Clamp just below 1 so that -log1p(-w) stays finite.
+          w[w > 1 - .Machine$double.eps] = 1 - .Machine$double.eps
+          w = -log1p(-w)
+          w = w / max(sum(w), .Machine$double.eps)
+        } else if (normalize_weights == "naive") {
+          w = w / max(sum(w), .Machine$double.eps)
+        }
+        x[[weights_param_name]] = w
+        x
+      }, innames, fnames, normalize_weights, weights_param_name)
+
+      do.call(paradox::ps, domains)
+    }
+  ),
+  private = list(
+    .wrapped = NULL,
+    .own_param_set = NULL,
+    # Weighted sum of the wrapped filters' (optionally rank-transformed) scores.
+    .calculate = function(task, nfeat) {
+      pv = private$.own_param_set$get_values()
+      fn = task$feature_names
+      nfeat = length(fn)  # need to rank all features in an ensemble
+      weights = pv$weights
+      wnames = names(private$.wrapped)
+      if (!is.null(names(weights))) {
+        weights = weights[wnames]
+      }
+      scores = pmap(list(private$.wrapped, weights), function(x, w) {
+        x$calculate(task, nfeat)
+        s = x$scores[fn]
+        if (pv$rank_transform) s = rank(s)
+        s * w
+      })
+      structure(rowSums(as.data.frame(scores)), names = fn)
+    }
+  )
+
+)
diff --git a/R/bibentries.R b/R/bibentries.R
index de55741d5..0af04346f 100644
--- a/R/bibentries.R
+++ b/R/bibentries.R
@@ -52,5 +52,16 @@ bibentries = c(
     author = "Yujun Wu and Dennis D Boos and Leonard A Stefanski",
     title = "Controlling Variable Selection by the Addition of Pseudovariables",
     journal = "Journal of the American Statistical Association"
+  ),
+
+  binder_2020 = bibentry("inproceedings",
+    doi = "10.1145/3377930.3389815",
+    year = "2020",
+    publisher = "Association for Computing Machinery",
+    pages = "471--479",
+    author = "Martin Binder and Julia Moosbauer and Janek Thomas and Bernd Bischl",
+    title = "Multi-objective hyperparameter tuning and feature selection using filter ensembles",
+    booktitle = "Proceedings of the 2020 Genetic and Evolutionary Computation Conference"
   )
+
 )
diff --git a/R/zzz.R b/R/zzz.R
index a4333c8a8..a61b149b7 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -19,11 +19,24 @@ register_mlr3 = function() {
   x$pipeops$properties = c("validation", "internal_tuning")
 }
 
+# Add FilterEnsemble to the mlr3filters dictionary, if that namespace is loaded.
+register_mlr3filters = function() {
+  if ("mlr3filters" %in% loadedNamespaces()) {
+    x = utils::getFromNamespace("mlr_filters", ns = "mlr3filters")
+    x$add("ensemble", FilterEnsemble)
+  }
+}
+
+
+
 paradox_info <- list2env(list(is_old = FALSE), parent = emptyenv())
 
 .onLoad = function(libname, pkgname) {  # nocov start
   register_mlr3()
+  register_mlr3filters()
   setHook(packageEvent("mlr3", "onLoad"), function(...) register_mlr3(), action = "append")
+  # mlr3filters may be loaded after this package: register on its own load event.
+  setHook(packageEvent("mlr3filters", "onLoad"), function(...) register_mlr3filters(), action = "append")
   backports::import(pkgname)
   assign("lg", lgr::get_logger("mlr3/mlr3pipelines"), envir = parent.env(environment()))
 
diff --git a/man/mlr_filters_ensemble.Rd b/man/mlr_filters_ensemble.Rd
new file mode 100644
index 000000000..f2c1b3ced
--- /dev/null
+++ b/man/mlr_filters_ensemble.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/FilterEnsemble.R
+\name{mlr_filters_ensemble}
+\alias{mlr_filters_ensemble}
+\alias{FilterEnsemble}
+\title{Filter Ensemble}
+\format{
+\code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link[mlr3filters:Filter]{Filter}}.
+}
+\description{
+Implements the filter ensemble proposed in Binder (2020).
+}