---
title: "mlr3mbo"
subtitle: "Flexible Bayesian Optimization in R"
output: rmarkdown::html_vignette
bibliography: references.bib
vignette: >
  %\VignetteIndexEntry{mlr3mbo}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
library(knitr)
opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

update_db = function() {
  if (is.null(db$base) || is.null(db$aliases)) {
    hdb = hsearch_db(package = unique(c(db$index, db$hosted)), types = "help")
    db$base = setkeyv(as.data.table(hdb$Base), "ID")
    db$aliases = setkeyv(as.data.table(hdb$Aliases), "Alias")
  }
}

#' @title Hyperlink to Function Reference
#'
#' @description
#' Creates a markdown link to a function reference.
#'
#' @param topic Name of the topic to link against.
#' @param text Text to use for the link. Defaults to the topic name.
#' @param format Either markdown or HTML.
#'
#' @return (`character(1)`) markdown link.
ref = function(topic, text = topic, format = "markdown") {
  strip_parenthesis = function(x) sub("\\(\\)$", "", x)

  checkmate::assert_string(topic, pattern = "^[[:alnum:]._-]+(::[[:alnum:]._-]+)?(\\(\\))?$")
  checkmate::assert_string(text, min.chars = 1L)
  checkmate::assert_choice(format, c("markdown", "html"))

  topic = trimws(topic)
  text = trimws(text)

  if (stringi::stri_detect_fixed(topic, "::")) {
    parts = strsplit(topic, "::", fixed = TRUE)[[1L]]
    topic = parts[2L]
    name = strip_parenthesis(parts[2L])
    pkg = parts[1L]
  } else {
    update_db()
    matched = db$base[db$aliases[list(strip_parenthesis(topic)), c("Alias", "ID"), on = "Alias", nomatch = 0L], on = "ID", nomatch = NULL]
    if (nrow(matched) == 0L) {
      stop(sprintf("Could not find help page for topic '%s'", topic))
    }
    if (nrow(matched) >= 2L) {
      lgr$warn("Ambiguous link to '%s': %s", topic, paste0(paste(matched$Package, matched$Name, sep = "::"), collapse = " | "))
      matched = head(matched, 1L)
    }

    pkg = matched$Package
    name = matched$Name
    lgr$debug("Resolved '%s' to '%s::%s'", topic, pkg, name)
  }

  if (pkg %in% db$hosted) {
    url = sprintf("https://%s.mlr-org.com/reference/%s.html", pkg, name)
  } else {
    url = sprintf("https://www.rdocumentation.org/packages/%s/topics/%s", pkg, name)
  }

  switch(format,
    "markdown" = sprintf("[`%s`](%s)", text, url),
    "html" = sprintf("<a href=\"%s\">%s</a>", url, text)
  )
}

#' @title Hyperlink to Package
#'
#' @description
#' Links either to respective mlr3 website or to CRAN page.
#'
#' @param pkg Name of the package.
#' @inheritParams ref
#'
#' @return (`character(1)`) markdown link.
#' @export
ref_pkg = function(pkg, format = "markdown") {
  checkmate::assert_string(pkg, pattern = "^[[:alnum:]._-]+$")
  checkmate::assert_choice(format, c("markdown", "html"))
  pkg = trimws(pkg)

  if (grepl("/", pkg, fixed = TRUE)) {
    gh_pkg(pkg, format = format)
  } else if (pkg %in% db$hosted) {
    mlr_pkg(pkg, format = format)
  } else {
    cran_pkg(pkg, format = format)
  }
}

#' @title Hyperlink to CRAN Package
#'
#' @description
#' Creates a markdown link to a CRAN package.
#'
#' @inheritParams ref_pkg
#'
#' @return (`character(1)`) markdown link.
cran_pkg = function(pkg, format = "markdown") {
  checkmate::assert_string(pkg, pattern = "^[[:alnum:]._-]+$")
  checkmate::assert_choice(format, c("markdown", "html"))
  pkg = trimws(pkg)

  if (pkg %in% c("stats", "graphics", "datasets")) {
    return(pkg)
  }
  url = sprintf("https://cran.r-project.org/package=%s", pkg)
  switch(format,
    "markdown" = sprintf("[%s](%s)", pkg, url),
    "html" = sprintf("<a href=\"%s\">%s</a>", url, pkg)
  )
}

#' @title Hyperlink to mlr3 Package
#'
#' @description
#' Creates a markdown link to a mlr3 package with a "mlr-org.com" subdomain.
#'
#' @inheritParams ref_pkg
#'
#' @return (`character(1)`) markdown link.
mlr_pkg = function(pkg, format = "markdown") {
  checkmate::assert_string(pkg, pattern = "^[[:alnum:]._-]+$")
  checkmate::assert_choice(format, c("markdown", "html"))
  pkg = trimws(pkg)

  url = sprintf("https://%1$s.mlr-org.com", pkg)
  switch(format,
    "markdown" = sprintf("[%s](%s)", pkg, url),
    "html" = sprintf("<a href=\"%s\">%s</a>", url, pkg)
  )
}

#' @title Hyperlink to GitHub Repository
#'
#' @description
#' Creates a markdown link to a GitHub repository.
#'
#' @param pkg Name of the repository specified as "{repo}/{name}".
#' @inheritParams ref_pkg
#'
#' @return (`character(1)`) markdown link.
gh_pkg = function(pkg, format = "markdown") {
  checkmate::assert_string(pkg, pattern = "^[[:alnum:]_-]+/[[:alnum:]._-]+$")
  checkmate::assert_choice(format, c("markdown", "html"))
  pkg = trimws(pkg)

  parts = strsplit(pkg, "/", fixed = TRUE)[[1L]]
  url = sprintf("https://github.com/%s", pkg)
  switch(format,
    "markdown" = sprintf("[%s](%s)", parts[2L], url),
    "html" = sprintf("<a href=\"%s\">%s</a>", url, parts[2L])
  )
}

db = new.env()
db$index = c("base", "utils", "datasets", "data.table", "stats")
db$hosted = c("paradox", "mlr3misc", "mlr3", "mlr3data", "mlr3db", "mlr3proba", "mlr3pipelines", "mlr3learners", "mlr3filters", "bbotk", "mlr3tuning", "mlr3viz", "mlr3fselect", "mlr3cluster", "mlr3spatiotempcv", "mlr3spatial", "mlr3extralearners", "mlr3tuningspaces", "mlr3hyperband", "mlr3mbo")

lgr = NULL
```

# Intro

`r ref_pkg("mlr3mbo")` makes Bayesian Optimization (BO) available within the `r ref_pkg("mlr3")` ecosystem.
BO can be used to optimize any black box function and is well suited for hyperparameter optimization of machine learning models.
`r ref_pkg("mlr3mbo")` allows custom BO algorithms to be built from modular building blocks, but also provides a variety of standard single- and multi-objective BO algorithms that can be used in a straightforward manner.

We assume that the reader is somewhat familiar with black box optimization and `r ref_pkg("bbotk")`, hyperparameter optimization and `r ref_pkg("mlr3tuning")`, and knows the basics of BO.
Background material is, for example, given by @garnett_2022, @bischl_2021, and @mlr3book.

# Building Blocks

BO is an iterative optimization algorithm that makes use of a so-called surrogate to model the unknown black box function.
After an initial design of points has been evaluated, the surrogate model is trained on all data points observed so far, and an acquisition function is used to determine which points of the search space are promising candidates that should be evaluated next.
The acquisition function relies on the prediction of the surrogate model and requires no evaluation of the true black box function, and is therefore comparably cheap to optimize.
After the next candidate has been evaluated, the process repeats itself until a given termination criterion is met.

Most BO flavors therefore follow a simple loop:

1. Fit the surrogate on all observations made so far.
2. Optimize the acquisition function to find the next candidate that should be evaluated.
3. Evaluate the next candidate.
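Glossing over initialization and termination details, this loop can be sketched in code as follows. This is illustrative pseudocode and not meant to be run as is; `instance`, `acq_function`, and `acq_optimizer` stand for the building blocks introduced in the remainder of this vignette:

```{r, eval=FALSE}
# schematic sketch of the BO loop (pseudocode); `instance`, `acq_function`,
# and `acq_optimizer` are the building blocks introduced below
repeat {
  acq_function$surrogate$update()  # 1. fit the surrogate on all observations so far
  acq_function$update()            #    e.g., update the best function value observed so far
  xdt = acq_optimizer$optimize()   # 2. optimize the acquisition function
  instance$eval_batch(xdt)         # 3. evaluate the next candidate
}
```

In the actual implementation, stopping is handled via the terminator of the `instance` rather than an explicit loop condition.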
In the following, the basic building blocks of BO and their implementation in `r ref_pkg("mlr3mbo")` are introduced in more detail.

## Loop Function

The `r ref("mlr3mbo::loop_function", "loop_function")` determines the behavior of the BO algorithm on a global level, i.e., what the subroutine performed at each iteration looks like.
To get an overview of readily available `loop_function`s, the following dictionary can be inspected:

```{r}
library(mlr3mbo)
library(data.table)
as.data.table(mlr_loop_functions)
```

The dictionary shows the `key`, i.e., the ID of the `loop_function`, a brief description, for which optimization instance the resulting BO flavor can be used, as well as how the documentation can be accessed.

Technically, all `loop_function`s are members of the `S3` class `r ref("mlr3mbo::loop_function", "loop_function")`, and are simply decorated `r ref("base::function", "functions")` (because using an `r ref("R6::R6Class", "R6Class")` class would be over the top here - but this may change in the future).

To write your own `loop_function`, you can take inspiration from the readily available ones, e.g., `r ref("mlr3mbo::mlr_loop_functions_ego", "bayesopt_ego")`, which performs sequential single-objective optimization.
After some assertions and safety checks, and after the initial design has been evaluated, `bayesopt_ego` essentially only performs the following steps:

1. `acq_function$surrogate$update()`  # update the surrogate model
2. `acq_function$update()`  # update the acquisition function (e.g., update the best value observed so far)
3. `acq_optimizer$optimize()`  # optimize the acquisition function to yield a new candidate

## Surrogate

A surrogate encapsulates a regression learner that models the unknown black box function based on observed data.
In `r ref_pkg("mlr3mbo")`, `r ref("mlr3mbo::SurrogateLearner", "SurrogateLearner")` and `r ref("mlr3mbo::SurrogateLearnerCollection", "SurrogateLearnerCollection")` are the higher-level `R6` classes that should be used to construct a surrogate, both inheriting from the base `r ref("mlr3mbo::Surrogate", "Surrogate")` class.

As a learner, any `r ref("mlr3::LearnerRegr", "LearnerRegr")` from `r ref_pkg("mlr3")` can be used; however, most acquisition functions require both a mean and a variance prediction (therefore, not all learners are suitable for all scenarios).
Typical choices include:

* A `r ref("mlr3learners::mlr_learners_regr.km", "Gaussian Process")` for low-dimensional numeric search spaces
* A `r ref("mlr3learners::mlr_learners_regr.ranger", "Random Forest")` for higher-dimensional mixed (and / or hierarchical) search spaces

A `SurrogateLearner` can be constructed via:

```{r}
library(mlr3learners)
surrogate = SurrogateLearner$new(lrn("regr.km"))
```

or using syntactic sugar:

```{r}
surrogate = srlrn(lrn("regr.km"))
```

Note that `r ref("mlr3mbo::default_gp", "default_gp")` and `r ref("mlr3mbo::default_rf", "default_rf")` allow for the construction of a Gaussian Process and a random forest with sensible hyperparameter values already specified.

The encapsulated learner can be accessed via the `$learner` field:

```{r}
surrogate$learner
```

The surrogate itself has the following hyperparameters:

```{r}
surrogate$param_set
```

`assert_insample_perf = TRUE` results in the in-sample performance of the learner being calculated and asserted against a performance threshold after each `$update()`.
This requires the specification of a `perf_measure` (any regression measure, e.g., `r ref("mlr3::mlr_measures_regr.rsq", "R squared")`) and a `perf_threshold`.
If the threshold is not met, an error is thrown, which - unless `catch_errors = FALSE` - is caught within the optimization loop and results in, e.g., the next candidate being proposed uniformly at random.
For more details on error handling, see the [Safety Nets](#safety-nets) section.
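As a minimal sketch (with a purely illustrative threshold), the assertion could be enabled as follows:

```{r, eval=FALSE}
# illustrative: assert an in-sample R squared of at least 0.5
# after each surrogate update; the threshold of 0.5 is arbitrary
surrogate$param_set$values$assert_insample_perf = TRUE
surrogate$param_set$values$perf_measure = msr("regr.rsq")
surrogate$param_set$values$perf_threshold = 0.5
```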
Note that this in-sample performance assertion is not always meaningful; e.g., when using a Gaussian Process with no nugget, the in-sample performance will always be perfect.

Internally, the learner is fitted on a `r ref("mlr3::TaskRegr", "regression task")` constructed from the `r ref("bbotk::Archive", "Archive")` of the `r ref("bbotk::OptimInstance", "OptimInstance")` that is to be optimized.
The features and the target variable are determined automatically but can also be specified via the `$cols_x` and `$cols_y` active bindings.
Ideally, the `archive` is already passed during construction; however, lazy initialization is also possible (i.e., the `$archive` field will be automatically populated within the optimization routine of an `r ref("mlr3mbo::mlr_optimizers_mbo", "OptimizerMbo")`).

Important methods are `$update()` and `$predict()`, with the former typically being used within the `loop_function` and the latter being used within the implementation of an acquisition function.

Depending on the choice of the `loop_function`, multiple targets must be modelled by (currently independent) surrogates, in which case a `SurrogateLearnerCollection` should be used.
Construction and hyperparameters are analogous to the single-target scenario described above.

To get an overview of all available regression learners within the `r ref_pkg("mlr3")` ecosystem, use:

```{r, eval=FALSE}
library(mlr3)
library(mlr3learners)
# there are plenty more in mlr3extralearners
# library(mlr3extralearners)
learners = as.data.table(mlr_learners)
learners[task_type == "regr"]
```

To use a custom learner not included in `r ref_pkg("mlr3")`, `r ref_pkg("mlr3learners")`, or `r ref_pkg("mlr3extralearners")`, you can inherit from `r ref("mlr3::LearnerRegr", "LearnerRegr")` and use this custom learner within the surrogate.

## Acquisition Function

Based on a surrogate, an acquisition function quantifies the attractiveness of each point of the search space if it were to be evaluated in the next iteration.

A popular example is given by the Expected Improvement [@jones_1998]:

$$
\mathbb{E} \left[ \max \left( f_{\mathrm{min}} - Y(\mathbf{x}), 0 \right) \right]
$$

Here, $Y(\mathbf{x})$ is the surrogate prediction (a random variable) for a given point $\mathbf{x}$ (which, when using a Gaussian Process, follows a normal distribution) and $f_{\mathrm{min}}$ is the best function value observed so far (assuming minimization).

To get an overview of available acquisition functions, the following dictionary can be inspected:

```{r}
as.data.table(mlr_acqfunctions)
```

The dictionary shows the `key`, i.e., the ID of the acquisition function, a brief description, and how the documentation can be accessed.

Technically, all acquisition functions inherit from the `R6` class `r ref("mlr3mbo::AcqFunction", "AcqFunction")`, which itself simply inherits from the base `r ref("bbotk::Objective", "Objective")` class.

Construction is straightforward via:

```{r}
acq_function = AcqFunctionEI$new()
```

or using syntactic sugar:

```{r}
acq_function = acqf("ei")
```

Internally, the `domain` and `codomain` are constructed based on the `r ref("bbotk::Archive", "Archive")` referenced by the `r ref("mlr3mbo::Surrogate", "Surrogate")`, and therefore the `surrogate` should already be passed as an argument during construction.
However, lazy initialization is also possible.
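As a minimal sketch, assuming the `surrogate` constructed above, it can be attached right away during construction:

```{r, eval=FALSE}
# sketch: construct EI with the surrogate attached right away
acq_function = acqf("ei", surrogate = surrogate)
```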
If the acquisition function itself is parameterized, hyperparameters should be passed as constants, e.g.:

```{r}
acqf("cb")  # lower / upper confidence bound with lambda hyperparameter
```

To use a custom acquisition function, you should implement a new `R6` class inheriting from `r ref("mlr3mbo::AcqFunction", "AcqFunction")`.

## Acquisition Function Optimizer

To find the most promising candidate for evaluation, the acquisition function itself must be optimized.
Internally, an `r ref("bbotk::OptimInstance", "OptimInstance")` is constructed using the acquisition function as an `r ref("bbotk::Objective", "Objective")`.

An acquisition function optimizer is then used to solve this optimization problem.
Technically, this optimizer is a member of the `r ref("mlr3mbo::AcqOptimizer", "AcqOptimizer")` `R6` class.
Construction requires specifying an `r ref("bbotk::Optimizer", "Optimizer")` as well as a `r ref("bbotk::Terminator", "Terminator")`:

```{r}
library(bbotk)
acq_optimizer = AcqOptimizer$new(opt("random_search"), terminator = trm("evals"))
```

Syntactic sugar:

```{r}
acq_optimizer = acqo(opt("random_search"), terminator = trm("evals"))
```

The optimizer and terminator can be accessed via the `$optimizer` and `$terminator` fields:

```{r}
acq_optimizer$optimizer
```

```{r}
acq_optimizer$terminator
```

Internally, the acquisition function optimizer also requires the acquisition function, and therefore the `acq_function` argument should be specified during construction.
However, lazy initialization is also possible.

An `r ref("mlr3mbo::AcqOptimizer", "AcqOptimizer")` has the following hyperparameters:

```{r}
acq_optimizer$param_set
```

`catch_errors = TRUE` results in catching any errors that can happen during the acquisition function optimization, which allows for, e.g., proposing the next candidate uniformly at random within the `loop_function`.
For more details on this mechanism, see the [Safety Nets](#safety-nets) section.

`logging_level` specifies the logging level during acquisition function optimization.
Often it is useful to only log the progress of the BO loop, and therefore `logging_level` is set to `"warn"` by default.
For debugging purposes, this should be set to `"info"`.

`skip_already_evaluated = TRUE` results in candidates that were already evaluated in previous iterations not being proposed again.

`warmstart = TRUE` results in the best `warmstart_size` points present in the `archive` of the `OptimInstance` also being evaluated on the acquisition function `OptimInstance` prior to running the actual acquisition function optimization (see the sketch at the end of this section).
This is especially useful when using evolutionary algorithms or variants of local search as the acquisition function optimizer, as the current best points should usually be part of the initial population so that local optima are refined further.

To get an overview of all `Optimizer`s implemented in `r ref_pkg("bbotk")`, you can use:

```{r, eval=FALSE}
as.data.table(mlr_optimizers)
```

And similarly for `Terminator`s:

```{r, eval=FALSE}
as.data.table(mlr_terminators)
```

Again, you can also use custom `Optimizer`s or `Terminator`s by implementing new `R6` classes inheriting from `r ref("bbotk::Optimizer", "Optimizer")` and `r ref("bbotk::Terminator", "Terminator")`, respectively.
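As a minimal sketch of the warmstart mechanism described above (the values are purely illustrative):

```{r, eval=FALSE}
# illustrative: warmstart the acquisition function optimization with the
# 10 best points of the archive
acq_optimizer$param_set$values$warmstart = TRUE
acq_optimizer$param_set$values$warmstart_size = 10L
```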
# Putting it Together

Having introduced all building blocks, we are now ready to put everything together in the form of an `r ref("mlr3mbo::mlr_optimizers_mbo", "OptimizerMbo")` or `r ref("mlr3mbo::mlr_tuners_mbo", "TunerMbo")`.

`r ref("mlr3mbo::mlr_optimizers_mbo", "OptimizerMbo")` inherits from `r ref("bbotk::Optimizer", "Optimizer")` and requires a `loop_function`, `surrogate`, `acq_function`, and `acq_optimizer`.
Construction is performed via:

```{r}
optimizer = OptimizerMbo$new(bayesopt_ego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)
```

or using syntactic sugar:

```{r}
optimizer = opt("mbo",
  loop_function = bayesopt_ego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)
```

```{r}
optimizer
```

Additional arguments, i.e., arguments of the `loop_function`, can be passed via the `args` argument during construction or the `$args` active binding.

Finally, the mechanism by which the final result is obtained after the optimization process (i.e., the best point in the case of single-objective optimization and the Pareto set in the case of multi-objective optimization) can be changed via the `result_assigner` argument during construction or the `$result_assigner` active binding.
As an example, `r ref("mlr3mbo::mlr_result_assigners_surrogate", "ResultAssignerSurrogate")` will choose the final solution based on the prediction of the surrogate instead of the evaluations logged in the `archive`, which is sensible in the case of noisy objective functions.
The default, however, is to use `r ref("mlr3mbo::mlr_result_assigners_archive", "ResultAssignerArchive")`, which will directly choose the final solution based on the evaluations logged in the `archive`.

To get an overview of available result assigners, the following dictionary can be inspected:

```{r}
as.data.table(mlr_result_assigners)
```

The dictionary shows the `key`, i.e., the ID of the result assigner, a brief description, and how the documentation can be accessed.

Construction of result assigners is straightforward:

```{r}
result_assigner = ResultAssignerArchive$new()
```

Syntactic sugar:

```{r}
result_assigner = ras("archive")
```

Note that important fields of an `OptimizerMbo` such as `$param_classes`, `$packages`, and `$properties` are automatically determined based on the choice of the `loop_function`, `surrogate`, `acq_function`, `acq_optimizer`, and `result_assigner`.

If arguments such as the `surrogate`, `acq_function`, `acq_optimizer`, and `result_assigner` were not fully initialized during construction, e.g., the `surrogate` is missing the `archive` or the `acq_function` is missing the `surrogate`, lazy initialization is completed prior to the optimizer being used for optimization.

An object of class `OptimizerMbo` can be used to optimize an object of class `OptimInstanceBatchSingleCrit` or `OptimInstanceBatchMultiCrit`.

For hyperparameter optimization, `r ref("mlr3mbo::mlr_tuners_mbo", "TunerMbo")` should be used (which simply relies on an `OptimizerMbo` that is constructed internally):

```{r}
tuner = TunerMbo$new(bayesopt_ego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)
mlr3misc::get_private(tuner)[[".optimizer"]]
```

## The Initial Design

`r ref_pkg("mlr3mbo")` offers two different ways of specifying an initial design:

1. One can simply evaluate points on the `OptimInstance` that is to be optimized prior to using an `OptimizerMbo`, as sketched below.
   In this case, the `loop_function` should skip the construction and evaluation of an initial design.
2. If no points have been evaluated on the `OptimInstance` yet, the `loop_function` should construct an initial design itself and evaluate it; e.g., `bayesopt_ego` then constructs an initial design of size $4D$, where $D$ is the dimensionality of the search space, by sampling points uniformly at random.
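As a sketch of the first approach, assuming a hypothetical `instance` of class `OptimInstanceBatchSingleCrit` over two numeric parameters `x1` and `x2` and the `optimizer` constructed above:

```{r, eval=FALSE}
# sketch of the first approach; `instance` is a hypothetical
# OptimInstanceBatchSingleCrit over numeric parameters x1 and x2
initial_design = data.table(
  x1 = c(-400, 0, 400),
  x2 = c(400, 0, -400)
)
instance$eval_batch(initial_design)
optimizer$optimize(instance)  # the loop_function now skips its own initial design
```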
Functions for creating different initial designs are part of the `r ref_pkg("paradox")` package, e.g.:

1. `r ref("paradox::generate_design_random", "generate_design_random")`: uniformly at random
1. `r ref("paradox::generate_design_grid", "generate_design_grid")`: uniform sized grid
1. `r ref("paradox::generate_design_lhs", "generate_design_lhs")`: Latin hypercube sampling
1. `r ref("paradox::generate_design_sobol", "generate_design_sobol")`: Sobol sequence

# Defaults

`r ref_pkg("mlr3mbo")` tries to use intelligent defaults for the `loop_function`, `surrogate`, `acq_function`, and `acq_optimizer` within `OptimizerMbo` and `TunerMbo`.
For details, see `r ref("mlr3mbo::mbo_defaults", "mbo_defaults")`.

# Safety Nets

`r ref_pkg("mlr3mbo")` is quite stable in the sense that - if desired - all kinds of errors can be caught and handled appropriately within the `r ref("mlr3mbo::loop_function", "loop_function")`.

As an example, let's have a look at the inner workings of `r ref("mlr3mbo::mlr_loop_functions_ego", "bayesopt_ego")`:

```{r, eval = FALSE}
repeat {
  xdt = tryCatch({
    .
    .
    .
    acq_function$surrogate$update()
    acq_function$update()
    acq_optimizer$optimize()
  }, mbo_error = function(mbo_error_condition) {
    lg$info(paste0(class(mbo_error_condition), collapse = " / "))
    lg$info("Proposing a randomly sampled point")
    SamplerUnif$new(domain)$sample(1L)$data
  })
  .
  .
  .
}
```

In each iteration, a new candidate is chosen by updating the surrogate and the acquisition function and then optimizing the acquisition function.
If any error happens during any of these steps, it is upgraded to an error of class `"mbo_error"` (more precisely, `"surrogate_update_error"` for surrogate-related errors and `"acq_optimizer_error"` for errors related to the acquisition function optimization).
These errors are then caught and a fallback is triggered: evaluating the next candidate chosen uniformly at random.
Note that the same mechanism is also used to handle random interleaving.

To illustrate this error handling mechanism, consider the following scenario:
We try to minimize $f: [-1, 1] \rightarrow \mathbb{R}, x \mapsto x^2$; however, our Gaussian Process fails due to data points being too close to each other:

```{r}
set.seed(2906)
domain = ps(x = p_dbl(lower = -1, upper = 1))

codomain = ps(y = p_dbl(tags = "minimize"))

objective_function = function(xs) {
  list(y = xs$x ^ 2)
}

objective = ObjectiveRFun$new(
  fun = objective_function,
  domain = domain,
  codomain = codomain)

instance = OptimInstanceBatchSingleCrit$new(
  objective = objective,
  terminator = trm("evals", n_evals = 10))

initial_design = data.table(x = rep(1, 4))
instance$eval_batch(initial_design)

surrogate = srlrn(default_gp())

acq_function = acqf("ei")

acq_optimizer = acqo(opt("random_search", batch_size = 1000),
  terminator = trm("evals", n_evals = 1000))

optimizer = opt("mbo",
  loop_function = bayesopt_ego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)

optimizer$optimize(instance)
```

The log tells us that an error happened and was caught: `"surrogate_update_error / mbo_error / error / condition"`.
We also see in the `archive` that the first candidate after the initial design (the fifth point) was not proposed based on optimizing the acquisition function (the `"acq_ei"` column is `NA` here):

```{r}
instance$archive$data
```

Nevertheless, due to the safety net, the BO loop eventually worked just fine and did not simply throw an error.

If we set `catch_errors = FALSE` within the surrogate, we see that the error was indeed caused by the surrogate:

```{r, error = TRUE}
instance$archive$clear()
instance$eval_batch(initial_design)
optimizer$surrogate$param_set$values$catch_errors = FALSE
optimizer$optimize(instance)
```

If the error belongs to the `acq_optimizer_error` class, it is helpful to increase the logging level of the acquisition function optimizer (e.g., `acq_optimizer$param_set$values$logging_level = "info"`) and to also set `acq_optimizer$param_set$values$catch_errors = FALSE`.
This allows for straightforward debugging.

To make sure that your BO loop behaved as expected, always inspect the log of the optimization process, inspect the `archive`, and check whether the acquisition function column is populated as expected.

# Writing Your Own Loop Function

Writing a custom `loop_function` is straightforward.
Any `loop_function` must be an object of the `S3` class `r ref("mlr3mbo::loop_function", "loop_function")` (simply a standard R function with some requirements regarding its arguments and attributes).
Arguments of the function must include `instance`, `surrogate`, `acq_function`, and `acq_optimizer`, and attributes must include `id` (the ID of the loop function), `label` (a brief description), `instance` (`"single-crit"` and/or `"multi-crit"`), and `man` (a link to the manual page).

Technically, any `loop_function` therefore looks like the following:

```{r}
bayesopt_custom = function(instance, surrogate, acq_function, acq_optimizer) {
  # typically some assertions

  # initial design handling

  # actual loop function
}

class(bayesopt_custom) = "loop_function"
attr(bayesopt_custom, "id") = "bayesopt_custom"
attr(bayesopt_custom, "label") = "My custom BO loop"
attr(bayesopt_custom, "instance") = "single-crit"
attr(bayesopt_custom, "man") = ""  # no man page

# if you want to add it to the dictionary:
mlr_loop_functions$add("bayesopt_custom", bayesopt_custom)
```

```{r}
bayesopt_custom
```

For some inspiration on how to write actual meaningful `loop_function`s, see, e.g., `r ref("mlr3mbo::mlr_loop_functions_ego", "bayesopt_ego")`.

# Examples

In this final section, some standard examples are provided.
## Single-Objective: 2D Schwefel Function

```{r, eval=FALSE}
objective_function = function(xs) {
  list(y = 418.9829 * 2 - (sum(unlist(xs) * sin(sqrt(abs(unlist(xs)))))))
}

domain = ps(x1 = p_dbl(lower = -500, upper = 500),
  x2 = p_dbl(lower = -500, upper = 500))

codomain = ps(y = p_dbl(tags = "minimize"))

objective = ObjectiveRFun$new(
  fun = objective_function,
  domain = domain,
  codomain = codomain)

instance = OptimInstanceBatchSingleCrit$new(
  objective = objective,
  search_space = domain,
  terminator = trm("evals", n_evals = 60))

# Gaussian Process, EI, DIRECT
surrogate = srlrn(default_gp())

acq_function = acqf("ei")

acq_optimizer = acqo(opt("nloptr", algorithm = "NLOPT_GN_DIRECT_L"),
  terminator = trm("stagnation", threshold = 1e-8))

optimizer = opt("mbo",
  loop_function = bayesopt_ego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)

set.seed(2906)
optimizer$optimize(instance)
```

```{r, eval=FALSE}
library(ggplot2)

ggplot(aes(x = batch_nr, y = cummin(y)), data = instance$archive$data) +
  geom_point() +
  geom_step() +
  labs(x = "Batch Nr.", y = "Best y") +
  theme_minimal()
```

```{r, eval=FALSE}
xdt = generate_design_grid(instance$search_space, resolution = 101)$data
ydt = objective$eval_dt(xdt)

ggplot(aes(x = x1, y = x2, z = y), data = cbind(xdt, ydt)) +
  geom_contour_filled() +
  geom_point(aes(color = batch_nr), size = 2, data = instance$archive$data) +
  scale_color_gradient(low = "lightgrey", high = "red") +
  theme_minimal()
```

## Multi-Objective: Schaffer Function N. 1

### ParEGO

```{r, eval=FALSE}
objective_function = function(xs) {
  list(y1 = xs$x^2, y2 = (xs$x - 2)^2)
}

domain = ps(x = p_dbl(lower = -10, upper = 10))

codomain = ps(y1 = p_dbl(tags = "minimize"),
  y2 = p_dbl(tags = "minimize"))

objective = ObjectiveRFun$new(
  fun = objective_function,
  domain = domain,
  codomain = codomain)

instance = OptimInstanceBatchMultiCrit$new(
  objective = objective,
  search_space = domain,
  terminator = trm("evals", n_evals = 30))

# Gaussian Process, EI, DIRECT
surrogate = srlrn(default_gp())

acq_function = acqf("ei")

acq_optimizer = acqo(opt("nloptr", algorithm = "NLOPT_GN_DIRECT_L"),
  terminator = trm("stagnation", threshold = 1e-8))

optimizer = opt("mbo",
  loop_function = bayesopt_parego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)

set.seed(2906)
optimizer$optimize(instance)
```

```{r, eval=FALSE}
ggplot(aes(x = y1, y = y2), data = instance$archive$best()) +
  geom_point() +
  theme_minimal()
```

```{r, eval=FALSE}
library(emoa)
library(mlr3misc)
library(data.table)

anytime_hypervolume = map_dtr(unique(instance$archive$data$batch_nr), function(bnr) {
  pareto = instance$archive$best(batch = 1:bnr)[, instance$archive$cols_y, with = FALSE]
  dhv = dominated_hypervolume(t(pareto), ref = t(t(c(100, 144))))
  data.table(batch_nr = bnr, dhv = dhv)
})

ggplot(aes(x = batch_nr, y = dhv), data = anytime_hypervolume[batch_nr > 1]) +
  geom_point() +
  geom_step(direction = "vh") +
  labs(x = "Batch Nr.", y = "Dominated Hypervolume") +
  theme_minimal()
```

### SMS-EGO

```{r, eval=FALSE}
objective_function = function(xs) {
  list(y1 = xs$x^2, y2 = (xs$x - 2)^2)
}

domain = ps(x = p_dbl(lower = -10, upper = 10))

codomain = ps(y1 = p_dbl(tags = "minimize"),
  y2 = p_dbl(tags = "minimize"))

objective = ObjectiveRFun$new(
  fun = objective_function,
  domain = domain,
  codomain = codomain)

instance = OptimInstanceBatchMultiCrit$new(
  objective = objective,
  search_space = domain,
  terminator = trm("evals", n_evals = 30))

# Gaussian Processes, SMS-EGO, DIRECT
learner_y1 = default_gp()
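# one independent GP per objective: deep clone the first learner to get
# an identically configured second one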
learner_y2 = learner_y1$clone(deep = TRUE)

surrogate = srlrn(list(learner_y1, learner_y2))

acq_function = acqf("smsego")

acq_optimizer = acqo(opt("nloptr", algorithm = "NLOPT_GN_DIRECT_L"),
  terminator = trm("stagnation", threshold = 1e-8))

optimizer = opt("mbo",
  loop_function = bayesopt_smsego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)

set.seed(2906)
optimizer$optimize(instance)
```

```{r, eval=FALSE}
ggplot(aes(x = y1, y = y2), data = instance$archive$best()) +
  geom_point() +
  theme_minimal()
```

```{r, eval=FALSE}
anytime_hypervolume = map_dtr(unique(instance$archive$data$batch_nr), function(bnr) {
  pareto = instance$archive$best(batch = 1:bnr)[, instance$archive$cols_y, with = FALSE]
  dhv = dominated_hypervolume(t(pareto), ref = t(t(c(100, 144))))
  data.table(batch_nr = bnr, dhv = dhv)
})

ggplot(aes(x = batch_nr, y = dhv), data = anytime_hypervolume[batch_nr > 1]) +
  geom_point() +
  geom_step(direction = "vh") +
  labs(x = "Batch Nr.", y = "Dominated Hypervolume") +
  theme_minimal()
```

## Single-Objective HPO

```{r, eval=FALSE}
library(mlr3)

task = tsk("wine")

learner = lrn("classif.rpart",
  cp = to_tune(lower = 1e-4, upper = 1, logscale = TRUE),
  maxdepth = to_tune(lower = 1, upper = 10),
  minbucket = to_tune(lower = 1, upper = 10),
  minsplit = to_tune(lower = 1, upper = 10))

resampling = rsmp("cv", folds = 3)

measure = msr("classif.acc")

instance = TuningInstanceBatchSingleCrit$new(
  task = task,
  learner = learner,
  resampling = resampling,
  measure = measure,
  terminator = trm("evals", n_evals = 30))

# Gaussian Process, EI, FocusSearch
surrogate = srlrn(default_gp(noisy = TRUE))

acq_function = acqf("ei")

acq_optimizer = acqo(opt("focus_search", n_points = 100L, maxit = 9),
  terminator = trm("evals", n_evals = 3000))

tuner = tnr("mbo",
  loop_function = bayesopt_ego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)

set.seed(2906)
tuner$optimize(instance)
instance$result
```

## Multi-Objective HPO

```{r, eval=FALSE}
task = tsk("wine")

learner = lrn("classif.rpart",
  cp = to_tune(lower = 1e-4, upper = 1, logscale = TRUE),
  maxdepth = to_tune(lower = 1, upper = 10),
  minbucket = to_tune(lower = 1, upper = 10),
  minsplit = to_tune(lower = 1, upper = 10))

resampling = rsmp("cv", folds = 3)

measures = msrs(c("classif.acc", "selected_features"))

instance = TuningInstanceBatchMultiCrit$new(
  task = task,
  learner = learner,
  resampling = resampling,
  measures = measures,
  terminator = trm("evals", n_evals = 30),
  store_models = TRUE)  # required due to selected features

# Gaussian Process, EI, FocusSearch
surrogate = srlrn(default_gp(noisy = TRUE))

acq_function = acqf("ei")

acq_optimizer = acqo(opt("focus_search", n_points = 100L, maxit = 9),
  terminator = trm("evals", n_evals = 3000))

tuner = tnr("mbo",
  loop_function = bayesopt_parego,
  surrogate = surrogate,
  acq_function = acq_function,
  acq_optimizer = acq_optimizer)

set.seed(2906)
tuner$optimize(instance)
instance$result
```