Coverage for mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py: 87%
103 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-07 01:52 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Contains the wrapper class for SMAC Bayesian optimizers.
8See Also: <https://automl.github.io/SMAC3/main/index.html>
9"""
11from logging import warning
12from pathlib import Path
13from tempfile import TemporaryDirectory
14from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
15from warnings import warn
17import ConfigSpace
18import numpy.typing as npt
19import pandas as pd
21from mlos_core.optimizers.bayesian_optimizers.bayesian_optimizer import (
22 BaseBayesianOptimizer,
23)
24from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
25from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter
26from mlos_core.util import drop_nulls
class SmacOptimizer(BaseBayesianOptimizer):
    """Wrapper class for SMAC based Bayesian optimization."""

    def __init__(
        self,
        *,  # pylint: disable=too-many-locals,too-many-arguments
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
        seed: Optional[int] = 0,
        run_name: Optional[str] = None,
        output_directory: Optional[str] = None,
        max_trials: int = 100,
        n_random_init: Optional[int] = None,
        max_ratio: Optional[float] = None,
        use_default_config: bool = False,
        n_random_probability: float = 0.1,
    ):
        """
        Instantiate a new SMAC optimizer wrapper.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.

        optimization_targets : List[str]
            The names of the optimization targets to minimize.

        objective_weights : Optional[List[float]]
            Optional list of weights of optimization targets.

        space_adapter : BaseSpaceAdapter
            The space adapter class to employ for parameter space transformations.

        seed : Optional[int]
            By default SMAC uses a known seed (0) to keep results reproducible.
            However, if a `None` seed is explicitly provided, we let a random seed
            be produced by SMAC.

        run_name : Optional[str]
            Name of this run. This is used to easily distinguish across different runs.
            If set to `None` (default), SMAC will generate a hash from metadata.

        output_directory : Optional[str]
            The directory where SMAC output will saved. If set to `None` (default),
            a temporary dir will be used.

        max_trials : int
            Maximum number of trials (i.e., function evaluations) to be run. Defaults to 100.
            Note that modifying this value directly affects the value of
            `n_random_init`, if latter is set to `None`.

        n_random_init : Optional[int]
            Number of points evaluated at start to bootstrap the optimizer.
            Default depends on max_trials and number of parameters and max_ratio.
            Note: it can sometimes be useful to set this to 1 when pre-warming the
            optimizer from historical data.
            See Also: mlos_bench.optimizer.bulk_register

        max_ratio : Optional[float]
            Maximum ratio of max_trials to be random configs to be evaluated
            at start to bootstrap the optimizer.
            Useful if you want to explicitly control the number of random
            configs evaluated at start.

        use_default_config : bool
            Whether to use the default config for the first trial after random initialization.

        n_random_probability : float
            Probability of choosing to evaluate a random configuration during optimization.
            Defaults to `0.1`. Setting this to a higher value favors exploration over
            exploitation.
        """
        super().__init__(
            parameter_space=parameter_space,
            optimization_targets=optimization_targets,
            objective_weights=objective_weights,
            space_adapter=space_adapter,
        )

        # Declare at the top because we need it in __del__/cleanup()
        self._temp_output_directory: Optional[TemporaryDirectory] = None

        # pylint: disable=import-outside-toplevel
        from smac import HyperparameterOptimizationFacade as Optimizer_Smac
        from smac import Scenario
        from smac.intensifier.abstract_intensifier import AbstractIntensifier
        from smac.main.config_selector import ConfigSelector
        from smac.random_design.probability_design import ProbabilityRandomDesign
        from smac.runhistory import TrialInfo

        # Store for TrialInfo instances returned by .ask()
        self.trial_info_map: Dict[ConfigSpace.Configuration, TrialInfo] = {}

        # The default when not specified is to use a known seed (0) to keep results
        # reproducible.
        # However, if a `None` seed is explicitly provided, we let a random seed be
        # produced by SMAC (SMAC treats -1 as "generate a random seed internally").
        # https://automl.github.io/SMAC3/main/api/smac.scenario.html#smac.scenario.Scenario
        seed = -1 if seed is None else seed

        # Create temporary directory for SMAC output (if none provided)
        if output_directory is None:
            # pylint: disable=consider-using-with
            try:
                # Argument added in Python 3.10
                self._temp_output_directory = TemporaryDirectory(ignore_cleanup_errors=True)
            except TypeError:
                self._temp_output_directory = TemporaryDirectory()
            output_directory = self._temp_output_directory.name

        if n_random_init is not None:
            assert isinstance(n_random_init, int) and n_random_init >= 0
            if n_random_init == max_trials and use_default_config:
                # Increase max budgeted trials to account for use_default_config.
                max_trials += 1

        scenario: Scenario = Scenario(
            self.optimizer_parameter_space,
            objectives=self._optimization_targets,
            name=run_name,
            output_directory=Path(output_directory),
            deterministic=True,
            use_default_config=use_default_config,
            n_trials=max_trials,
            # NOTE: Previously `seed or -1` was passed here, which silently coerced
            # the default (documented reproducible) seed of 0 to -1, causing SMAC
            # to generate a random seed internally.  `seed` is already -1 iff the
            # caller explicitly passed `None` (see normalization above), so pass
            # it through unchanged.
            seed=seed,
            n_workers=1,  # Use a single thread for evaluating trials
        )
        intensifier: AbstractIntensifier = Optimizer_Smac.get_intensifier(
            scenario,
            max_config_calls=1,
        )
        config_selector: ConfigSelector = Optimizer_Smac.get_config_selector(
            scenario,
            retrain_after=1,
        )

        # TODO: When bulk registering prior configs to rewarm the optimizer,
        # there is a way to inform SMAC's initial design that we have
        # additional_configs and can set n_configs == 0.
        # Additionally, we may want to consider encoding those values into the
        # runhistory when prewarming the optimizer so that the initial design
        # doesn't reperform random init.
        # See Also: #488

        initial_design_args: Dict[str, Union[list, int, float, Scenario]] = {
            "scenario": scenario,
            # Workaround a bug in SMAC that sets a default arg to a mutable
            # value that can cause issues when multiple optimizers are
            # instantiated with the use_default_config option within the same
            # process that use different ConfigSpaces so that the second
            # receives the default config from both as an additional config.
            "additional_configs": [],
        }
        if n_random_init is not None:
            initial_design_args["n_configs"] = n_random_init
            if n_random_init > 0.25 * max_trials and max_ratio is None:
                warning(
                    "Number of random initial configs (%d) is "
                    "greater than 25%% of max_trials (%d). "
                    "Consider setting max_ratio to avoid SMAC overriding n_random_init.",
                    n_random_init,
                    max_trials,
                )
            if max_ratio is not None:
                assert isinstance(max_ratio, float) and 0.0 <= max_ratio <= 1.0
                initial_design_args["max_ratio"] = max_ratio

        # Use the default InitialDesign from SMAC.
        # (currently SBOL instead of LatinHypercube due to better uniformity
        # for initial sampling which results in lower overall samples required)
        initial_design = Optimizer_Smac.get_initial_design(
            **initial_design_args,  # type: ignore[arg-type]
        )

        # Workaround a bug in SMAC that doesn't pass the seed to the random
        # design when generated a random_design for itself via the
        # get_random_design static method when random_design is None.
        assert isinstance(n_random_probability, float) and n_random_probability >= 0
        random_design = ProbabilityRandomDesign(
            probability=n_random_probability,
            seed=scenario.seed,
        )

        self.base_optimizer = Optimizer_Smac(
            scenario,
            SmacOptimizer._dummy_target_func,
            initial_design=initial_design,
            intensifier=intensifier,
            random_design=random_design,
            config_selector=config_selector,
            multi_objective_algorithm=Optimizer_Smac.get_multi_objective_algorithm(
                scenario,
                objective_weights=self._objective_weights,
            ),
            overwrite=True,
            logging_level=False,  # Use the existing logger
        )

    def __del__(self) -> None:
        # Best-effort attempt to clean up, in case the user forgets to call .cleanup()
        self.cleanup()

    @property
    def n_random_init(self) -> int:
        """
        Gets the number of random samples to use to initialize the optimizer's search
        space sampling.

        Note: This may not be equal to the value passed to the initializer, due to
        logic present in the SMAC.
        See Also: max_ratio

        Returns
        -------
        int
            The number of random samples used to initialize the optimizer's search space sampling.
        """
        # pylint: disable=protected-access
        return self.base_optimizer._initial_design._n_configs

    @staticmethod
    def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None:
        """
        Dummy target function for SMAC optimizer.

        Since we only use the ask-and-tell interface, this is never called.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration to evaluate.

        seed : int
            Random seed to use for the target function. Not actually used.
        """
        # NOTE: Providing a target function when using the ask-and-tell interface is
        # an imperfection of the API -- this is planned to be fixed in some future
        # release: https://github.com/automl/SMAC3/issues/946
        raise RuntimeError("This function should never be called.")

    def _register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.

        scores : pd.DataFrame
            Scores from running the configs. The index is the same as the index of
            the configs.

        context : pd.DataFrame
            Not Yet Implemented.

        metadata : pd.DataFrame
            Not Yet Implemented.
        """
        from smac.runhistory import (  # pylint: disable=import-outside-toplevel
            StatusType,
            TrialInfo,
            TrialValue,
        )

        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)

        # Register each trial (one-by-one)
        for config, (_i, score) in zip(
            self._to_configspace_configs(configs=configs), scores.iterrows()
        ):
            # Retrieve previously generated TrialInfo (returned by .ask()) or create
            # new TrialInfo instance
            info: TrialInfo = self.trial_info_map.get(
                config,
                TrialInfo(config=config, seed=self.base_optimizer.scenario.seed),
            )
            value = TrialValue(cost=list(score.astype(float)), time=0.0, status=StatusType.SUCCESS)
            self.base_optimizer.tell(info, value, save=False)

        # Save optimizer once we register all configs
        self.base_optimizer.optimizer.save()

    def _suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        metadata : Optional[pd.DataFrame]
            Not yet implemented.
        """
        if TYPE_CHECKING:
            # pylint: disable=import-outside-toplevel,unused-import
            from smac.runhistory import TrialInfo

        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)

        trial: TrialInfo = self.base_optimizer.ask()
        # Sanity-check that the suggested config is valid in our parameter space.
        trial.config.check_valid_configuration()
        ConfigSpace.Configuration(
            self.optimizer_parameter_space,
            values=trial.config,
        ).check_valid_configuration()
        assert trial.config.config_space == self.optimizer_parameter_space
        # Remember the TrialInfo so _register() can pair the score with it later.
        self.trial_info_map[trial.config] = trial
        config_df = pd.DataFrame(
            [trial.config], columns=list(self.optimizer_parameter_space.keys())
        )
        return config_df, None

    def register_pending(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        raise NotImplementedError()

    def surrogate_predict(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
    ) -> npt.NDArray:
        """
        Obtain mean predictions from the surrogate model for the given configs.

        Only possible once all initial design points have been evaluated and the
        model has been trained.
        """
        # pylint: disable=import-outside-toplevel
        from smac.utils.configspace import convert_configurations_to_array

        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
        if self._space_adapter and not isinstance(self._space_adapter, IdentityAdapter):
            raise NotImplementedError("Space adapter not supported for surrogate_predict.")

        # pylint: disable=protected-access
        if len(self._observations) <= self.base_optimizer._initial_design._n_configs:
            raise RuntimeError(
                "Surrogate model can make predictions *only* after "
                "all initial points have been evaluated "
                f"{len(self._observations)} <= {self.base_optimizer._initial_design._n_configs}"
            )
        if self.base_optimizer._config_selector._model is None:
            raise RuntimeError("Surrogate model is not yet trained")

        config_array: npt.NDArray = convert_configurations_to_array(
            self._to_configspace_configs(configs=configs)
        )
        mean_predictions, _ = self.base_optimizer._config_selector._model.predict(config_array)
        return mean_predictions.reshape(
            -1,
        )

    def acquisition_function(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
    ) -> npt.NDArray:
        """Evaluate the acquisition function for the given configs."""
        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
        if self._space_adapter:
            raise NotImplementedError()

        # pylint: disable=protected-access
        if self.base_optimizer._config_selector._acquisition_function is None:
            raise RuntimeError("Acquisition function is not yet initialized")

        cs_configs: list = self._to_configspace_configs(configs=configs)
        return self.base_optimizer._config_selector._acquisition_function(cs_configs).reshape(
            -1,
        )

    def cleanup(self) -> None:
        # hasattr guard: __del__ may run before __init__ finished assigning the attr.
        if hasattr(self, "_temp_output_directory") and self._temp_output_directory is not None:
            self._temp_output_directory.cleanup()
            self._temp_output_directory = None

    def _to_configspace_configs(self, *, configs: pd.DataFrame) -> List[ConfigSpace.Configuration]:
        """
        Convert a dataframe of configs to a list of ConfigSpace configs.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.

        Returns
        -------
        configs : list
            List of ConfigSpace configs.
        """
        return [
            ConfigSpace.Configuration(
                self.optimizer_parameter_space,
                # Remove None values for inactive parameters
                values=drop_nulls(config.to_dict()),
                allow_inactive_with_values=False,
            )
            for (_, config) in configs.astype("O").iterrows()
        ]