#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base class for an interface between the benchmarking framework and mlos_core
optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from distutils.util import strtobool  # pylint: disable=deprecated-module
from types import TracebackType
from typing import Dict, Optional, Sequence, Tuple, Type, Union

from ConfigSpace import ConfigurationSpace
from typing_extensions import Literal

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups

_LOG = logging.getLogger(__name__)


class Optimizer(metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and mlos_core
    optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : Optional[dict]
            Global configuration parameters shared across the framework components
            (e.g., `experiment_id`).
        service : Optional[Service]
            An optional Service object to make available to the optimizer.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: Optional[ConfigurationSpace] = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: Dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: Dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")
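
    # Illustrative only (the metric names here are hypothetical): a typical
    # optimizer config passed to the constructor might look like
    #
    #   {
    #       "optimization_targets": {"latency": "min", "throughput": "max"},
    #       "max_suggestions": 100,
    #       "seed": 42,
    #       "start_with_defaults": True,
    #   }
    #
    # which the loop above maps to
    # `self._opt_targets == {"latency": 1, "throughput": -1}`.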

    def _validate_json_config(self, config: dict) -> None:
        """Reconstruct a basic JSON config that this class might have been instantiated
        from, in order to validate configs provided outside the file loading
        mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"{opt_target}:{({1: 'min', -1: 'max'}[opt_dir])}"
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions
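
    # A minimal usage sketch, assuming a hypothetical `ConcreteOptimizer`
    # subclass and `tunable_groups` instance (neither defined in this module):
    #
    #   with ConcreteOptimizer(tunable_groups, config={}) as opt:
    #       suggestion = opt.suggest()
    #       ...
    #
    # Since `__exit__` returns False, exceptions raised inside the `with`
    # block propagate after being logged.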

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> Dict[str, Literal["min", "max"]]:
        """A dictionary of {target: direction} of optimization targets."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data
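
    # Illustrative only: a subclass would typically call this base
    # implementation first to validate its inputs, then feed the data to its
    # underlying optimizer, e.g.
    #
    #   def bulk_register(self, configs, scores, status=None) -> bool:
    #       if not super().bulk_register(configs, scores, status):
    #           return False
    #       # ... replay the (config, score, status) tuples into the optimizer ...
    #       return True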

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. The base class implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()
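
    # Illustrative only: a subclass would typically extend `suggest()` by
    # taking the copy from this base implementation and overwriting its values
    # with fresh ones from the underlying optimizer, e.g.
    #
    #   def suggest(self) -> TunableGroups:
    #       tunables = super().suggest()
    #       # ... assign new values from the underlying optimizer ...
    #       return tunables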

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : Optional[Dict[str, float]]
            Benchmark scores extracted (and possibly transformed) from the
            benchmark results; these values are always to be MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_scores(status, score)

    def _get_scores(
        self,
        status: Status,
        scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]],
    ) -> Optional[Dict[str, float]]:
        """
        Extract scalar benchmark scores from the results and change their sign if we
        are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : Optional[Dict[str, float]]
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: Dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics
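
    # A worked example of the sign flip above (metric names are hypothetical):
    # with `_opt_targets == {"latency": 1, "throughput": -1}` (minimize latency,
    # maximize throughput) and `scores == {"latency": 10.0, "throughput": 200.0}`,
    # `_get_scores` returns `{"latency": 10.0, "throughput": -200.0}`,
    # so both targets can be uniformly MINIMIZED downstream.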

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : Tuple[Dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """