Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%
128 statements
coverage.py v7.6.10, created at 2025-01-21 01:50 +0000
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base class for an interface between the benchmarking framework and mlos_core
optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from contextlib import AbstractContextManager as ContextManager
from types import TracebackType
from typing import Literal

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.util import strtobool

_LOG = logging.getLogger(__name__)


class Optimizer(ContextManager, metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and mlos_core
    optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: dict | None = None,
        service: Service | None = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : dict | None
            Free-format dict of global configuration parameters (e.g., `experiment_id`).
        service : Service | None
            An optional service object for the optimizer to use.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: ConfigurationSpace | None = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")
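        # Illustrative example (hypothetical values, not from any real config):
        # optimization_targets={"latency": "min", "throughput": "max"} yields
        # self._opt_targets == {"latency": 1, "throughput": -1}; _get_scores()
        # multiplies each raw score by this sign so that all targets are minimized.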

    def _validate_json_config(self, config: dict) -> None:
        """Reconstructs a basic json config that this class might have been instantiated
        from in order to validate configs provided outside the file loading
        mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"""{opt_target}:{({1: "min", -1: "max"}[opt_dir])}"""
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"
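        # e.g., repr(opt) -> "MyOptimizer(latency:min,throughput:max,config={...})"
        # (hypothetical subclass and target names; the format follows the f-string above).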

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: type[BaseException] | None,
        ex_val: BaseException | None,
        ex_tb: TracebackType | None,
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions
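
    # Typical use of the context-manager protocol above (illustrative sketch only;
    # `SomeOptimizer` is a hypothetical concrete subclass):
    #
    #     with SomeOptimizer(tunables, config) as opt:
    #         while opt.not_converged():
    #             suggestion = opt.suggest()
    #             ...  # run the trial and collect the results
    #             opt.register(suggestion, status, score)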

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> dict[str, Literal["min", "max"]]:
        """Returns a dictionary of optimization targets and their direction."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[dict[str, TunableValue] | None],
        status: Sequence[Status] | None = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[dict[str, TunableValue] | None]
            Benchmark results from experiments that correspond to `configs`.
        status : Sequence[Status] | None
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. The base implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: dict[str, TunableValue] | None = None,
    ) -> dict[str, float] | None:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : dict[str, TunableValue] | None
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : dict[str, float] | None
            Benchmark scores extracted (and possibly sign-flipped) from the results,
            to be MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_scores(status, score)

    def _get_scores(
        self,
        status: Status,
        scores: dict[str, TunableValue] | dict[str, float] | None,
    ) -> dict[str, float] | None:
        """
        Extract scalar benchmark scores from the results dict and flip the sign of
        each target we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : dict[str, TunableValue] | None
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : dict[str, float] | None
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir
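        # Worked example (hypothetical values): with self._opt_targets ==
        # {"latency": 1, "throughput": -1} and scores == {"latency": 10.0, "throughput": 200.0},
        # this returns {"latency": 10.0, "throughput": -200.0}.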

        return target_metrics

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> tuple[dict[str, float], TunableGroups] | tuple[None, None]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : tuple[dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """
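

# --------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a minimal concrete subclass
# showing how the abstract interface above fits together. It always suggests the
# current tunable values and keeps the best (lowest, after sign-flipping) registered
# score. The names MinimalOptimizer, _best_score, and _best_config are hypothetical.
# --------------------------------------------------------------------------------------


class MinimalOptimizer(Optimizer):
    """Toy optimizer that tracks the single best observation registered so far."""

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: dict | None = None,
        service: Service | None = None,
    ):
        super().__init__(tunables, config, global_config, service)
        self._best_score: dict[str, float] | None = None
        self._best_config: TunableGroups | None = None

    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[dict[str, TunableValue] | None],
        status: Sequence[Status] | None = None,
    ) -> bool:
        # Reuse the base class bookkeeping (length checks, start_with_defaults reset).
        return super().bulk_register(configs, scores, status)

    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: dict[str, TunableValue] | None = None,
    ) -> dict[str, float] | None:
        # The base class validates status/score consistency and returns the scores
        # converted to minimization targets (or None if the trial is still running).
        registered = super().register(tunables, status, score)
        if registered is not None and (
            self._best_score is None
            or sum(registered.values()) < sum(self._best_score.values())
        ):
            # Naive multi-objective comparison: sum the (already sign-flipped) targets.
            self._best_score = registered
            self._best_config = tunables.copy()
        return registered

    def get_best_observation(
        self,
    ) -> tuple[dict[str, float], TunableGroups] | tuple[None, None]:
        if self._best_score is None or self._best_config is None:
            return (None, None)
        return (self._best_score, self._best_config)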