Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%

128 statements  

coverage.py v7.6.10, created at 2025-01-21 01:50 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base class for an interface between the benchmarking framework and mlos_core
optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from contextlib import AbstractContextManager as ContextManager
from types import TracebackType
from typing import Literal

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.util import strtobool

_LOG = logging.getLogger(__name__)


class Optimizer(ContextManager, metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and mlos_core
    optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }

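    # Example (illustrative sketch, not a verbatim sample from the repo): an
    # optimizer section of an mlos_bench config using the properties above
    # might look roughly like the JSON below; the "class" value shown is an
    # assumption, pick the concrete optimizer class you actually use.
    #
    #   {
    #       "class": "mlos_bench.optimizers.MlosCoreOptimizer",
    #       "config": {
    #           "optimization_targets": {"score": "min"},
    #           "max_suggestions": 100,
    #           "seed": 42,
    #           "start_with_defaults": true
    #       }
    #   }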

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: dict | None = None,
        service: Service | None = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : dict | None
            Global configuration parameters (e.g., the experiment_id), if any.
        service : Service | None
            An optional service object for the optimizer to use.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: ConfigurationSpace | None = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")

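    # Worked example (illustrative; the target names are hypothetical): with
    #   optimization_targets = {"throughput": "max", "latency": "min"}
    # the loop above yields
    #   self._opt_targets == {"throughput": -1, "latency": 1}
    # so every target can later be treated as a minimization problem once its
    # value is multiplied by the stored sign (see _get_scores below).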

    def _validate_json_config(self, config: dict) -> None:
        """Reconstruct a basic JSON config that this class might have been instantiated
        from, in order to validate configs provided outside the file loading
        mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"""{opt_target}:{({1: "min", -1: "max"}[opt_dir])}"""
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: type[BaseException] | None,
        ex_val: BaseException | None,
        ex_tb: TracebackType | None,
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions

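    # Usage sketch (illustrative; assumes a concrete Optimizer subclass named
    # MyOptimizer and an existing TunableGroups instance `tunables`):
    #
    #   opt = MyOptimizer(tunables, config={"max_suggestions": 10})
    #   with opt:
    #       ...  # suggest/register loop goes here (see the sketch after register())
    #
    # Exceptions raised inside the `with` block are logged on exit but not
    # suppressed, since __exit__ returns False.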

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> dict[str, Literal["min", "max"]]:
        """Return a dictionary of optimization targets and their directions."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[dict[str, TunableValue] | None],
        status: Sequence[Status] | None = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[dict[str, TunableValue] | None]
            Benchmark results from experiments that correspond to `configs`.
        status : Sequence[Status] | None
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data

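    # Shape of the bulk_register() inputs (illustrative; the tunable name and
    # values are hypothetical):
    #
    #   configs = [{"vm_size": "Standard_D2s_v3"}, {"vm_size": "Standard_D4s_v3"}]
    #   scores  = [{"score": 0.87}, None]          # None for a failed trial
    #   status  = [Status.SUCCEEDED, Status.FAILED]
    #
    # All three sequences must have the same length; subclasses are expected to
    # call this base implementation first to validate them and to decide whether
    # to skip the default-initialization phase.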

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. The base class implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: dict[str, TunableValue] | None = None,
    ) -> dict[str, float] | None:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : dict[str, TunableValue] | None
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : dict[str, float] | None
            Benchmark scores extracted (and possibly transformed) from the results,
            to be MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_scores(status, score)

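    # Typical driver loop (illustrative sketch; `opt` is an instance of a
    # concrete subclass and `run_benchmark` is a hypothetical helper that
    # executes a trial and returns a (Status, scores-dict) pair):
    #
    #   with opt:
    #       while opt.not_converged():
    #           tunables = opt.suggest()
    #           status, scores = run_benchmark(tunables)
    #           opt.register(tunables, status, scores if status.is_succeeded() else None)
    #   best_score, best_config = opt.get_best_observation()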

    def _get_scores(
        self,
        status: Status,
        scores: dict[str, TunableValue] | dict[str, float] | None,
    ) -> dict[str, float] | None:
        """
        Extract scalar benchmark scores from the results and change their sign if we
        are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : dict[str, TunableValue] | None
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : dict[str, float] | None
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics

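    # Worked example (illustrative): with _opt_targets == {"throughput": -1}
    # (i.e., maximize throughput) and scores == {"throughput": 120.0},
    # _get_scores() returns {"throughput": -120.0}, so a downstream minimizer
    # that drives this value lower is effectively maximizing throughput.
    # A completed-but-failed trial instead maps every target to +inf.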

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> tuple[dict[str, float], TunableGroups] | tuple[None, None]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : tuple[dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """
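    # Example return values from get_best_observation() (illustrative shapes
    # only; the target name and value are hypothetical):
    #   ({"score": 0.95}, tunables_with_best_values)   # after successful trials
    #   (None, None)                                   # before any success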