Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%

128 statements  

coverage.py v7.6.10, created at 2025-01-21 01:50 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base class for an interface between the benchmarking framework and mlos_core
optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from contextlib import AbstractContextManager as ContextManager
from types import TracebackType
from typing import Literal

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.util import strtobool

_LOG = logging.getLogger(__name__)


class Optimizer(ContextManager, metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and mlos_core
    optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }

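    # Example (illustrative sketch, not a verbatim sample from the repo): an
    # optimizer section of an mlos_bench config using the properties above
    # might look roughly like the JSON below; the "class" value shown is an
    # assumption, pick the concrete optimizer class you actually use.
    #
    #   {
    #       "class": "mlos_bench.optimizers.MlosCoreOptimizer",
    #       "config": {
    #           "optimization_targets": {"score": "min"},
    #           "max_suggestions": 100,
    #           "seed": 42,
    #           "start_with_defaults": true
    #       }
    #   }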

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: dict | None = None,
        service: Service | None = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : dict | None
            Global configuration parameters (e.g., the experiment_id), if any.
        service : Service | None
            An optional service object for the optimizer to use.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: ConfigurationSpace | None = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")

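    # Worked example (illustrative; the target names are hypothetical): with
    #   optimization_targets = {"throughput": "max", "latency": "min"}
    # the loop above yields
    #   self._opt_targets == {"throughput": -1, "latency": 1}
    # so every target can later be treated as a minimization problem once its
    # value is multiplied by the stored sign (see _get_scores below).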

    def _validate_json_config(self, config: dict) -> None:
        """Reconstruct a basic JSON config that this class might have been instantiated
        from, in order to validate configs provided outside the file loading
        mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"""{opt_target}:{({1: "min", -1: "max"}[opt_dir])}"""
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: type[BaseException] | None,
        ex_val: BaseException | None,
        ex_tb: TracebackType | None,
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions

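    # Usage sketch (illustrative; assumes a concrete Optimizer subclass named
    # MyOptimizer and an existing TunableGroups instance `tunables`):
    #
    #   opt = MyOptimizer(tunables, config={"max_suggestions": 10})
    #   with opt:
    #       ...  # suggest/register loop goes here (see the sketch after register())
    #
    # Exceptions raised inside the `with` block are logged on exit but not
    # suppressed, since __exit__ returns False.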

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> dict[str, Literal["min", "max"]]:
        """Return a dictionary of optimization targets and their directions."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[dict[str, TunableValue] | None],
        status: Sequence[Status] | None = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[dict[str, TunableValue] | None]
            Benchmark results from experiments that correspond to `configs`.
        status : Sequence[Status] | None
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data

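    # Shape of the bulk_register() inputs (illustrative; the tunable name and
    # values are hypothetical):
    #
    #   configs = [{"vm_size": "Standard_D2s_v3"}, {"vm_size": "Standard_D4s_v3"}]
    #   scores  = [{"score": 0.87}, None]          # None for a failed trial
    #   status  = [Status.SUCCEEDED, Status.FAILED]
    #
    # All three sequences must have the same length; subclasses are expected to
    # call this base implementation first to validate them and to decide whether
    # to skip the default-initialization phase.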

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. The base class implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: dict[str, TunableValue] | None = None,
    ) -> dict[str, float] | None:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : dict[str, TunableValue] | None
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : dict[str, float] | None
            Benchmark scores extracted (and possibly transformed) from the results,
            to be MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_scores(status, score)

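    # Typical driver loop (illustrative sketch; `opt` is an instance of a
    # concrete subclass and `run_benchmark` is a hypothetical helper that
    # executes a trial and returns a (Status, scores-dict) pair):
    #
    #   with opt:
    #       while opt.not_converged():
    #           tunables = opt.suggest()
    #           status, scores = run_benchmark(tunables)
    #           opt.register(tunables, status, scores if status.is_succeeded() else None)
    #   best_score, best_config = opt.get_best_observation()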

    def _get_scores(
        self,
        status: Status,
        scores: dict[str, TunableValue] | dict[str, float] | None,
    ) -> dict[str, float] | None:
        """
        Extract scalar benchmark scores from the results and change their sign if we
        are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : dict[str, TunableValue] | None
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : dict[str, float] | None
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics

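    # Worked example (illustrative): with _opt_targets == {"throughput": -1}
    # (i.e., maximize throughput) and scores == {"throughput": 120.0},
    # _get_scores() returns {"throughput": -120.0}, so a downstream minimizer
    # that drives this value lower is effectively maximizing throughput.
    # A completed-but-failed trial instead maps every target to +inf.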

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> tuple[dict[str, float], TunableGroups] | tuple[None, None]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : tuple[dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """
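    # Example return values from get_best_observation() (illustrative shapes
    # only; the target name and value are hypothetical):
    #   ({"score": 0.95}, tunables_with_best_values)   # after successful trials
    #   (None, None)                                   # before any success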