Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%

127 statements  

coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base class for an interface between the benchmarking framework and mlos_core
optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from distutils.util import strtobool  # pylint: disable=deprecated-module
from types import TracebackType
from typing import Dict, Optional, Sequence, Tuple, Type, Union

from ConfigSpace import ConfigurationSpace
from typing_extensions import Literal

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups

_LOG = logging.getLogger(__name__)


class Optimizer(metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and mlos_core
    optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }
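
    # A minimal sketch of an optimizer "config" section (as JSON) using the base
    # properties above; the values shown are the defaults applied in __init__ below:
    #
    #   {
    #       "optimization_targets": {"score": "min"},
    #       "max_suggestions": 100,
    #       "seed": 42,
    #       "start_with_defaults": true
    #   }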

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : Optional[dict]
            Global configuration parameters shared across the framework (e.g., `experiment_id`).
        service : Optional[Service]
            An optional Service object for the optimizer to use.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: Optional[ConfigurationSpace] = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: Dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: Dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")

    def _validate_json_config(self, config: dict) -> None:
        """Reconstruct a basic JSON config that this class might have been instantiated
        from, in order to validate configs provided outside the file-loading
        mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)
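
    # The reconstructed document validated above has roughly this shape
    # (a sketch; the class path and config values here are hypothetical):
    #
    #   {
    #       "class": "mlos_bench.optimizers.<module>.<OptimizerSubclass>",
    #       "config": {"max_suggestions": 10, "seed": 42}
    #   }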

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"{opt_target}:{({1: 'min', -1: 'max'}[opt_dir])}"
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> Dict[str, Literal["min", "max"]]:
        """A dictionary of {target: direction} of optimization targets."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data
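
    # A hedged sketch of the data shapes expected by bulk_register()
    # (the tunable names and values below are hypothetical):
    #
    #   configs = [{"vm_size": "Standard_B2s"}, {"vm_size": "Standard_B4ms"}]
    #   scores = [{"score": 0.8}, None]  # None for the failed trial
    #   status = [Status.SUCCEEDED, Status.FAILED]
    #   optimizer.bulk_register(configs, scores, status)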

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. The base class implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()
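
    # A minimal usage sketch of the suggest/register loop, assuming a concrete
    # Optimizer subclass instance `opt` and a hypothetical `run_trial()` helper
    # that executes the benchmark and returns a (Status, scores-dict) pair:
    #
    #   with opt:
    #       while opt.not_converged():
    #           tunables = opt.suggest()
    #           (status, score) = run_trial(tunables)
    #           opt.register(tunables, status, score)
    #       (best_score, best_config) = opt.get_best_observation()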

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : Optional[Dict[str, float]]
            Benchmark scores extracted (and possibly transformed)
            from the results, to be MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_scores(status, score)

    def _get_scores(
        self,
        status: Status,
        scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]],
    ) -> Optional[Dict[str, float]]:
        """
        Extract scalar benchmark scores from the results and flip the sign of the
        ones we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : Optional[Dict[str, float]]
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: Dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics
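
    # For example (a sketch, not from the source): with optimization_targets of
    # {"score": "min", "throughput": "max"} (i.e., _opt_targets of
    # {"score": 1, "throughput": -1}), raw results {"score": 0.9, "throughput": 120}
    # become {"score": 0.9, "throughput": -120.0}, so every target is minimized.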

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : Tuple[Dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """