Coverage for mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py: 87%

110 statements  


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Contains the wrapper class for the :py:class:`.SmacOptimizer`.

Notes
-----
See the `SMAC3 Documentation <https://automl.github.io/SMAC3/main/index.html>`_ for
more details.
"""

from logging import warning
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING, Dict, List, Optional, Union
from warnings import warn

import ConfigSpace
import numpy.typing as npt
import pandas as pd
from smac.utils.configspace import convert_configurations_to_array

from mlos_core.data_classes import Observation, Observations, Suggestion
from mlos_core.optimizers.bayesian_optimizers.bayesian_optimizer import (
    BaseBayesianOptimizer,
)
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter

class SmacOptimizer(BaseBayesianOptimizer):
    """Wrapper class for SMAC based Bayesian optimization."""

    def __init__(
        self,
        *,  # pylint: disable=too-many-locals,too-many-arguments
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
        seed: Optional[int] = 0,
        run_name: Optional[str] = None,
        output_directory: Optional[str] = None,
        max_trials: int = 100,
        n_random_init: Optional[int] = None,
        max_ratio: Optional[float] = None,
        use_default_config: bool = False,
        n_random_probability: float = 0.1,
    ):

        """
        Instantiate a new SMAC optimizer wrapper.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.

        optimization_targets : List[str]
            The names of the optimization targets to minimize.

        objective_weights : Optional[List[float]]
            Optional list of weights of the optimization targets.

        space_adapter : Optional[BaseSpaceAdapter]
            The space adapter class to employ for parameter space transformations.

        seed : Optional[int]
            By default SMAC uses a known seed (0) to keep results reproducible.
            However, if a `None` seed is explicitly provided, we let a random seed
            be produced by SMAC.

        run_name : Optional[str]
            Name of this run. This is used to easily distinguish across different runs.
            If set to `None` (default), SMAC will generate a hash from metadata.

        output_directory : Optional[str]
            The directory where SMAC output will be saved. If set to `None` (default),
            a temporary dir will be used.

        max_trials : int
            Maximum number of trials (i.e., function evaluations) to be run. Defaults to 100.
            Note that modifying this value directly affects the value of
            `n_random_init`, if the latter is set to `None`.

        n_random_init : Optional[int]
            Number of points evaluated at start to bootstrap the optimizer.
            The default depends on `max_trials`, the number of parameters, and `max_ratio`.
            Note: it can sometimes be useful to set this to 1 when pre-warming the
            optimizer from historical data. See Also:
            :py:meth:`mlos_bench.optimizers.base_optimizer.Optimizer.bulk_register`

        max_ratio : Optional[float]
            Maximum fraction of `max_trials` that may be spent on random configs
            evaluated at start to bootstrap the optimizer.
            Useful if you want to explicitly control the number of random
            configs evaluated at start.

        use_default_config : bool
            Whether to use the default config for the first trial after random initialization.

        n_random_probability : float
            Probability of choosing to evaluate a random configuration during optimization.
            Defaults to `0.1`. Setting this to a higher value favors exploration over
            exploitation.
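
        Examples
        --------
        A minimal construction sketch; the hyperparameter ``x`` and the target
        name ``score`` are illustrative, not part of this API:

        >>> import ConfigSpace
        >>> cs = ConfigSpace.ConfigurationSpace(seed=1234)
        >>> _ = cs.add_hyperparameter(ConfigSpace.UniformFloatHyperparameter("x", 0.0, 1.0))
        >>> opt = SmacOptimizer(
        ...     parameter_space=cs,
        ...     optimization_targets=["score"],
        ...     max_trials=10,
        ... )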

        """
        super().__init__(
            parameter_space=parameter_space,
            optimization_targets=optimization_targets,
            objective_weights=objective_weights,
            space_adapter=space_adapter,
        )

        # Declare at the top because we need it in __del__/cleanup()
        self._temp_output_directory: Optional[TemporaryDirectory] = None

        # pylint: disable=import-outside-toplevel
        from smac import HyperparameterOptimizationFacade as Optimizer_Smac
        from smac import Scenario
        from smac.intensifier.abstract_intensifier import AbstractIntensifier
        from smac.main.config_selector import ConfigSelector
        from smac.random_design.probability_design import ProbabilityRandomDesign
        from smac.runhistory import TrialInfo

        # Store for TrialInfo instances returned by .ask()
        self.trial_info_map: Dict[ConfigSpace.Configuration, TrialInfo] = {}

        # The default when not specified is to use a known seed (0) to keep results
        # reproducible. However, if a `None` seed is explicitly provided, we let a
        # random seed be produced by SMAC.
        # https://automl.github.io/SMAC3/main/api/smac.scenario.html#smac.scenario.Scenario
        seed = -1 if seed is None else seed

        # Create temporary directory for SMAC output (if none provided)
        if output_directory is None:
            # pylint: disable=consider-using-with
            try:
                # Argument added in Python 3.10
                self._temp_output_directory = TemporaryDirectory(ignore_cleanup_errors=True)
            except TypeError:
                self._temp_output_directory = TemporaryDirectory()
            output_directory = self._temp_output_directory.name
        assert output_directory is not None

        if n_random_init is not None:
            assert isinstance(n_random_init, int) and n_random_init >= 0
            if n_random_init == max_trials and use_default_config:
                # Increase max budgeted trials to account for use_default_config.
                max_trials += 1

        scenario: Scenario = Scenario(
            self.optimizer_parameter_space,
            objectives=self._optimization_targets,
            name=run_name,
            output_directory=Path(output_directory),
            deterministic=True,
            use_default_config=use_default_config,
            n_trials=max_trials,
            # Note: `seed` was already normalized above (`None` -> -1); a seed of
            # -1 tells SMAC to generate a random seed internally. Using `seed or -1`
            # here would silently convert the reproducible default seed of 0 to -1.
            seed=seed,
            n_workers=1,  # Use a single thread for evaluating trials
        )
        intensifier: AbstractIntensifier = Optimizer_Smac.get_intensifier(
            scenario,
            max_config_calls=1,
        )
        config_selector: ConfigSelector = Optimizer_Smac.get_config_selector(
            scenario,
            retrain_after=1,
        )

        # TODO: When bulk registering prior configs to rewarm the optimizer,
        # there is a way to inform SMAC's initial design that we have
        # additional_configs and can set n_configs == 0.
        # Additionally, we may want to consider encoding those values into the
        # runhistory when prewarming the optimizer so that the initial design
        # doesn't reperform random init.
        # See Also: #488

        initial_design_args: Dict[str, Union[list, int, float, Scenario]] = {
            "scenario": scenario,
            # Workaround a bug in SMAC that sets a default arg to a mutable
            # value that can cause issues when multiple optimizers are
            # instantiated with the use_default_config option within the same
            # process that use different ConfigSpaces so that the second
            # receives the default config from both as an additional config.
            "additional_configs": [],
        }
        if n_random_init is not None:
            initial_design_args["n_configs"] = n_random_init
            if n_random_init > 0.25 * max_trials and max_ratio is None:
                warning(
                    "Number of random initial configs (%d) is "
                    "greater than 25%% of max_trials (%d). "
                    "Consider setting max_ratio to avoid SMAC overriding n_random_init.",
                    n_random_init,
                    max_trials,
                )
        if max_ratio is not None:
            assert isinstance(max_ratio, float) and 0.0 <= max_ratio <= 1.0
            initial_design_args["max_ratio"] = max_ratio
        self._max_ratio = max_ratio

        # Use the default InitialDesign from SMAC.
        # (currently SBOL instead of LatinHypercube due to better uniformity
        # for initial sampling which results in lower overall samples required)
        initial_design = Optimizer_Smac.get_initial_design(
            **initial_design_args,  # type: ignore[arg-type]
        )
        # initial_design = LatinHypercubeInitialDesign(
        #     **initial_design_args,  # type: ignore[arg-type]
        # )

        # Workaround a bug in SMAC that doesn't pass the seed to the random
        # design when generating a random_design for itself via the
        # get_random_design static method when random_design is None.
        assert isinstance(n_random_probability, float) and n_random_probability >= 0
        random_design = ProbabilityRandomDesign(
            probability=n_random_probability,
            seed=scenario.seed,
        )

        self.base_optimizer = Optimizer_Smac(
            scenario,
            SmacOptimizer._dummy_target_func,
            initial_design=initial_design,
            intensifier=intensifier,
            random_design=random_design,
            config_selector=config_selector,
            multi_objective_algorithm=Optimizer_Smac.get_multi_objective_algorithm(
                scenario,
                objective_weights=self._objective_weights,
            ),
            overwrite=True,
            logging_level=False,  # Use the existing logger
        )

    def __del__(self) -> None:
        # Best-effort attempt to clean up, in case the user forgets to call .cleanup()
        self.cleanup()

    @property
    def max_ratio(self) -> Optional[float]:
        """
        Gets the `max_ratio` parameter used in the :py:meth:`constructor <.__init__>`
        of this SmacOptimizer.

        Returns
        -------
        Optional[float]
        """
        return self._max_ratio

    @property
    def n_random_init(self) -> int:
        """
        Gets the number of random samples used to initialize the optimizer's search
        space sampling.

        Note: This may not be equal to the value passed to the initializer, due to
        logic present in SMAC.

        See Also
        --------
        :py:attr:`.max_ratio`

        Returns
        -------
        int
            The number of random samples used to initialize the optimizer's search
            space sampling.
        """
        # pylint: disable=protected-access
        return self.base_optimizer._initial_design._n_configs

    @staticmethod
    def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None:
        """
        Dummy target function for SMAC optimizer.

        Since we only use the ask-and-tell interface, this is never called.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration to evaluate.

        seed : int
            Random seed to use for the target function. Not actually used.
        """
        # NOTE: Providing a target function when using the ask-and-tell interface is
        # an imperfection of the API -- this is planned to be fixed in some future
        # release: https://github.com/automl/SMAC3/issues/946
        raise RuntimeError("This function should never be called.")

    def _register(
        self,
        observations: Observations,
    ) -> None:
        """
        Registers one or more config/score pairs (observations) with the underlying
        optimizer.

        Parameters
        ----------
        observations : Observations
            The set of configs/scores to register.
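
        Examples
        --------
        A hedged sketch of building an observation to register; the parameter and
        score values are illustrative, and the ``Observation`` keyword arguments
        are assumed from :py:mod:`mlos_core.data_classes`:

        >>> import pandas as pd
        >>> from mlos_core.data_classes import Observation
        >>> observation = Observation(
        ...     config=pd.Series({"x": 0.5}),
        ...     score=pd.Series({"score": 1.0}),
        ... )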

        """
        # TODO: Implement bulk registration.
        # (e.g., by rebuilding the base optimizer instance with all observations).
        for observation in observations:
            self._register_single(observation)

    def _register_single(
        self,
        observation: Observation,
    ) -> None:
        """
        Registers the given config and its score.

        Parameters
        ----------
        observation : Observation
            The observation to register.
        """
        from smac.runhistory import (  # pylint: disable=import-outside-toplevel
            StatusType,
            TrialInfo,
            TrialValue,
        )

        if observation.context is not None:
            warn(
                f"Not Implemented: Ignoring context {list(observation.context.index)}",
                UserWarning,
            )

        # Retrieve previously generated TrialInfo (returned by .ask()) or create a
        # new TrialInfo instance.
        config = ConfigSpace.Configuration(
            self.optimizer_parameter_space,
            values=observation.config.dropna().to_dict(),
        )
        info: TrialInfo = self.trial_info_map.get(
            config,
            TrialInfo(config=config, seed=self.base_optimizer.scenario.seed),
        )
        value = TrialValue(
            cost=list(observation.score.astype(float)),
            time=0.0,
            status=StatusType.SUCCESS,
        )
        self.base_optimizer.tell(info, value, save=False)

        # Save optimizer once we register all configs
        self.base_optimizer.optimizer.save()

    def _suggest(
        self,
        *,
        context: Optional[pd.Series] = None,
    ) -> Suggestion:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : Optional[pd.Series]
            Not Yet Implemented.

        Returns
        -------
        suggestion : Suggestion
            The suggestion to evaluate.
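
        Examples
        --------
        A hedged sketch of the ask-and-tell loop this method backs; ``run_trial``
        is a hypothetical user-provided evaluation function, and pairing the
        score with the suggestion via :py:meth:`Suggestion.complete` is assumed:

        >>> suggestion = opt.suggest()  # doctest: +SKIP
        >>> score = pd.Series({"score": run_trial(suggestion.config)})  # doctest: +SKIP
        >>> opt.register(suggestion.complete(score))  # doctest: +SKIP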

        """
        if TYPE_CHECKING:
            # pylint: disable=import-outside-toplevel,unused-import
            from smac.runhistory import TrialInfo

        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.index)}", UserWarning)

        trial: TrialInfo = self.base_optimizer.ask()
        trial.config.check_valid_configuration()
        ConfigSpace.Configuration(
            self.optimizer_parameter_space,
            values=trial.config,
        ).check_valid_configuration()
        assert trial.config.config_space == self.optimizer_parameter_space
        self.trial_info_map[trial.config] = trial
        config_sr = pd.Series(dict(trial.config), dtype=object)
        return Suggestion(config=config_sr, context=context, metadata=None)

    def register_pending(self, pending: Suggestion) -> None:
        raise NotImplementedError()

    def surrogate_predict(self, suggestion: Suggestion) -> npt.NDArray:
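        """Returns the mean prediction of SMAC's surrogate model for the given suggestion."""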

        if suggestion.context is not None:
            warn(
                f"Not Implemented: Ignoring context {list(suggestion.context.index)}",
                UserWarning,
            )
        if self._space_adapter and not isinstance(self._space_adapter, IdentityAdapter):
            raise NotImplementedError("Space adapter not supported for surrogate_predict.")

        # pylint: disable=protected-access
        if len(self._observations) <= self.base_optimizer._initial_design._n_configs:
            raise RuntimeError(
                "Surrogate model can make predictions *only* after "
                "all initial points have been evaluated "
                f"{len(self._observations)} <= {self.base_optimizer._initial_design._n_configs}"
            )
        if self.base_optimizer._config_selector._model is None:
            raise RuntimeError("Surrogate model is not yet trained")

        config_array = convert_configurations_to_array(
            [
                ConfigSpace.Configuration(
                    self.optimizer_parameter_space,
                    values=suggestion.config.to_dict(),
                )
            ]
        )
        mean_predictions, _ = self.base_optimizer._config_selector._model.predict(config_array)
        return mean_predictions.reshape(-1)

    def acquisition_function(self, suggestion: Suggestion) -> npt.NDArray:
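        """Evaluates SMAC's acquisition function for the given suggestion."""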

        if suggestion.context is not None:
            warn(
                f"Not Implemented: Ignoring context {list(suggestion.context.index)}",
                UserWarning,
            )
        if self._space_adapter:
            raise NotImplementedError()

        # pylint: disable=protected-access
        if self.base_optimizer._config_selector._acquisition_function is None:
            raise RuntimeError("Acquisition function is not yet initialized")

        return self.base_optimizer._config_selector._acquisition_function(
            suggestion.config.config_to_configspace(self.optimizer_parameter_space)
        ).reshape(-1)

    def cleanup(self) -> None:
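        """Cleans up the temporary output directory used by SMAC, if one was created."""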

        if hasattr(self, "_temp_output_directory") and self._temp_output_directory is not None:
            self._temp_output_directory.cleanup()
            self._temp_output_directory = None

    def _to_configspace_configs(self, *, configs: pd.DataFrame) -> List[ConfigSpace.Configuration]:
        """
        Convert a dataframe of configs to a list of ConfigSpace configs.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.

        Returns
        -------
        configs : List[ConfigSpace.Configuration]
            List of ConfigSpace configs.
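
        Examples
        --------
        A hedged sketch (the ``x`` column and its values are illustrative):

        >>> configs_df = pd.DataFrame([{"x": 0.25}, {"x": 0.75}])
        >>> cs_configs = opt._to_configspace_configs(configs=configs_df)  # doctest: +SKIP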

        """
        return [
            ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict())
            for (_, config) in configs.astype("O").iterrows()
        ]