Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 98%

207 statements  

coverage.py v7.6.10, created at 2025-01-21 01:50 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Tests for Bayesian Optimizers."""

import logging
from copy import deepcopy
from typing import Any

import ConfigSpace as CS
import numpy as np
import pandas as pd
import pytest

from mlos_core.data_classes import Observations, Suggestion
from mlos_core.optimizers import (
    BaseOptimizer,
    ConcreteOptimizer,
    OptimizerFactory,
    OptimizerType,
)
from mlos_core.optimizers.bayesian_optimizers import (
    BaseBayesianOptimizer,
    SmacOptimizer,
)
from mlos_core.spaces.adapters import SpaceAdapterType
from mlos_core.tests import SEED, get_all_concrete_subclasses

_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)

@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_create_optimizer_and_suggest(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: type[BaseOptimizer],
    kwargs: dict | None,
) -> None:
    """Test that we can create an optimizer and get a suggestion from it."""
    if kwargs is None:
        kwargs = {}
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    myrepr = repr(optimizer)
    assert myrepr.startswith(optimizer_class.__name__)

    # pending not implemented
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(pending=suggestion)

@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_basic_interface_toy_problem(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: type[BaseOptimizer],
    kwargs: dict | None,
) -> None:
    """Toy problem to test the optimizers."""
    # pylint: disable=too-many-locals
    max_iterations = 20
    if kwargs is None:
        kwargs = {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC sets the initial random samples as a percentage of the max
        # iterations, which defaults to 100.
        # To avoid having to train more than 25 model iterations, we set a lower
        # number of max iterations.
        kwargs["max_trials"] = max_iterations * 2

    def objective(inp: float) -> pd.Series:
        series: pd.Series = pd.Series(
            {"score": (6 * inp - 2) ** 2 * np.sin(12 * inp - 4)}
        )  # needed for type hinting
        return series

    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        assert isinstance(suggestion.config, pd.Series)
        assert suggestion.metadata is None or isinstance(suggestion.metadata, pd.Series)
        assert set(suggestion.config.index) == {"x", "y", "z"}
        # check that suggestion is in the space
        dict_config: dict = suggestion.config.to_dict()
        configuration = CS.Configuration(optimizer.parameter_space, dict_config)
        # Raises an error if outside of configuration space
        configuration.check_valid_configuration()
        inp: Any = suggestion.config["x"]
        assert isinstance(inp, (int, float))
        observation = objective(inp)
        assert isinstance(observation, pd.Series)
        optimizer.register(observations=suggestion.complete(observation))

    best_observation = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    assert isinstance(best_observation.configs, pd.DataFrame)
    assert isinstance(best_observation.scores, pd.DataFrame)
    assert best_observation.contexts is None
    assert set(best_observation.configs.columns) == {"x", "y", "z"}
    assert set(best_observation.scores.columns) == {"score"}
    assert best_observation.configs.shape == (1, 3)
    assert best_observation.scores.shape == (1, 1)
    assert best_observation.scores.score.iloc[0] < -4

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, Observations)
    assert isinstance(all_observations.configs, pd.DataFrame)
    assert isinstance(all_observations.scores, pd.DataFrame)
    assert all_observations.contexts is None
    assert set(all_observations.configs.columns) == {"x", "y", "z"}
    assert set(all_observations.scores.columns) == {"score"}
    assert all_observations.configs.shape == (20, 3)
    assert all_observations.scores.shape == (20, 1)

    # It would be better to put this into bayesian_optimizer_test but then we'd have
    # to refit the model
    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = [
            optimizer.surrogate_predict(suggestion=observation.to_suggestion())
            for observation in best_observation
        ]
        assert len(pred_best) == 1

        pred_all = [
            optimizer.surrogate_predict(suggestion=observation.to_suggestion())
            for observation in all_observations
        ]
        assert len(pred_all) == 20

@pytest.mark.parametrize(
    ("optimizer_type"),
    [
        # Enumerate all supported Optimizers
        # *[member for member in OptimizerType],
        *list(OptimizerType),
    ],
)
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """Test that all optimizer types are listed in the ConcreteOptimizer constraints."""
    assert optimizer_type.value in ConcreteOptimizer.__args__

@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_create_optimizer_with_factory_method(
    configuration_space: CS.ConfigurationSpace,
    optimizer_type: OptimizerType | None,
    kwargs: dict | None,
) -> None:
    """Test that we can create an optimizer via a factory."""
    if kwargs is None:
        kwargs = {}
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    if optimizer_type is not None:
        myrepr = repr(optimizer)
        assert myrepr.startswith(optimizer_type.value.__name__)

@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
        (
            OptimizerType.SMAC,
            {
                # Test with default config.
                "use_default_config": True,
                # 'n_random_init': 10,
            },
        ),
    ],
)
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: dict | None) -> None:
    """Toy problem to test the optimizers with the LlamaTune space adapter."""
    # pylint: disable=too-complex,disable=too-many-statements,disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.Series) -> pd.Series:
        # The best value can be reached by tuning a 1-dimensional search space.
        ret: pd.Series = pd.Series({"score": np.sin(point.x * point.y)})
        assert pd.notna(ret.score)
        return ret

    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add(CS.UniformFloatHyperparameter(name="x", lower=0, upper=3))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0, upper=3))

    # Initialize an optimizer that uses LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }

    # Make some adjustments to the kwargs for the optimizer and LlamaTuned
    # optimizer for debug/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #     # Allow us to override the number of random init samples.
    #     kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #     optimizer_kwargs['n_random_init'] = 20
    #     llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get("n_random_init", 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # Place to set a breakpoint for when the optimizer is done with random init.
        if llamatune_n_random_init and i > llamatune_n_random_init:
            _LOG.debug("LlamaTuned Optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # loop for optimizer
        suggestion = optimizer.suggest()
        observation = objective(suggestion.config)
        optimizer.register(observations=suggestion.complete(observation))

        # loop for llamatune-optimizer
        suggestion = llamatune_optimizer.suggest()
        _x, _y = suggestion.config["x"], suggestion.config["y"]
        # the LlamaTuned optimizer explores a 1-dimensional space
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3.0, rel=1e-3)
        observation = objective(suggestion.config)
        llamatune_optimizer.register(observations=suggestion.complete(observation))

    # Retrieve best observations
    best_observation: Observations = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    llamatune_best_observations: Observations = llamatune_optimizer.get_best_observations()
    assert isinstance(llamatune_best_observations, Observations)

    for observations in (best_observation, llamatune_best_observations):
        assert isinstance(observations.configs, pd.DataFrame)
        assert isinstance(observations.scores, pd.DataFrame)
        assert observations.contexts is None
        assert set(observations.configs.columns) == {"x", "y"}
        assert set(observations.scores.columns) == {"score"}

    # LlamaTune's optimizer score should be better (i.e., lower) than the plain
    # optimizer's, or at least close to it.
    assert (
        best_observation.scores.score.iloc[0] > llamatune_best_observations.scores.score.iloc[0]
        or best_observation.scores.score.iloc[0] + 1e-3
        > llamatune_best_observations.scores.score.iloc[0]
    )

    # Retrieve and check all observations
    for all_observations in (
        optimizer.get_observations(),
        llamatune_optimizer.get_observations(),
    ):
        assert isinstance(all_observations.configs, pd.DataFrame)
        assert isinstance(all_observations.scores, pd.DataFrame)
        assert all_observations.contexts is None
        assert set(all_observations.configs.columns) == {"x", "y"}
        assert set(all_observations.scores.columns) == {"score"}
        assert len(all_observations.configs) == num_iters
        assert len(all_observations.scores) == num_iters
        assert len(all_observations) == num_iters

    # .surrogate_predict method not currently implemented if space adapter is employed
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            for obs in llamatune_best_observations:
                llamatune_optimizer.surrogate_predict(suggestion=obs.to_suggestion())

# Dynamically determine all of the optimizers we have implemented.
# Note: these must be sorted.
optimizer_subclasses: list[type[BaseOptimizer]] = get_all_concrete_subclasses(
    BaseOptimizer,  # type: ignore[type-abstract]
    pkg_name="mlos_core",
)
assert optimizer_subclasses


@pytest.mark.parametrize(("optimizer_class"), optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: type[BaseOptimizer]) -> None:
    """Test that all optimizer classes are listed in the OptimizerType enum."""
    optimizer_type_classes = {member.value for member in OptimizerType}
    assert optimizer_class in optimizer_type_classes

@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_mixed_numerics_type_input_space_types(
    optimizer_type: OptimizerType | None,
    kwargs: dict | None,
) -> None:
    """Toy problem to test the optimizers with mixed numeric types to ensure that
    original dtypes are retained.
    """
    # pylint: disable=too-many-locals
    max_iterations = 10
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.Series) -> pd.Series:
        # A mix of hyperparameter types; the optimum is to select the highest
        # possible value for each.
        ret: pd.Series = pd.Series({"score": point["x"] + point["y"]})
        return ret

    input_space = CS.ConfigurationSpace(seed=SEED)
    # add a mix of numeric datatypes
    input_space.add(CS.UniformIntegerHyperparameter(name="x", lower=0, upper=5))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0.0, upper=5.0))

    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )

    assert isinstance(optimizer, BaseOptimizer)

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        assert isinstance(suggestion.config, pd.Series)
        assert set(suggestion.config.index) == {"x", "y"}
        # Check suggestion values are the expected dtype
        assert isinstance(suggestion.config["x"], int)
        assert isinstance(suggestion.config["y"], float)
        # Check that suggestion is in the space
        test_configuration = CS.Configuration(
            optimizer.parameter_space, suggestion.config.to_dict()
        )
        # Raises an error if outside of configuration space
        test_configuration.check_valid_configuration()
        # Test registering the suggested configuration with a score.
        observation = objective(suggestion.config)
        assert isinstance(observation, pd.Series)
        optimizer.register(observations=suggestion.complete(observation))

    best_observations = optimizer.get_best_observations()
    assert isinstance(best_observations.configs, pd.DataFrame)
    assert isinstance(best_observations.scores, pd.DataFrame)
    assert best_observations.contexts is None

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations.configs, pd.DataFrame)
    assert isinstance(all_observations.scores, pd.DataFrame)
    assert all_observations.contexts is None