Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 97%
194 statements
coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Tests for Bayesian Optimizers."""

import logging
from copy import deepcopy
from typing import List, Optional, Type

import ConfigSpace as CS
import numpy as np
import pandas as pd
import pytest

from mlos_core.optimizers import (
    BaseOptimizer,
    ConcreteOptimizer,
    OptimizerFactory,
    OptimizerType,
)
from mlos_core.optimizers.bayesian_optimizers import (
    BaseBayesianOptimizer,
    SmacOptimizer,
)
from mlos_core.spaces.adapters import SpaceAdapterType
from mlos_core.tests import SEED, get_all_concrete_subclasses

_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)


@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_create_optimizer_and_suggest(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Test that we can create an optimizer and get a suggestion from it."""

    if kwargs is None:
        kwargs = {}
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion, metadata = optimizer.suggest()
    assert suggestion is not None

    myrepr = repr(optimizer)
    assert myrepr.startswith(optimizer_class.__name__)

    # register_pending() is not implemented yet.
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(configs=suggestion, metadata=metadata)

@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_basic_interface_toy_problem(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Toy problem to test the optimizers."""
    # pylint: disable=too-many-locals
    max_iterations = 20
    if kwargs is None:
        kwargs = {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC sets the number of initial random samples as a percentage of the
        # max iterations, which defaults to 100.
        # To avoid having to train the model for more than 25 iterations, we set
        # a lower number of max iterations.
        kwargs["max_trials"] = max_iterations * 2

    def objective(x: pd.Series) -> pd.DataFrame:
        return pd.DataFrame({"score": (6 * x - 2) ** 2 * np.sin(12 * x - 4)})
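    # Note: this is the 1-D Forrester et al. synthetic test function,
    # (6x - 2)^2 * sin(12x - 4); on x in [0, 1] its global minimum is roughly
    # -6.02 near x ~= 0.757, which is what the `best_score < -5` assertion
    # further down relies on (assuming the `configuration_space` fixture bounds
    # x to roughly [0, 1]).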

    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion, metadata = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert metadata is None or isinstance(metadata, pd.DataFrame)
        assert set(suggestion.columns) == {"x", "y", "z"}
        # check that suggestion is in the space
        configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
        # Raises an error if outside of configuration space
        configuration.check_valid_configuration()
        observation = objective(suggestion["x"])
        assert isinstance(observation, pd.DataFrame)
        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

    (best_config, best_score, best_context) = optimizer.get_best_observations()
    assert isinstance(best_config, pd.DataFrame)
    assert isinstance(best_score, pd.DataFrame)
    assert best_context is None
    assert set(best_config.columns) == {"x", "y", "z"}
    assert set(best_score.columns) == {"score"}
    assert best_config.shape == (1, 3)
    assert best_score.shape == (1, 1)
    assert best_score.score.iloc[0] < -5

    (all_configs, all_scores, all_contexts) = optimizer.get_observations()
    assert isinstance(all_configs, pd.DataFrame)
    assert isinstance(all_scores, pd.DataFrame)
    assert all_contexts is None
    assert set(all_configs.columns) == {"x", "y", "z"}
    assert set(all_scores.columns) == {"score"}
    assert all_configs.shape == (20, 3)
    assert all_scores.shape == (20, 1)

    # It would be better to put this into bayesian_optimizer_test but then we'd have
    # to refit the model
    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = optimizer.surrogate_predict(configs=best_config)
        assert pred_best.shape == (1,)

        pred_all = optimizer.surrogate_predict(configs=all_configs)
        assert pred_all.shape == (20,)

@pytest.mark.parametrize(
    ("optimizer_type"),
    [
        # Enumerate all supported Optimizers
        # *[member for member in OptimizerType],
        *list(OptimizerType),
    ],
)
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """Test that all optimizer types are listed in the ConcreteOptimizer constraints."""
    # pylint: disable=no-member
    assert optimizer_type.value in ConcreteOptimizer.__constraints__
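    # Note: `ConcreteOptimizer` appears to be a typing.TypeVar constrained to the
    # concrete optimizer implementations, which is why the check above inspects
    # its `__constraints__` tuple (and why pylint's no-member warning is disabled).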

@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_create_optimizer_with_factory_method(
    configuration_space: CS.ConfigurationSpace,
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Test that we can create an optimizer via a factory."""
    if kwargs is None:
        kwargs = {}
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    if optimizer_type is not None:
        myrepr = repr(optimizer)
        assert myrepr.startswith(optimizer_type.value.__name__)
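        # (The repr check works because each OptimizerType member's .value is the
        # underlying optimizer class itself.)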

@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
        (
            OptimizerType.SMAC,
            {
                # Test with default config.
                "use_default_config": True,
                # 'n_random_init': 10,
            },
        ),
    ],
)
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None:
    """Toy problem to test the optimizers with llamatune space adapter."""
    # pylint: disable=too-complex,disable=too-many-statements,disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.DataFrame:
        # Best value can be reached by tuning a 1-dimensional search space
        ret = pd.DataFrame({"score": np.sin(point.x * point.y)})
        assert ret.score.hasnans is False
        return ret
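    # For reference: score = sin(x * y) reaches its minimum of -1 whenever
    # x * y = 3*pi/2 ~= 4.71, which lies inside the [0, 3] x [0, 3] input box,
    # so the optimum is reachable even through a 1-dimensional embedding.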

    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add(CS.UniformFloatHyperparameter(name="x", lower=0, upper=3))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0, upper=3))

    # Initialize an optimizer that uses LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }
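    # Rough intuition: with num_low_dims=1, LlamaTune searches a random 1-D
    # (HeSBO-style) linear embedding of the 2-D space, so each suggested (x, y)
    # pair is expected to be linearly related; the `_x ~= _y or _x + _y ~= 3.0`
    # assertion in the loop below checks exactly that.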

    # Make some adjustments to the kwargs for the optimizer and LlamaTuned
    # optimizer for debug/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #     # Allow us to override the number of random init samples.
    #     kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #     optimizer_kwargs['n_random_init'] = 20
    #     llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get("n_random_init", 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # Place to set a breakpoint for when the optimizer is done with random init.
        if llamatune_n_random_init and i > llamatune_n_random_init:
            _LOG.debug("LlamaTuned Optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # loop for optimizer
        suggestion, metadata = optimizer.suggest()
        observation = objective(suggestion)
        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

        # loop for llamatune-optimizer
        suggestion, metadata = llamatune_optimizer.suggest()
        _x, _y = suggestion["x"].iloc[0], suggestion["y"].iloc[0]
        # optimizer explores 1-dimensional space
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3.0, rel=1e-3)
        observation = objective(suggestion)
        llamatune_optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

    # Retrieve best observations
    best_observation = optimizer.get_best_observations()
    llamatune_best_observation = llamatune_optimizer.get_best_observations()

    for best_config, best_score, best_context in (best_observation, llamatune_best_observation):
        assert isinstance(best_config, pd.DataFrame)
        assert isinstance(best_score, pd.DataFrame)
        assert best_context is None
        assert set(best_config.columns) == {"x", "y"}
        assert set(best_score.columns) == {"score"}

    (best_config, best_score, _context) = best_observation
    (llamatune_best_config, llamatune_best_score, _context) = llamatune_best_observation

    # LlamaTune's optimizer score should be better (i.e., lower) than the plain
    # optimizer's, or at least close to it.
    assert (
        best_score.score.iloc[0] > llamatune_best_score.score.iloc[0]
        or best_score.score.iloc[0] + 1e-3 > llamatune_best_score.score.iloc[0]
    )

    # Retrieve and check all observations
    for all_configs, all_scores, all_contexts in (
        optimizer.get_observations(),
        llamatune_optimizer.get_observations(),
    ):
        assert isinstance(all_configs, pd.DataFrame)
        assert isinstance(all_scores, pd.DataFrame)
        assert all_contexts is None
        assert set(all_configs.columns) == {"x", "y"}
        assert set(all_scores.columns) == {"score"}
        assert len(all_configs) == num_iters
        assert len(all_scores) == num_iters

    # The .surrogate_predict() method is not currently implemented when a space
    # adapter is employed.
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            llamatune_optimizer.surrogate_predict(configs=llamatune_best_config)

# Dynamically determine all of the optimizers we have implemented.
# Note: these must be sorted.
optimizer_subclasses: List[Type[BaseOptimizer]] = get_all_concrete_subclasses(
    BaseOptimizer,  # type: ignore[type-abstract]
    pkg_name="mlos_core",
)
assert optimizer_subclasses
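# (The sorting requirement is presumably so that the parametrized test ids
# generated from this list are deterministic across test runs and workers.)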

@pytest.mark.parametrize(("optimizer_class"), optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None:
    """Test that all optimizer classes are listed in the OptimizerType enum."""
    optimizer_type_classes = {member.value for member in OptimizerType}
    assert optimizer_class in optimizer_type_classes

@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_mixed_numerics_type_input_space_types(
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Toy problem to test the optimizers with mixed numeric types to ensure that
    original dtypes are retained.
    """
    # pylint: disable=too-many-locals
    max_iterations = 10
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.DataFrame:
        # Mix of hyperparameter types; the optimum is to select the highest
        # possible value for each.
        return pd.DataFrame({"score": point["x"] + point["y"]})

    input_space = CS.ConfigurationSpace(seed=SEED)
    # add a mix of numeric datatypes
    input_space.add(CS.UniformIntegerHyperparameter(name="x", lower=0, upper=5))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0.0, upper=5.0))

    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion, metadata = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert (suggestion.columns == ["x", "y"]).all()
        # Check suggestion values are the expected dtype
        assert isinstance(suggestion["x"].iloc[0], np.integer)
        assert isinstance(suggestion["y"].iloc[0], np.floating)
        # Check that suggestion is in the space
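        # (The astype("O") cast below converts the row's values to plain Python
        # objects first, presumably because ConfigSpace's validation can reject
        # raw numpy scalar types for integer hyperparameters.)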

        test_configuration = CS.Configuration(
            optimizer.parameter_space, suggestion.astype("O").iloc[0].to_dict()
        )
        # Raises an error if outside of configuration space
        test_configuration.check_valid_configuration()
        # Test registering the suggested configuration with a score.
        observation = objective(suggestion)
        assert isinstance(observation, pd.DataFrame)
        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

    (best_config, best_score, best_context) = optimizer.get_best_observations()
    assert isinstance(best_config, pd.DataFrame)
    assert isinstance(best_score, pd.DataFrame)
    assert best_context is None

    (all_configs, all_scores, all_contexts) = optimizer.get_observations()
    assert isinstance(all_configs, pd.DataFrame)
    assert isinstance(all_scores, pd.DataFrame)
    assert all_contexts is None