Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 98%
207 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-14 01:58 +0000
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Tests for Bayesian Optimizers."""
7import logging
8from copy import deepcopy
9from typing import Any, List, Optional, Type
11import ConfigSpace as CS
12import numpy as np
13import pandas as pd
14import pytest
16from mlos_core.data_classes import Observations, Suggestion
17from mlos_core.optimizers import (
18 BaseOptimizer,
19 ConcreteOptimizer,
20 OptimizerFactory,
21 OptimizerType,
22)
23from mlos_core.optimizers.bayesian_optimizers import (
24 BaseBayesianOptimizer,
25 SmacOptimizer,
26)
27from mlos_core.spaces.adapters import SpaceAdapterType
28from mlos_core.tests import SEED, get_all_concrete_subclasses
# Module-level logger for these tests. Its level is set to DEBUG so that the
# debug messages emitted by the tests are not filtered out by this logger.
_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)
@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [(opt_type.value, {}) for opt_type in OptimizerType],
)
def test_create_optimizer_and_suggest(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Instantiate each optimizer class directly and request one suggestion.

    Also checks that the optimizer's repr starts with its class name and that
    ``register_pending`` raises ``NotImplementedError``.
    """
    ctor_kwargs: dict = {} if kwargs is None else kwargs
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **ctor_kwargs,
    )
    assert optimizer is not None
    assert optimizer.parameter_space is not None

    first_suggestion = optimizer.suggest()
    assert first_suggestion is not None

    # The repr is expected to lead with the concrete class name.
    assert repr(optimizer).startswith(optimizer_class.__name__)

    # Registering pending suggestions is expected to be unsupported.
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(pending=first_suggestion)
@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_basic_interface_toy_problem(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Run each optimizer on a small toy problem and check its basic interface.

    Parameters
    ----------
    configuration_space : CS.ConfigurationSpace
        Search space fixture; the assertions below expect it to contain exactly
        the parameters "x", "y" and "z".
    optimizer_class : Type[BaseOptimizer]
        Optimizer class under test (one per ``OptimizerType`` member).
    kwargs : Optional[dict]
        Extra keyword arguments passed to the optimizer constructor.
    """
    # pylint: disable=too-many-locals
    max_iterations = 20
    # Work on a private copy: pytest passes parametrized values to the test
    # as-is (no copy), so adding keys to the incoming dict would modify the
    # shared parameter object itself.
    kwargs = deepcopy(kwargs) if kwargs is not None else {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC sets the initial random samples as a percentage of the max
        # iterations, which defaults to 100.
        # To avoid having to train more than 25 model iterations, we set a lower
        # number of max iterations.
        kwargs["max_trials"] = max_iterations * 2

    def objective(inp: float) -> pd.Series:
        """Evaluate the 1-D toy objective at ``inp`` and return it as "score"."""
        series: pd.Series = pd.Series(
            {"score": (6 * inp - 2) ** 2 * np.sin(12 * inp - 4)}
        )  # needed for type hinting
        return series

    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )

    # Before anything is registered both accessors are expected to raise.
    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        assert isinstance(suggestion.config, pd.Series)
        assert suggestion.metadata is None or isinstance(suggestion.metadata, pd.Series)
        assert set(suggestion.config.index) == {"x", "y", "z"}
        # check that suggestion is in the space
        dict_config: dict = suggestion.config.to_dict()
        configuration = CS.Configuration(optimizer.parameter_space, dict_config)
        # Raises an error if outside of configuration space
        configuration.check_valid_configuration()
        inp: Any = suggestion.config["x"]
        assert isinstance(inp, (int, float))
        observation = objective(inp)
        assert isinstance(observation, pd.Series)
        optimizer.register(observations=suggestion.complete(observation))

    best_observation = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    assert isinstance(best_observation.configs, pd.DataFrame)
    assert isinstance(best_observation.scores, pd.DataFrame)
    assert best_observation.contexts is None
    assert set(best_observation.configs.columns) == {"x", "y", "z"}
    assert set(best_observation.scores.columns) == {"score"}
    assert best_observation.configs.shape == (1, 3)
    assert best_observation.scores.shape == (1, 1)
    assert best_observation.scores.score.iloc[0] < -4

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, Observations)
    assert isinstance(all_observations.configs, pd.DataFrame)
    assert isinstance(all_observations.scores, pd.DataFrame)
    assert all_observations.contexts is None
    assert set(all_observations.configs.columns) == {"x", "y", "z"}
    assert set(all_observations.scores.columns) == {"score"}
    # One observation is registered per loop iteration above.
    assert all_observations.configs.shape == (max_iterations, 3)
    assert all_observations.scores.shape == (max_iterations, 1)

    # It would be better to put this into bayesian_optimizer_test but then we'd have
    # to refit the model
    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = [
            optimizer.surrogate_predict(suggestion=obs.to_suggestion())
            for obs in best_observation
        ]
        assert len(pred_best) == 1

        pred_all = [
            optimizer.surrogate_predict(suggestion=obs.to_suggestion())
            for obs in all_observations
        ]
        assert len(pred_all) == max_iterations
@pytest.mark.parametrize("optimizer_type", list(OptimizerType))
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """Every OptimizerType value must appear in ConcreteOptimizer's constraints."""
    # pylint: disable=no-member
    allowed_classes = ConcreteOptimizer.__constraints__
    assert optimizer_type.value in allowed_classes
@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Factory default (no explicit optimizer type).
        (None, {}),
        # Every supported optimizer type.
        *((opt_type, {}) for opt_type in OptimizerType),
    ],
)
def test_create_optimizer_with_factory_method(
    configuration_space: CS.ConfigurationSpace,
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Build an optimizer through ``OptimizerFactory.create`` and get a suggestion.

    When ``optimizer_type`` is None the argument is omitted from the factory
    call altogether, so the factory's own default is used.
    """
    factory_kwargs: dict = {} if kwargs is None else kwargs
    if optimizer_type is not None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=factory_kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_kwargs=factory_kwargs,
        )
    assert optimizer is not None
    assert optimizer.parameter_space is not None

    first_suggestion = optimizer.suggest()
    assert first_suggestion is not None

    # The repr can only be matched against a class name when a type was requested.
    if optimizer_type is None:
        return
    assert repr(optimizer).startswith(optimizer_type.value.__name__)
@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
        (
            OptimizerType.SMAC,
            {
                # Test with default config.
                "use_default_config": True,
                # 'n_random_init': 10,
            },
        ),
    ],
)
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None:
    """Compare a plain optimizer with a LlamaTune-adapted one on a toy problem.

    Both optimizers are created through ``OptimizerFactory.create`` over the same
    two-parameter space and run for the same number of iterations.

    Parameters
    ----------
    optimizer_type : OptimizerType
        Optimizer type to create through the factory.
    kwargs : Optional[dict]
        Extra optimizer keyword arguments; each optimizer gets its own deep copy.
    """
    # pylint: disable=too-complex,disable=too-many-statements,disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.Series) -> pd.Series:
        """Return sin(x * y) for the suggested point as the "score"."""
        # Best value can be reached by tuning a 1-dimensional search space
        ret: pd.Series = pd.Series({"score": np.sin(point.x * point.y)})
        assert pd.notna(ret.score)
        return ret

    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add(CS.UniformFloatHyperparameter(name="x", lower=0, upper=3))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0, upper=3))

    # Initialize an optimizer that uses LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }

    # Make some adjustments to the kwargs for the optimizer and LlamaTuned
    # optimizer for debug/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #    # Allow us to override the number of random init samples.
    #    kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #    optimizer_kwargs['n_random_init'] = 20
    #    llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get("n_random_init", 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # Place to set a breakpoint for when the optimizer is done with random init.
        # Both checks use the same ">=" comparison so the two optimizers are
        # reported consistently.
        if llamatune_n_random_init and i >= llamatune_n_random_init:
            _LOG.debug("LlamaTuned Optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # loop for optimizer
        suggestion = optimizer.suggest()
        observation = objective(suggestion.config)
        optimizer.register(observations=suggestion.complete(observation))

        # loop for llamatune-optimizer
        suggestion = llamatune_optimizer.suggest()
        _x, _y = suggestion.config["x"], suggestion.config["y"]
        # optimizer explores 1-dimensional space
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3.0, rel=1e-3)
        observation = objective(suggestion.config)
        llamatune_optimizer.register(observations=suggestion.complete(observation))

    # Retrieve best observations
    best_observation: Observations = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    llamatune_best_observations: Observations = llamatune_optimizer.get_best_observations()
    assert isinstance(llamatune_best_observations, Observations)

    for observations in (best_observation, llamatune_best_observations):
        assert isinstance(observations.configs, pd.DataFrame)
        assert isinstance(observations.scores, pd.DataFrame)
        assert observations.contexts is None
        assert set(observations.configs.columns) == {"x", "y"}
        assert set(observations.scores.columns) == {"score"}

    # LlamaTune's optimizer score should be better (i.e., lower) than the plain
    # optimizer's one, or close to that. The tolerance check alone covers both
    # cases: a strictly better LlamaTune score also satisfies it.
    best_score = best_observation.scores.score.iloc[0]
    llamatune_best_score = llamatune_best_observations.scores.score.iloc[0]
    assert best_score + 1e-3 > llamatune_best_score

    # Retrieve and check all observations
    for all_observations in (
        optimizer.get_observations(),
        llamatune_optimizer.get_observations(),
    ):
        assert isinstance(all_observations.configs, pd.DataFrame)
        assert isinstance(all_observations.scores, pd.DataFrame)
        assert all_observations.contexts is None
        assert set(all_observations.configs.columns) == {"x", "y"}
        assert set(all_observations.scores.columns) == {"score"}
        assert len(all_observations.configs) == num_iters
        assert len(all_observations.scores) == num_iters
        assert len(all_observations) == num_iters

    # .surrogate_predict method not currently implemented if space adapter is employed
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            for obs in llamatune_best_observations:
                llamatune_optimizer.surrogate_predict(suggestion=obs.to_suggestion())
# Collect every concrete BaseOptimizer subclass found in the mlos_core package.
# The result parametrizes test_optimizer_type_defs and must not be empty.
# Note: these must be sorted.
optimizer_subclasses: List[Type[BaseOptimizer]] = get_all_concrete_subclasses(
    BaseOptimizer,  # type: ignore[type-abstract]
    pkg_name="mlos_core",
)
assert optimizer_subclasses
@pytest.mark.parametrize("optimizer_class", optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None:
    """Each discovered concrete optimizer class must be a value of OptimizerType."""
    registered_classes = [opt_type.value for opt_type in OptimizerType]
    assert optimizer_class in registered_classes
@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Factory default (no explicit optimizer type).
        (None, {}),
        # Every supported optimizer type.
        *((opt_type, {}) for opt_type in OptimizerType),
    ],
)
def test_mixed_numerics_type_input_space_types(
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Check that suggestions over a mixed integer/float space keep the
    original Python types of each parameter.
    """
    # pylint: disable=too-many-locals
    max_iterations = 10
    factory_kwargs: dict = {} if kwargs is None else kwargs

    def objective(point: pd.Series) -> pd.Series:
        # The score is the sum of the integer and the float parameter.
        score: pd.Series = pd.Series({"score": point["x"] + point["y"]})
        return score

    def assert_well_formed(observations: Observations) -> None:
        # Shared structural checks for the best and the full observation sets.
        assert isinstance(observations.configs, pd.DataFrame)
        assert isinstance(observations.scores, pd.DataFrame)
        assert observations.contexts is None

    # One integer and one float parameter over the same numeric range.
    input_space = CS.ConfigurationSpace(seed=SEED)
    input_space.add(CS.UniformIntegerHyperparameter(name="x", lower=0, upper=5))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0.0, upper=5.0))

    if optimizer_type is not None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=factory_kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_kwargs=factory_kwargs,
        )

    assert isinstance(optimizer, BaseOptimizer)

    # Nothing has been registered yet, so both accessors are expected to raise.
    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        suggested_config = suggestion.config
        assert isinstance(suggested_config, pd.Series)
        assert set(suggested_config.index) == {"x", "y"}
        # Each value must come back with the type of its hyperparameter.
        assert isinstance(suggested_config["x"], int)
        assert isinstance(suggested_config["y"], float)
        # Building the Configuration and validating it raises if the
        # suggestion lies outside of the configuration space.
        CS.Configuration(
            optimizer.parameter_space,
            suggested_config.to_dict(),
        ).check_valid_configuration()
        # Score the suggestion and feed the result back to the optimizer.
        score = objective(suggested_config)
        assert isinstance(score, pd.Series)
        optimizer.register(observations=suggestion.complete(score))

    assert_well_formed(optimizer.get_best_observations())
    assert_well_formed(optimizer.get_observations())