Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 98% of 207 statements

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Tests for Bayesian Optimizers."""

import logging
from copy import deepcopy
from typing import Any

import ConfigSpace as CS
import numpy as np
import pandas as pd
import pytest

from mlos_core.data_classes import Observations, Suggestion
from mlos_core.optimizers import (
    BaseOptimizer,
    ConcreteOptimizer,
    OptimizerFactory,
    OptimizerType,
)
from mlos_core.optimizers.bayesian_optimizers import (
    BaseBayesianOptimizer,
    SmacOptimizer,
)
from mlos_core.spaces.adapters import SpaceAdapterType
from mlos_core.tests import SEED, get_all_concrete_subclasses

_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)


@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_create_optimizer_and_suggest(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: type[BaseOptimizer],
    kwargs: dict | None,
) -> None:
    """Test that we can create an optimizer and get a suggestion from it."""
    if kwargs is None:
        kwargs = {}
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    myrepr = repr(optimizer)
    assert myrepr.startswith(optimizer_class.__name__)

    # pending not implemented
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(pending=suggestion)


@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_basic_interface_toy_problem(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: type[BaseOptimizer],
    kwargs: dict | None,
) -> None:
    """Toy problem to test the optimizers."""
    # pylint: disable=too-many-locals
    max_iterations = 20
    if kwargs is None:
        kwargs = {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC sets the initial random samples as a percentage of the max
        # iterations, which defaults to 100.
        # To avoid having to train more than 25 model iterations, we set a lower
        # number of max iterations.
        kwargs["max_trials"] = max_iterations * 2

    def objective(inp: float) -> pd.Series:
        series: pd.Series = pd.Series(
            {"score": (6 * inp - 2) ** 2 * np.sin(12 * inp - 4)}
        )  # needed for type hinting
        return series
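    # Note: this objective is the classic 1-D Forrester et al. test function,
    # (6x - 2)^2 * sin(12x - 4), whose global minimum on [0, 1] is roughly -6.02
    # near x ~= 0.757. Assuming the `configuration_space` fixture bounds "x" to
    # roughly that range, that is what makes the `score < -4` assertion on the
    # best observation below achievable.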
    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        assert isinstance(suggestion.config, pd.Series)
        assert suggestion.metadata is None or isinstance(suggestion.metadata, pd.Series)
        assert set(suggestion.config.index) == {"x", "y", "z"}
        # check that suggestion is in the space
        dict_config: dict = suggestion.config.to_dict()
        configuration = CS.Configuration(optimizer.parameter_space, dict_config)
        # Raises an error if outside of configuration space
        configuration.check_valid_configuration()
        inp: Any = suggestion.config["x"]
        assert isinstance(inp, (int, float))
        observation = objective(inp)
        assert isinstance(observation, pd.Series)
        optimizer.register(observations=suggestion.complete(observation))

    best_observation = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    assert isinstance(best_observation.configs, pd.DataFrame)
    assert isinstance(best_observation.scores, pd.DataFrame)
    assert best_observation.contexts is None
    assert set(best_observation.configs.columns) == {"x", "y", "z"}
    assert set(best_observation.scores.columns) == {"score"}
    assert best_observation.configs.shape == (1, 3)
    assert best_observation.scores.shape == (1, 1)
    assert best_observation.scores.score.iloc[0] < -4

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, Observations)
    assert isinstance(all_observations.configs, pd.DataFrame)
    assert isinstance(all_observations.scores, pd.DataFrame)
    assert all_observations.contexts is None
    assert set(all_observations.configs.columns) == {"x", "y", "z"}
    assert set(all_observations.scores.columns) == {"score"}
    assert all_observations.configs.shape == (20, 3)
    assert all_observations.scores.shape == (20, 1)

    # It would be better to put this into bayesian_optimizer_test but then we'd have
    # to refit the model
    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = [
            optimizer.surrogate_predict(suggestion=observation.to_suggestion())
            for observation in best_observation
        ]
        assert len(pred_best) == 1

        pred_all = [
            optimizer.surrogate_predict(suggestion=observation.to_suggestion())
            for observation in all_observations
        ]
        assert len(pred_all) == 20


@pytest.mark.parametrize(
    ("optimizer_type"),
    [
        # Enumerate all supported Optimizers
        # *[member for member in OptimizerType],
        *list(OptimizerType),
    ],
)
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """Test that all optimizer types are listed in the ConcreteOptimizer constraints."""
    assert optimizer_type.value in ConcreteOptimizer.__args__


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_create_optimizer_with_factory_method(
    configuration_space: CS.ConfigurationSpace,
    optimizer_type: OptimizerType | None,
    kwargs: dict | None,
) -> None:
    """Test that we can create an optimizer via a factory."""
    if kwargs is None:
        kwargs = {}
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    if optimizer_type is not None:
        myrepr = repr(optimizer)
        assert myrepr.startswith(optimizer_type.value.__name__)


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
        (
            OptimizerType.SMAC,
            {
                # Test with default config.
                "use_default_config": True,
                # 'n_random_init': 10,
            },
        ),
    ],
)
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: dict | None) -> None:
    """Toy problem to test the optimizers with llamatune space adapter."""
    # pylint: disable=too-complex,disable=too-many-statements,disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.Series) -> pd.Series:
        # The best value can be reached by tuning a 1-dimensional search space.
        ret: pd.Series = pd.Series({"score": np.sin(point.x * point.y)})
        assert pd.notna(ret.score)
        return ret

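    # Note: sin(x * y) depends on x and y only through their product, so a
    # (near-)optimal score really is reachable along a single direction of the
    # 2-D space -- which is what the 1-dimensional LlamaTune projection below
    # explores.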
    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add(CS.UniformFloatHyperparameter(name="x", lower=0, upper=3))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0, upper=3))

    # Initialize an optimizer that uses LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }

    # Make some adjustments to the kwargs for the optimizer and LlamaTuned
    # optimizer for debug/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #     # Allow us to override the number of random init samples.
    #     kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #     optimizer_kwargs['n_random_init'] = 20
    #     llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get("n_random_init", 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # Place to set a breakpoint for when the optimizer is done with random init.
        if llamatune_n_random_init and i > llamatune_n_random_init:
            _LOG.debug("LlamaTuned Optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # loop for optimizer
        suggestion = optimizer.suggest()
        observation = objective(suggestion.config)
        optimizer.register(observations=suggestion.complete(observation))

        # loop for llamatune-optimizer
        suggestion = llamatune_optimizer.suggest()
        _x, _y = suggestion.config["x"], suggestion.config["y"]
        # optimizer explores 1-dimensional space
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3.0, rel=1e-3)
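        # Note: with num_low_dims=1 the LlamaTune adapter (assumed here to use a
        # HeSBO-style random projection) ties both original parameters to the single
        # latent dimension with a +/- sign: same signs yield x == y, opposite signs
        # yield x + y == 3 over the [0, 3] bounds, which is what the assertion above
        # checks (up to tolerance).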
        observation = objective(suggestion.config)
        llamatune_optimizer.register(observations=suggestion.complete(observation))

    # Retrieve best observations
    best_observation: Observations = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    llamatune_best_observations: Observations = llamatune_optimizer.get_best_observations()
    assert isinstance(llamatune_best_observations, Observations)

    for observations in (best_observation, llamatune_best_observations):
        assert isinstance(observations.configs, pd.DataFrame)
        assert isinstance(observations.scores, pd.DataFrame)
        assert observations.contexts is None
        assert set(observations.configs.columns) == {"x", "y"}
        assert set(observations.scores.columns) == {"score"}

    # LlamaTune's optimizer score should be better (i.e., lower) than the plain
    # optimizer's score, or at least close to it.
    assert (
        best_observation.scores.score.iloc[0] > llamatune_best_observations.scores.score.iloc[0]
        or best_observation.scores.score.iloc[0] + 1e-3
        > llamatune_best_observations.scores.score.iloc[0]
    )

    # Retrieve and check all observations
    for all_observations in (
        optimizer.get_observations(),
        llamatune_optimizer.get_observations(),
    ):
        assert isinstance(all_observations.configs, pd.DataFrame)
        assert isinstance(all_observations.scores, pd.DataFrame)
        assert all_observations.contexts is None
        assert set(all_observations.configs.columns) == {"x", "y"}
        assert set(all_observations.scores.columns) == {"score"}
        assert len(all_observations.configs) == num_iters
        assert len(all_observations.scores) == num_iters
        assert len(all_observations) == num_iters

    # .surrogate_predict method not currently implemented if space adapter is employed
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            for obs in llamatune_best_observations:
                llamatune_optimizer.surrogate_predict(suggestion=obs.to_suggestion())


# Dynamically determine all of the optimizers we have implemented.
# Note: these must be sorted.
optimizer_subclasses: list[type[BaseOptimizer]] = get_all_concrete_subclasses(
    BaseOptimizer,  # type: ignore[type-abstract]
    pkg_name="mlos_core",
)
assert optimizer_subclasses


@pytest.mark.parametrize(("optimizer_class"), optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: type[BaseOptimizer]) -> None:
    """Test that all optimizer classes are listed in the OptimizerType enum."""
    optimizer_type_classes = {member.value for member in OptimizerType}
    assert optimizer_class in optimizer_type_classes


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_mixed_numerics_type_input_space_types(
    optimizer_type: OptimizerType | None,
    kwargs: dict | None,
) -> None:
    """Toy problem to test the optimizers with mixed numeric types to ensure that
    original dtypes are retained.
    """
    # pylint: disable=too-many-locals
    max_iterations = 10
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.Series) -> pd.Series:
        # Mix of hyperparameter types; the optimum is to select the highest possible values.
        ret: pd.Series = pd.Series({"score": point["x"] + point["y"]})
        return ret

    input_space = CS.ConfigurationSpace(seed=SEED)
    # add a mix of numeric datatypes
    input_space.add(CS.UniformIntegerHyperparameter(name="x", lower=0, upper=5))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0.0, upper=5.0))
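    # The integer "x" and float "y" hyperparameters are deliberately mixed so the
    # dtype assertions inside the loop below can confirm that each parameter's
    # native numeric type survives the suggest/register round trip.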
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )

    assert isinstance(optimizer, BaseOptimizer)

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        assert isinstance(suggestion.config, pd.Series)
        assert set(suggestion.config.index) == {"x", "y"}
        # Check suggestion values are the expected dtype
        assert isinstance(suggestion.config["x"], int)
        assert isinstance(suggestion.config["y"], float)
        # Check that suggestion is in the space
        test_configuration = CS.Configuration(
            optimizer.parameter_space, suggestion.config.to_dict()
        )
        # Raises an error if outside of configuration space
        test_configuration.check_valid_configuration()
        # Test registering the suggested configuration with a score.
        observation = objective(suggestion.config)
        assert isinstance(observation, pd.Series)
        optimizer.register(observations=suggestion.complete(observation))

    best_observations = optimizer.get_best_observations()
    assert isinstance(best_observations.configs, pd.DataFrame)
    assert isinstance(best_observations.scores, pd.DataFrame)
    assert best_observations.contexts is None

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations.configs, pd.DataFrame)
    assert isinstance(all_observations.scores, pd.DataFrame)
    assert all_observations.contexts is None