Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 98%
119 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-21 01:50 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Contains the :py:class:`.BaseOptimizer` abstract class."""
7import collections
8from abc import ABCMeta, abstractmethod
9from copy import deepcopy
11import ConfigSpace
12import numpy as np
13import numpy.typing as npt
14import pandas as pd
16from mlos_core.data_classes import Observation, Observations, Suggestion
17from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
18from mlos_core.util import config_to_series
class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface:
    :py:meth:`~.BaseOptimizer.suggest`,
    :py:meth:`~.BaseOptimizer.register`,
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        *,
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: list[str],
        objective_weights: list[float] | None = None,
        space_adapter: BaseSpaceAdapter | None = None,
    ):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        optimization_targets : list[str]
            The names of the optimization targets to minimize.
            To maximize a target, use the negative of the target when registering scores.
        objective_weights : list[float] | None
            Optional list of weights of optimization targets.
        space_adapter : BaseSpaceAdapter | None
            The space adapter class to employ for parameter space transformations.

        Raises
        ------
        ValueError
            If the space adapter was built for a different parameter space, or if
            the number of objective weights does not match the number of targets.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        """The parameter space to optimize."""

        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = (
            parameter_space if space_adapter is None else space_adapter.target_parameter_space
        )
        """
        The parameter space actually used by the optimizer.

        (in case a :py:mod:`SpaceAdapter <mlos_core.spaces.adapters>` is used)
        """

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._optimization_targets = optimization_targets
        self._objective_weights = objective_weights
        if objective_weights is not None and len(objective_weights) != len(optimization_targets):
            raise ValueError("Number of weights must match the number of optimization targets")

        self._space_adapter: BaseSpaceAdapter | None = space_adapter
        self._observations: Observations = Observations()
        # None until the first observation is registered; afterwards records
        # whether observations carry a context (the convention must be consistent).
        self._has_context: bool | None = None
        self._pending_observations: list[tuple[pd.DataFrame, pd.DataFrame | None]] = []

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> BaseSpaceAdapter | None:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(
        self,
        observations: Observation | Observations,
    ) -> None:
        """
        Register one or more observations at once.

        Parameters
        ----------
        observations : Observation | Observations
            The observation(s) to register.
        """
        # Normalize a single Observation into the bulk container form.
        if isinstance(observations, Observation):
            observations = Observations(observations=[observations])
        # Check input and transform the observations if a space adapter is present.
        observations = Observations(
            observations=[
                self._preprocess_observation(observation) for observation in observations
            ]
        )
        # Now bulk register all observations (details delegated to the underlying classes).
        self._register(observations)

    def _preprocess_observation(self, observation: Observation) -> Observation:
        """
        Wrapper method, which employs the space adapter (if any), and does some input
        validation, before registering the configs and scores.

        Parameters
        ----------
        observation : Observation
            The observation to register.

        Returns
        -------
        observation : Observation
            The (possibly transformed) observation to register.
        """
        # Do some input validation.
        assert observation.metadata is None or isinstance(observation.metadata, pd.Series)
        assert set(observation.score.index) == set(
            self._optimization_targets
        ), "Mismatched optimization targets."
        # XOR: once the first observation establishes the convention, every later
        # observation must either always carry a context or never carry one.
        assert self._has_context is None or self._has_context ^ (
            observation.context is None
        ), "Context must always be added or never be added."
        assert len(observation.config) == len(
            self.parameter_space.values()
        ), "Mismatched configuration shape."

        self._has_context = observation.context is not None
        self._observations.append(observation)

        transformed_observation = deepcopy(observation)  # Needed to support named tuples
        if self._space_adapter:
            # Map the observed config back into the optimizer's (target) space.
            transformed_observation = Observation(
                config=self._space_adapter.inverse_transform(transformed_observation.config),
                score=transformed_observation.score,
                context=transformed_observation.context,
                metadata=transformed_observation.metadata,
            )
            assert len(transformed_observation.config) == len(
                self.optimizer_parameter_space.values()
            ), "Mismatched configuration shape after inverse transform."
        return transformed_observation

    @abstractmethod
    def _register(
        self,
        observations: Observations,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        observations : Observations
            The set of observations to register.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    def suggest(
        self,
        *,
        context: pd.Series | None = None,
        defaults: bool = False,
    ) -> Suggestion:
        """
        Wrapper method, which employs the space adapter (if any), after suggesting a new
        configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.
        defaults : bool
            Whether or not to return the default config instead of an optimizer guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        suggestion : Suggestion
            The suggested point to evaluate.
        """
        if defaults:
            configuration = config_to_series(self.parameter_space.get_default_configuration())
            if self.space_adapter is not None:
                configuration = self.space_adapter.inverse_transform(configuration)
            suggestion = Suggestion(config=configuration, context=context, metadata=None)
        else:
            suggestion = self._suggest(context=context)
            assert set(suggestion.config.index).issubset(set(self.optimizer_parameter_space)), (
                "Optimizer suggested a configuration that does "
                "not match the expected parameter space."
            )
        if self._space_adapter:
            # Map the optimizer-space suggestion into the original parameter space.
            suggestion = Suggestion(
                config=self._space_adapter.transform(suggestion.config),
                context=suggestion.context,
                metadata=suggestion.metadata,
            )
            assert set(suggestion.config.index).issubset(set(self.parameter_space)), (
                "Space adapter produced a configuration that does "
                "not match the expected parameter space."
            )
        return suggestion

    @abstractmethod
    def _suggest(
        self,
        *,
        context: pd.Series | None = None,
    ) -> Suggestion:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.

        Returns
        -------
        suggestion : Suggestion
            The suggestion to evaluate.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    @abstractmethod
    def register_pending(self, pending: Suggestion) -> None:
        """
        Registers the given suggestion as "pending". That is it say, it has been
        suggested by the optimizer, and an experiment trial has been started. This can
        be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        pending : Suggestion
            The pending suggestion to register.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    def get_observations(self) -> Observations:
        """
        Returns all the observations registered so far.

        Returns
        -------
        observations : Observations
            All the observations registered so far.

        Raises
        ------
        ValueError
            If no observations have been registered yet.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        return self._observations

    def get_best_observations(
        self,
        n_max: int = 1,
    ) -> Observations:
        """
        Get the N best observations so far as a filtered version of Observations.
        Default is N=1. The columns are ordered in ASCENDING order of the optimization
        targets. The function uses `pandas.DataFrame.nsmallest(..., keep="first")`
        method under the hood.

        Parameters
        ----------
        n_max : int
            Maximum number of best observations to return. Default is 1.

        Returns
        -------
        observations : Observations
            A filtered version of Observations with the best N observations.

        Raises
        ------
        ValueError
            If no observations have been registered yet.
        """
        # get_observations() already raises ValueError when empty, so no
        # additional emptiness check is needed here.
        observations = self.get_observations()
        idx = observations.scores.nsmallest(
            n_max,
            columns=self._optimization_targets,
            keep="first",
        ).index
        return observations.filter_by_index(idx)

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc.

        after use. Default is no-op. Redefine this method in optimizers that require
        cleanup.
        """

    def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
        """Convert numpy array from one-hot encoding to a DataFrame with categoricals
        and ints in proper columns.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            # j tracks the current column offset within the one-hot row:
            # categoricals consume len(choices) columns, all others consume one.
            j = 0
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    for offset, val in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    j += len(param.choices)
                else:
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)

    def _to_1hot(self, config: pd.DataFrame | pd.Series) -> npt.NDArray:
        """Convert pandas DataFrame to one-hot-encoded numpy array."""
        n_cols = 0
        # A Series encodes a single configuration; a DataFrame encodes one per row.
        n_rows = config.shape[0] if config.ndim > 1 else 1
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot