#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Contains the BaseOptimizer abstract class."""

import collections
from abc import ABCMeta, abstractmethod
from typing import List, Optional, Tuple, Union

import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import config_to_dataframe


class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface."""

    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        *,
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
    ):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        optimization_targets : List[str]
            The names of the optimization targets to minimize.
        objective_weights : Optional[List[float]]
            Optional list of weights for the optimization targets.
        space_adapter : Optional[BaseSpaceAdapter]
            The space adapter instance to employ for parameter space transformations.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = (
            parameter_space if space_adapter is None else space_adapter.target_parameter_space
        )

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._optimization_targets = optimization_targets
        self._objective_weights = objective_weights
        if objective_weights is not None and len(objective_weights) != len(optimization_targets):
            raise ValueError("Number of weights must match the number of optimization targets")

        self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter
        self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]] = []
        self._has_context: Optional[bool] = None
        self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = []

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> Optional[BaseSpaceAdapter]:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Wrapper method which employs the space adapter (if any) before registering the
        configs and scores.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        scores : pd.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.
        context : Optional[pd.DataFrame]
            Not Yet Implemented.
        metadata : Optional[pd.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        # Do some input validation.
        assert metadata is None or isinstance(metadata, pd.DataFrame)
        assert set(scores.columns) == set(
            self._optimization_targets
        ), "Mismatched optimization targets."
        assert self._has_context is None or self._has_context ^ (
            context is None
        ), "Context must always be added or never be added."
        assert len(configs) == len(scores), "Mismatched number of configs and scores."
        if context is not None:
            assert len(configs) == len(context), "Mismatched number of configs and context."
        assert configs.shape[1] == len(
            self.parameter_space.values()
        ), "Mismatched configuration shape."
        self._observations.append((configs, scores, context))
        self._has_context = context is not None

        if self._space_adapter:
            configs = self._space_adapter.inverse_transform(configs)
            assert configs.shape[1] == len(
                self.optimizer_parameter_space.values()
            ), "Mismatched configuration shape after inverse transform."
        # Forward the metadata to the backend optimizer as well, so it can
        # correlate the registration with its own suggest-side state.
        return self._register(
            configs=configs, scores=scores, context=context, metadata=metadata
        )
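
    # Usage sketch (illustrative, not part of the original source): assuming a
    # hypothetical concrete subclass ``MyOptimizer`` that implements the
    # abstract methods, and a ``space`` with a single float parameter "x":
    #     opt = MyOptimizer(parameter_space=space, optimization_targets=["score"])
    #     configs = pd.DataFrame([{"x": 1.0}])
    #     scores = pd.DataFrame([{"score": 0.42}])
    #     opt.register(configs=configs, scores=scores)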

    @abstractmethod
    def _register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        scores : pd.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.
        context : Optional[pd.DataFrame]
            Not Yet Implemented.
        metadata : Optional[pd.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    def suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
        defaults: bool = False,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Wrapper method which employs the space adapter (if any) after suggesting a new
        configuration.

        Parameters
        ----------
        context : Optional[pd.DataFrame]
            Not Yet Implemented.
        defaults : bool
            Whether to return the default config instead of an optimizer-guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        metadata : Optional[pd.DataFrame]
            Backend-optimizer-specific metadata associated with the suggested
            configuration, intended to be passed back on registration.
        """
        if defaults:
            configuration = config_to_dataframe(self.parameter_space.get_default_configuration())
            metadata = None
            if self.space_adapter is not None:
                configuration = self.space_adapter.inverse_transform(configuration)
        else:
            configuration, metadata = self._suggest(context=context)
            assert len(configuration) == 1, "Suggest must return a single configuration."
            assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), (
                "Optimizer suggested a configuration that does "
                "not match the expected parameter space."
            )
        if self._space_adapter:
            configuration = self._space_adapter.transform(configuration)
            assert set(configuration.columns).issubset(set(self.parameter_space)), (
                "Space adapter produced a configuration that does "
                "not match the expected parameter space."
            )
        return configuration, metadata
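
    # Round-trip sketch (illustrative, not part of the original source): the
    # metadata returned by suggest() is meant to be handed back to register()
    # so the backend can match the result to its suggestion:
    #     configs, metadata = opt.suggest()
    #     scores = evaluate(configs)  # hypothetical evaluation function
    #     opt.register(configs=configs, scores=scores, metadata=metadata)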

    @abstractmethod
    def _suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : Optional[pd.DataFrame]
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        metadata : Optional[pd.DataFrame]
            Backend-optimizer-specific metadata associated with the suggested
            configuration, intended to be passed back on registration.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    @abstractmethod
    def register_pending(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs as "pending". That is to say, they have been
        suggested by the optimizer and an experiment trial has been started. This can
        be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        context : Optional[pd.DataFrame]
            Not Yet Implemented.
        metadata : Optional[pd.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Returns the observations as a triplet of DataFrames (config, score, context).

        Returns
        -------
        observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]
            A triplet of (config, score, context) DataFrames of observations.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        configs = pd.concat([config for config, _, _ in self._observations]).reset_index(drop=True)
        scores = pd.concat([score for _, score, _ in self._observations]).reset_index(drop=True)
        contexts = pd.concat(
            [
                pd.DataFrame() if context is None else context
                for _, _, context in self._observations
            ]
        ).reset_index(drop=True)
        return (configs, scores, contexts if len(contexts.columns) > 0 else None)

    def get_best_observations(
        self,
        *,
        n_max: int = 1,
    ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Get the N best observations so far as a triplet of DataFrames (config, score,
        context). Default is N=1. The rows are ordered in ASCENDING order of the
        optimization targets. The function uses the `pandas.DataFrame.nsmallest(...,
        keep="first")` method under the hood.

        Parameters
        ----------
        n_max : int
            Maximum number of best observations to return. Default is 1.

        Returns
        -------
        observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]
            A triplet of (config, score, context) DataFrames of the best observations.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        (configs, scores, contexts) = self.get_observations()
        idx = scores.nsmallest(n_max, columns=self._optimization_targets, keep="first").index
        return (configs.loc[idx], scores.loc[idx], None if contexts is None else contexts.loc[idx])
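
    # Behavior note (illustrative example, not part of the original source):
    # with optimization_targets=["score"] and registered scores 0.3, 0.1, 0.2,
    # get_best_observations(n_max=2) returns the observations with scores
    # 0.1 and 0.2, since nsmallest() selects the smallest target values.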

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use.

        Default is no-op. Redefine this method in optimizers that require cleanup.
        """

    def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame:
        """Convert numpy array from one-hot encoding to a DataFrame with categoricals
        and ints in proper columns.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    for offset, val in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    j += len(param.choices)
                else:
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)
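
    # Encoding layout sketch (illustrative; the parameter names are
    # hypothetical): given a space with a categorical color in ("red", "green")
    # and an integer size, the one-hot row [0, 1, 5] decodes to
    # {"color": "green", "size": 5}. Categoricals occupy one column per
    # choice; every other parameter occupies a single column.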

    def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
        """Convert pandas DataFrame to one-hot-encoded numpy array."""
        n_cols = 0
        n_rows = config.shape[0] if config.ndim > 1 else 1
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot
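
    # Round-trip sketch (illustrative): for the same hypothetical space as
    # above, _to_1hot(config=pd.DataFrame([{"color": "green", "size": 5}]))
    # yields [[0., 1., 5.]] (assuming that column layout), and _from_1hot()
    # inverts it, so the two helpers form an encode/decode pair for backends
    # that operate on purely numeric arrays.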