Coverage for mlos_core/mlos_core/data_classes.py: 93%
149 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-21 01:50 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-21 01:50 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
Data classes for ``mlos_core`` used to pass around configurations, observations, and
suggestions.

``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame`
and :external:py:class:`~pandas.Series` objects to represent configurations, scores, and
context (information about where the configuration was evaluated).

The classes in this module encapsulate tuples of those for easier passing around and
manipulation.
14"""
15from collections.abc import Iterable, Iterator
16from typing import Any
18import pandas as pd
19from ConfigSpace import Configuration, ConfigurationSpace
21from mlos_core.util import compare_optional_dataframe, compare_optional_series
24class Observation:
25 """A single observation of a configuration."""
27 def __init__(
28 self,
29 *,
30 config: pd.Series,
31 score: pd.Series = pd.Series(),
32 context: pd.Series | None = None,
33 metadata: pd.Series | None = None,
34 ):
35 """
36 Creates a new Observation object.
38 Parameters
39 ----------
40 config : pandas.Series
41 The configuration observed.
42 score : pandas.Series
43 The score metrics observed.
44 context : pandas.Series | None
45 The context in which the configuration was evaluated.
46 Not Yet Implemented.
47 metadata: pandas.Series | None
48 The metadata in which the configuration was evaluated
49 """
50 self._config = config
51 self._score = score
52 self._context = context
53 self._metadata = metadata
55 @property
56 def config(self) -> pd.Series:
57 """Gets (a copy of) the config of the Observation."""
58 return self._config.copy()
60 @property
61 def score(self) -> pd.Series:
62 """Gets (a copy of) the score of the Observation."""
63 return self._score.copy()
65 @property
66 def context(self) -> pd.Series | None:
67 """Gets (a copy of) the context of the Observation."""
68 return self._context.copy() if self._context is not None else None
70 @property
71 def metadata(self) -> pd.Series | None:
72 """Gets (a copy of) the metadata of the Observation."""
73 return self._metadata.copy() if self._metadata is not None else None
75 def to_suggestion(self) -> "Suggestion":
76 """
77 Converts the observation to a suggestion.
79 Returns
80 -------
81 Suggestion
82 The suggestion.
83 """
84 return Suggestion(
85 config=self.config,
86 context=self.context,
87 metadata=self.metadata,
88 )
90 def __repr__(self) -> str:
91 return (
92 f"Observation(config={self._config}, score={self._score}, "
93 f"context={self._context}, metadata={self._metadata})"
94 )
96 def __eq__(self, other: Any) -> bool:
97 if not isinstance(other, Observation):
98 return False
100 if not self._config.equals(other._config):
101 return False
102 if not self._score.equals(other._score):
103 return False
104 if not compare_optional_series(self._context, other._context):
105 return False
106 if not compare_optional_series(self._metadata, other._metadata):
107 return False
109 return True
111 def __ne__(self, other: Any) -> bool:
112 return not self.__eq__(other)
class Observations:
    """
    A set of observations of configuration scores.

    Configs, scores and the optional contexts and metadata are stored as row-aligned
    dataframes that share a 0..n-1 positional index. The accessor properties return
    copies of those dataframes.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        configs: pd.DataFrame = pd.DataFrame(),
        scores: pd.DataFrame = pd.DataFrame(),
        contexts: pd.DataFrame | None = None,
        metadata: pd.DataFrame | None = None,
        observations: Iterable[Observation] | None = None,
    ):
        """
        Creates a new Observations object.

        Can accept either a set of Observations or a collection of aligned config and
        score (and optionally context and metadata) dataframes.

        If a non-empty ``observations`` iterable is provided, all four dataframes are
        built from it and the dataframe arguments are ignored.

        Parameters
        ----------
        configs : pandas.DataFrame
            Pandas dataframe containing configurations. Column names are the parameter names.
        scores : pandas.DataFrame
            The score metrics observed in a dataframe.
        contexts : pandas.DataFrame | None
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata: pandas.DataFrame | None
            The metadata in which the configuration was evaluated
            Not Yet Implemented.
        observations : Iterable[Observation] | None
            Individual observations to build the dataframes from. Contexts (resp.
            metadata) are only kept when every observation provides them.

        Raises
        ------
        AssertionError
            If scores, contexts or metadata do not have as many rows as configs.
        """
        # Materialize once: the iterable is traversed several times below, which
        # would silently yield nothing on later passes for a generator/iterator.
        obs_list = [] if observations is None else list(observations)
        if obs_list:
            configs = pd.concat([obs.config.to_frame().T for obs in obs_list])
            scores = pd.concat([obs.score.to_frame().T for obs in obs_list])
            contexts = self._stack_optional_rows([obs.context for obs in obs_list])
            metadata = self._stack_optional_rows([obs.metadata for obs in obs_list])

        assert len(configs.index) == len(
            scores.index
        ), "config and score must have the same length"
        if contexts is not None:
            assert len(configs.index) == len(
                contexts.index
            ), "config and context must have the same length"
        if metadata is not None:
            assert len(configs.index) == len(
                metadata.index
            ), "config and metadata must have the same length"

        # reset_index() returns new frames, so the shared default DataFrame
        # arguments are never stored on (or mutated through) an instance.
        self._configs = configs.reset_index(drop=True)
        self._scores = scores.reset_index(drop=True)
        self._contexts = None if contexts is None else contexts.reset_index(drop=True)
        self._metadata = None if metadata is None else metadata.reset_index(drop=True)

    @staticmethod
    def _stack_optional_rows(rows: list[pd.Series | None]) -> pd.DataFrame | None:
        """Stacks the Series as rows of one DataFrame, or returns None if any is None."""
        frames = []
        for row in rows:
            if row is None:
                return None
            frames.append(row.to_frame().T)
        return pd.concat(frames)

    @property
    def configs(self) -> pd.DataFrame:
        """Gets a copy of the configs of the Observations."""
        return self._configs.copy()

    @property
    def scores(self) -> pd.DataFrame:
        """Gets a copy of the scores of the Observations."""
        return self._scores.copy()

    @property
    def contexts(self) -> pd.DataFrame | None:
        """Gets a copy of the contexts of the Observations."""
        return self._contexts.copy() if self._contexts is not None else None

    @property
    def metadata(self) -> pd.DataFrame | None:
        """Gets a copy of the metadata of the Observations."""
        return self._metadata.copy() if self._metadata is not None else None

    def filter_by_index(self, index: pd.Index) -> "Observations":
        """
        Filters the observations by the given indices.

        Parameters
        ----------
        index : pandas.Index
            The indices (row labels of this object's dataframes) to keep.

        Returns
        -------
        Observations
            The filtered observations. Its rows are re-indexed from 0 by the
            constructor.
        """
        return Observations(
            configs=self._configs.loc[index].copy(),
            scores=self._scores.loc[index].copy(),
            contexts=None if self._contexts is None else self._contexts.loc[index].copy(),
            metadata=None if self._metadata is None else self._metadata.loc[index].copy(),
        )

    def append(self, observation: Observation) -> None:
        """
        Appends the given observation to these observations (in place).

        The first observation appended to an empty container decides whether
        contexts and metadata are tracked; later ones must match that choice.

        Parameters
        ----------
        observation : Observation
            The observation to append.

        Raises
        ------
        AssertionError
            If the presence of context/metadata on ``observation`` does not match
            the observations already stored. Nothing is modified in that case.
        """
        config = observation.config.to_frame().T
        score = observation.score.to_frame().T
        # The properties return copies; read each of them only once.
        obs_context = observation.context
        obs_metadata = observation.metadata
        context = None if obs_context is None else obs_context.to_frame().T
        metadata = None if obs_metadata is None else obs_metadata.to_frame().T

        if len(self._configs.index) == 0:
            # Each single-row frame is labelled with the name of the Series it came
            # from; normalize to the positional index used everywhere else so that
            # differently named config/score Series still line up.
            self._configs = config.reset_index(drop=True)
            self._scores = score.reset_index(drop=True)
            self._contexts = None if context is None else context.reset_index(drop=True)
            self._metadata = None if metadata is None else metadata.reset_index(drop=True)
            return

        # Validate before touching any state so a rejected append leaves the
        # container unchanged.
        if self._contexts is not None:
            assert context is not None, (
                "context of appending observation must not be null "
                "if context of prior observation is not null"
            )
        else:
            assert context is None, (
                "context of appending observation must be null "
                "if context of prior observation is null"
            )
        if self._metadata is not None:
            assert metadata is not None, (
                "metadata of appending observation must not be null "
                "if metadata of prior observation is not null"
            )
        else:
            assert metadata is None, (
                "metadata of appending observation must be null "
                "if metadata of prior observation is null"
            )

        self._configs = pd.concat([self._configs, config]).reset_index(drop=True)
        self._scores = pd.concat([self._scores, score]).reset_index(drop=True)
        assert self._configs.index.equals(
            self._scores.index
        ), "config and score must have the same index"

        if self._contexts is not None:
            self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._contexts.index
            ), "config and context must have the same index"
        if self._metadata is not None:
            self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._metadata.index
            ), "config and metadata must have the same index"

    def __len__(self) -> int:
        return len(self._configs.index)

    def __iter__(self) -> Iterator["Observation"]:
        """Yields one Observation per stored row, in index order."""
        for idx in self._configs.index:
            yield Observation(
                config=self._configs.loc[idx],
                score=self._scores.loc[idx],
                context=None if self._contexts is None else self._contexts.loc[idx],
                metadata=None if self._metadata is None else self._metadata.loc[idx],
            )

    def __repr__(self) -> str:
        return (
            f"Observations(configs={self._configs}, score={self._scores}, "
            f"contexts={self._contexts}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two Observations sets are equal when all four of their dataframes are equal."""
        if not isinstance(other, Observations):
            return False

        if not self._configs.equals(other._configs):
            return False
        if not self._scores.equals(other._scores):
            return False
        if not compare_optional_dataframe(self._contexts, other._contexts):
            return False
        if not compare_optional_dataframe(self._metadata, other._metadata):
            return False

        return True

    # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq
    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)
class Suggestion:
    """
    A single suggestion for a configuration.

    A Suggestion is an Observation that has not yet been scored. Evaluating the
    Suggestion and calling `complete(scores)` can convert it to an Observation.
    """

    def __init__(
        self,
        *,
        config: pd.Series,
        context: pd.Series | None = None,
        metadata: pd.Series | None = None,
    ):
        """
        Creates a new Suggestion.

        Parameters
        ----------
        config : pandas.Series
            The configuration suggested.
        context : pandas.Series | None
            The context for this suggestion, by default None
        metadata : pandas.Series | None
            Any metadata provided by the underlying optimizer, by default None
        """
        self._config = config
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Suggestion."""
        return self._config.copy()

    @property
    def context(self) -> pd.Series | None:
        """Gets (a copy of) the context of the Suggestion."""
        return self._context.copy() if self._context is not None else None

    @property
    def metadata(self) -> pd.Series | None:
        """Gets (a copy of) the metadata of the Suggestion."""
        return self._metadata.copy() if self._metadata is not None else None

    def complete(self, score: pd.Series) -> Observation:
        """
        Completes the Suggestion by adding a score to turn it into an Observation.

        Parameters
        ----------
        score : pandas.Series
            The score metrics observed.

        Returns
        -------
        Observation
            The observation of the suggestion, carrying copies of this
            suggestion's config, context and metadata.
        """
        return Observation(
            config=self.config,
            score=score,
            context=self.context,
            metadata=self.metadata,
        )

    def to_configspace_config(self, space: ConfigurationSpace) -> Configuration:
        """
        Converts the suggested config into a ConfigSpace Configuration.

        Entries of the config whose value is NaN are dropped before the conversion.

        Parameters
        ----------
        space : ConfigSpace.ConfigurationSpace
            The ConfigurationSpace the Configuration is created for.

        Returns
        -------
        ConfigSpace.Configuration
            The output Configuration.
        """
        return Configuration(space, values=self._config.dropna().to_dict())

    def __repr__(self) -> str:
        return (
            f"Suggestion(config={self._config}, context={self._context}, "
            f"metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two Suggestions are equal when config, context and metadata are all equal."""
        if not isinstance(other, Suggestion):
            return False

        if not self._config.equals(other._config):
            return False
        if not compare_optional_series(self._context, other._context):
            return False
        if not compare_optional_series(self._metadata, other._metadata):
            return False

        return True

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)