Coverage for mlos_bench/mlos_bench/storage/base_experiment_data.py: 85%
47 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-07 01:52 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-07 01:52 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Base interface for accessing the stored benchmark experiment data."""
7from abc import ABCMeta, abstractmethod
8from distutils.util import strtobool # pylint: disable=deprecated-module
9from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple
11import pandas
13from mlos_bench.storage.base_tunable_config_data import TunableConfigData
15if TYPE_CHECKING:
16 from mlos_bench.storage.base_trial_data import TrialData
17 from mlos_bench.storage.base_tunable_config_trial_group_data import (
18 TunableConfigTrialGroupData,
19 )
class ExperimentData(metaclass=ABCMeta):
    """
    Base interface for accessing the stored experiment benchmark data.

    An experiment groups together a set of trials that are run with a given set of
    scripts and mlos_bench configuration files.
    """

    RESULT_COLUMN_PREFIX = "result."
    CONFIG_COLUMN_PREFIX = "config."

    # String spellings (compared case-insensitively) accepted for boolean
    # metadata flags. These mirror what `distutils.util.strtobool` accepted, so
    # that the class no longer depends on the `distutils` module (deprecated by
    # PEP 632 and removed from the standard library in Python 3.12).
    _TRUE_STRINGS = frozenset({"y", "yes", "t", "true", "on", "1"})
    _FALSE_STRINGS = frozenset({"n", "no", "f", "false", "off", "0"})

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        experiment_id: str,
        description: str,
        root_env_config: str,
        git_repo: str,
        git_commit: str,
    ):
        """
        Store the identifying attributes of the experiment.

        Parameters
        ----------
        experiment_id : str
            ID of the experiment.
        description : str
            Description of the experiment.
        root_env_config : str
            First element of the tuple reported by `root_env_config`.
        git_repo : str
            Second element of the tuple reported by `root_env_config`.
        git_commit : str
            Third element of the tuple reported by `root_env_config`.
        """
        self._experiment_id = experiment_id
        self._description = description
        self._root_env_config = root_env_config
        self._git_repo = git_repo
        self._git_commit = git_commit

    @property
    def experiment_id(self) -> str:
        """ID of the experiment."""
        return self._experiment_id

    @property
    def description(self) -> str:
        """Description of the experiment."""
        return self._description

    @property
    def root_env_config(self) -> Tuple[str, str, str]:
        """
        Root environment configuration.

        Returns
        -------
        root_env_config : Tuple[str, str, str]
            A tuple of (root_env_config, git_repo, git_commit) for the root environment.
        """
        return (self._root_env_config, self._git_repo, self._git_commit)

    def __repr__(self) -> str:
        return f"Experiment :: {self._experiment_id}: '{self._description}'"

    @property
    @abstractmethod
    def objectives(self) -> Dict[str, Literal["min", "max"]]:
        """
        Retrieve the experiment's objectives data from the storage.

        Returns
        -------
        objectives : Dict[str, objective]
            A dictionary of the experiment's objective names (optimization_targets)
            and their directions (e.g., min or max).
        """

    @property
    @abstractmethod
    def trials(self) -> Dict[int, "TrialData"]:
        """
        Retrieve the experiment's trials' data from the storage.

        Returns
        -------
        trials : Dict[int, TrialData]
            A dictionary of the trials' data, keyed by trial id.
        """

    @property
    @abstractmethod
    def tunable_configs(self) -> Dict[int, "TunableConfigData"]:
        """
        Retrieve the experiment's (tunable) configs' data from the storage.

        Returns
        -------
        tunable_configs : Dict[int, TunableConfigData]
            A dictionary of the configs' data, keyed by (tunable) config id.
        """

    @property
    @abstractmethod
    def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
        """
        Retrieve the Experiment's (Tunable) Config Trial Group data from the storage.

        Returns
        -------
        tunable_config_trial_groups : Dict[int, TunableConfigTrialGroupData]
            A dictionary of the config trial groups' data, keyed by (tunable)
            config id.
        """

    @staticmethod
    def _parse_bool_flag(value: object) -> bool:
        """
        Interpret a metadata value as a boolean flag.

        Parameters
        ----------
        value : object
            The raw value. It is converted with `str()` and lower-cased before
            being matched against the accepted spellings.

        Returns
        -------
        bool
            True for "y", "yes", "t", "true", "on", "1"; False for "n", "no",
            "f", "false", "off", "0" and for None.

        Raises
        ------
        ValueError
            If the value is not one of the recognized spellings.
        """
        if value is None:
            # str(None) would become "none" and be rejected below; an absent
            # value is treated as "flag not set" instead of raising.
            return False
        text = str(value).lower()
        if text in ExperimentData._TRUE_STRINGS:
            return True
        if text in ExperimentData._FALSE_STRINGS:
            return False
        raise ValueError(f"invalid truth value {text!r}")

    @property
    def default_tunable_config_id(self) -> Optional[int]:
        """
        Retrieves the (tunable) config id for the default tunable values for this
        experiment.

        Note: this is by *default* the first trial executed for this experiment.
        However, it is currently possible that the user changed the tunables config
        in between resumptions of an experiment.

        Returns
        -------
        Optional[int]
            The config id of the first trial (in trial id order) whose metadata
            marks it as "is_defaults"; otherwise the config id of the trial with
            the smallest trial id; or None if the experiment has no trials.

        Raises
        ------
        ValueError
            If a trial's "is_defaults" metadata value is not a recognized
            boolean spelling.
        """
        # Note: this implementation is quite inefficient and may be better
        # reimplemented by subclasses.

        # Read the (potentially expensive) property only once.
        trials = self.trials
        if not trials:
            return None
        # Sort the ids only, so that the trial objects are never compared.
        ordered_trial_ids = sorted(trials)
        # Check to see if we included it in trial metadata.
        for trial_id in ordered_trial_ids:
            trial = trials[trial_id]
            # Take the first config id marked as "defaults" when it was instantiated.
            if self._parse_bool_flag(trial.metadata_dict.get("is_defaults", False)):
                return trial.tunable_config_id
        # Fallback (min trial_id)
        return trials[ordered_trial_ids[0]].tunable_config_id

    @property
    @abstractmethod
    def results_df(self) -> pandas.DataFrame:
        """
        Retrieve all experimental results as a single DataFrame.

        Returns
        -------
        results : pandas.DataFrame
            A DataFrame with configurations and results from all trials of the experiment.
            Has columns
            [trial_id, tunable_config_id, tunable_config_trial_group_id, ts_start, ts_end, status]
            followed by tunable config parameters (prefixed with "config.") and
            trial results (prefixed with "result."). The latter can be NULLs if the
            trial was not successful.
        """