Coverage for mlos_bench/mlos_bench/storage/base_experiment_data.py: 85%

47 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-07 01:52 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Base interface for accessing the stored benchmark experiment data.""" 

6 

7from abc import ABCMeta, abstractmethod 

8from distutils.util import strtobool # pylint: disable=deprecated-module 

9from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple 

10 

11import pandas 

12 

13from mlos_bench.storage.base_tunable_config_data import TunableConfigData 

14 

15if TYPE_CHECKING: 

16 from mlos_bench.storage.base_trial_data import TrialData 

17 from mlos_bench.storage.base_tunable_config_trial_group_data import ( 

18 TunableConfigTrialGroupData, 

19 ) 

20 

21 

class ExperimentData(metaclass=ABCMeta):
    """
    Base interface for accessing the stored experiment benchmark data.

    An experiment groups together a set of trials that are run with a given set of
    scripts and mlos_bench configuration files.
    """

    RESULT_COLUMN_PREFIX = "result."
    CONFIG_COLUMN_PREFIX = "config."

    # Spellings recognized when parsing the "is_defaults" trial metadata flag.
    # These are the same (case-insensitive) values that the now-removed
    # `distutils.util.strtobool` helper accepted.
    _TRUE_STRINGS = frozenset({"y", "yes", "t", "true", "on", "1"})
    _FALSE_STRINGS = frozenset({"n", "no", "f", "false", "off", "0"})
    # Stringified "no value" markers: treated as "flag not set" instead of an error.
    _UNSET_STRINGS = frozenset({"", "none"})

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        experiment_id: str,
        description: str,
        root_env_config: str,
        git_repo: str,
        git_commit: str,
    ):
        """
        Store the descriptive attributes of an experiment.

        Parameters
        ----------
        experiment_id : str
            ID of the experiment.
        description : str
            Description of the experiment.
        root_env_config : str
            Root environment configuration of the experiment.
        git_repo : str
            Git repository reported together with the root environment config.
        git_commit : str
            Git commit reported together with the root environment config.
        """
        self._experiment_id = experiment_id
        self._description = description
        self._root_env_config = root_env_config
        self._git_repo = git_repo
        self._git_commit = git_commit

    @property
    def experiment_id(self) -> str:
        """ID of the experiment."""
        return self._experiment_id

    @property
    def description(self) -> str:
        """Description of the experiment."""
        return self._description

    @property
    def root_env_config(self) -> Tuple[str, str, str]:
        """
        Root environment configuration.

        Returns
        -------
        root_env_config : Tuple[str, str, str]
            A tuple of (root_env_config, git_repo, git_commit) for the root environment.
        """
        return (self._root_env_config, self._git_repo, self._git_commit)

    def __repr__(self) -> str:
        return f"Experiment :: {self._experiment_id}: '{self._description}'"

    @property
    @abstractmethod
    def objectives(self) -> Dict[str, Literal["min", "max"]]:
        """
        Retrieve the experiment's objectives data from the storage.

        Returns
        -------
        objectives : Dict[str, objective]
            A dictionary of the experiment's objective names (optimization_targets)
            and their directions (e.g., min or max).
        """

    @property
    @abstractmethod
    def trials(self) -> Dict[int, "TrialData"]:
        """
        Retrieve the experiment's trials' data from the storage.

        Returns
        -------
        trials : Dict[int, TrialData]
            A dictionary of the trials' data, keyed by trial id.
        """

    @property
    @abstractmethod
    def tunable_configs(self) -> Dict[int, "TunableConfigData"]:
        """
        Retrieve the experiment's (tunable) configs' data from the storage.

        Returns
        -------
        tunable_configs : Dict[int, TunableConfigData]
            A dictionary of the configs' data, keyed by (tunable) config id.
        """

    @property
    @abstractmethod
    def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
        """
        Retrieve the Experiment's (Tunable) Config Trial Group data from the storage.

        Returns
        -------
        tunable_config_trial_groups : Dict[int, TunableConfigTrialGroupData]
            A dictionary of the trial groups' data, keyed by (tunable) config id.
        """

    @staticmethod
    def _is_truthy(value: object) -> bool:
        """
        Interpret a trial metadata value as a boolean flag.

        Parameters
        ----------
        value : object
            The raw metadata value (e.g., a bool, a string such as "True", or None).

        Returns
        -------
        bool
            True for y/yes/t/true/on/1 and False for n/no/f/false/off/0
            (case-insensitive, surrounding whitespace ignored).
            None, an empty string and the text "None" count as False.

        Raises
        ------
        ValueError
            If the value is none of the recognized spellings.
        """
        if value is None:
            return False
        text = str(value).strip().lower()
        if text in ExperimentData._TRUE_STRINGS:
            return True
        if text in ExperimentData._FALSE_STRINGS or text in ExperimentData._UNSET_STRINGS:
            return False
        raise ValueError(f"invalid truth value {value!r}")

    @property
    def default_tunable_config_id(self) -> Optional[int]:
        """
        Retrieves the (tunable) config id for the default tunable values for this
        experiment.

        Note: this is by *default* the first trial executed for this experiment.
        However, it is currently possible that the user changed the tunables config
        in between resumptions of an experiment.

        Returns
        -------
        Optional[int]
            The config id of the first trial (in trial id order) whose metadata
            marks it as "is_defaults"; otherwise the config id of the trial with
            the smallest trial id; or None if the experiment has no trials.

        Raises
        ------
        ValueError
            If a trial's "is_defaults" metadata value is not a recognized
            boolean spelling.
        """
        # Note: this implementation is quite inefficient and may be better
        # reimplemented by subclasses.

        # Check to see if we included it in trial metadata.
        trials_items = sorted(self.trials.items())
        if not trials_items:
            return None
        for _trial_id, trial in trials_items:
            # Take the first config id marked as "defaults" when it was instantiated.
            if self._is_truthy(trial.metadata_dict.get("is_defaults", False)):
                return trial.tunable_config_id
        # Fallback (min trial_id)
        return trials_items[0][1].tunable_config_id

    @property
    @abstractmethod
    def results_df(self) -> pandas.DataFrame:
        """
        Retrieve all experimental results as a single DataFrame.

        Returns
        -------
        results : pandas.DataFrame
            A DataFrame with configurations and results from all trials of the experiment.
            Has columns
            [trial_id, tunable_config_id, tunable_config_trial_group_id, ts_start, ts_end, status]
            followed by tunable config parameters (prefixed with "config.") and
            trial results (prefixed with "result."). The latter can be NULLs if the
            trial was not successful.
        """